In [None]:
# Imports
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime

In [None]:
# Analysis date in the 'YYYY.MM.DD' form that is embedded in the input file name.
# Defaults to today; uncomment the override below to analyse an older export.
date = f'{datetime.today():%Y.%m.%d}'
#date='2025.03.21'
print(f'Data analysis of certified devices started for date: {date}')

# Load the export that belongs to the chosen date (relative to the working directory)
certificationsData = pd.read_csv(f'certified_devices_{date}.csv')

# Keep the raw 'date' column and add its parsed form as 'Datetime'
certificationsData = certificationsData.assign(
    Datetime=pd.to_datetime(certificationsData['date'])
)

In [None]:
# Certifications per calendar day, only for days that have at least one entry
count_per_date_df = pd.DataFrame(
    certificationsData.groupby(certificationsData['Datetime'].dt.date).size(),
    columns=['count'],
).reset_index()

# One row for every calendar day between the first and the last certification
date_range = pd.date_range(
    start=certificationsData['Datetime'].min(),
    end=certificationsData['Datetime'].max(),
    freq='D',
)
all_dates_df = pd.DataFrame({'Datetime': date_range})
all_dates_df['Datetime'] = all_dates_df['Datetime'].dt.date

# Left-join the counts onto the full calendar so that days without any
# certification are present too (as NaN, replaced by 0 right below)
count_per_date = (
    all_dates_df
    .merge(count_per_date_df, on='Datetime', how='left')
    .set_index('Datetime')
)
count_per_date['count'] = count_per_date['count'].fillna(0)

print(f"Maximum Count: {count_per_date['count'].max()!s}")
print(f"Mean Count: {count_per_date['count'].mean()!s}")

# Bar chart with one bar per calendar day
fig, ax = plt.subplots(figsize=(15, 6))
ax.bar(count_per_date.index, count_per_date['count'])
ax.set_xlabel('Date')
ax.set_ylabel('Certification count')
#plt.title('Number of certifications per day')
# Tick only every 10th day and turn the labels vertical to keep them readable
plt.xticks(count_per_date.index[::10], rotation=90)

# Clip the x-axis to the first and last day so no padding remains at the sides
ax.set_xlim(count_per_date.index.min(), count_per_date.index.max())

# Write the figure to disk, then render it
fig.savefig('certifications_per_date.png')
plt.show()

In [None]:


# Horizontal boxplot of the daily certification counts; showmeans adds a marker for the mean
plt.figure(figsize=(15, 1))
plt.boxplot(count_per_date['count'], vert=False, showmeans=True)

# One tick per distinct daily count. Passing the whole column would create one
# (mostly duplicated) tick per calendar day at exactly the same positions.
plt.xticks(count_per_date['count'].unique())
plt.grid(True)
#plt.title('Boxplot of certification count per day')
# The single box needs no y-axis ticks
plt.yticks([])
plt.xlabel('Certification count per day')

# Save the plot under its own name: 'certifications_per_date.png' is the file
# the daily bar chart is written to, and reusing it here overwrote that chart
plt.savefig('certifications_per_date_boxplot.png')
# Show the plot
plt.show()

In [None]:
# Running total of certifications over the full calendar (one value per day)
cumulative_count = count_per_date['count'].cumsum()

# Report the running total at two fixed positions, counted in days from the
# first day in the data. The printed label shows the actual date of each position.
# NOTE(review): the original comments labelled both positions "23.05.2023";
# presumably each was picked for a specific date - confirm which ones are meant.
for day_offset in (206, 391):
    if day_offset < len(cumulative_count):
        print('certifications as of '+str(cumulative_count.index[day_offset])+': '+ str(cumulative_count.iloc[day_offset]))
    else:
        # A shorter data set must not abort the rest of the cell with an IndexError
        print(f'certifications at day offset {day_offset}: not available (data covers {len(cumulative_count)} days)')

# Area chart of the running total: outline plus a translucent fill down to zero
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(cumulative_count.index, cumulative_count.values, linestyle='-')
ax.fill_between(count_per_date.index, cumulative_count.values, color='skyblue', alpha=0.5)

ax.set_xlabel('Date')
ax.set_ylabel('Cumulative certifications')
#plt.title('Accumulated certifications over time')
# Tick every 10th day with vertical labels
plt.xticks(count_per_date.index[::10], rotation=90)
# Clip the x-axis to the covered date range
ax.set_xlim(count_per_date.index.min(), count_per_date.index.max())

# Write the figure to disk, render it, then print the final total
fig.savefig('certifications_per_date_accumulated.png')
plt.show()
print(cumulative_count.values.max())


# Same area chart as above, extended by a reference line and release markers
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(cumulative_count.index, cumulative_count.values, linestyle='-')
ax.fill_between(count_per_date.index, cumulative_count.values, color='skyblue', alpha=0.5)

# "Trend line": a straight line from (first day, 0) to (last day, final total)
ax.plot(
    [cumulative_count.index.min(), cumulative_count.index.max()],
    [0, cumulative_count.values.max()],
    color='blue',
    linewidth=2,
    label='Trend line',
)

# Red dots on the x-axis at four fixed dates; only the first one carries the
# legend label so the legend shows a single entry for all of them
for position, release_day in enumerate(('2023-05-18', '2023-10-23', '2024-05-08', '2024-11-07')):
    legend_entry = {'label': 'Matter update released'} if position == 0 else {}
    ax.plot(pd.Timestamp(release_day), 0, marker='o', markersize=5, color='red', **legend_entry)

ax.set_xlabel('Date')
ax.set_ylabel('Cumulative certifications')
#plt.title('Accumulated certifications over time')
# Tick every 10th day with vertical labels
plt.xticks(count_per_date.index[::10], rotation=90)
# Clip the x-axis to the covered date range
ax.set_xlim(count_per_date.index.min(), count_per_date.index.max())

ax.legend()

# Write the figure to disk, then render it
fig.savefig('certifications_per_date_accumulated_trend.png')
plt.show()



In [None]:
# Number each certification of a device in file order: the first row of a
# (vid, pid) pair gets 1, the next row with the same pair gets 2, and so on.
certificationCountDictionary = {}
certcounts = []
for vid, pid in zip(certificationsData['vid'], certificationsData['pid']):
    # The separator keeps different pairs apart. Plain concatenation mapped
    # e.g. (1, 11) and (11, 1) to the same key '111' and counted them as one device.
    vidPid = f'{vid}:{pid}'
    certCount = certificationCountDictionary.get(vidPid, 0) + 1
    certificationCountDictionary[vidPid] = certCount
    certcounts.append(certCount)
# One entry was appended per row, so the list lines up with the frame's rows
certificationsData['certCount'] = certcounts

# Number of rows for each certification number (1 = first certification of a device)
count_per_recertification = certificationsData.groupby('certCount').size()

# Bar chart of those totals
fig, ax = plt.subplots(figsize=(15, 6))
bars = ax.bar(count_per_recertification.index, count_per_recertification.values)
ax.set_xlabel('Certification count')
ax.set_ylabel('Count')
#plt.title('Number of certification count per device')

# Bars sit at x = 1, 2, ...; clip half a unit before the first and after the last
ax.set_xlim(0.5, len(count_per_recertification) + 0.5)

# Write each bar's value centred just above it
for bar in bars:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height, f'{height}', ha='center', va='bottom')

# Write the figure to disk, then render it
fig.savefig('recertification_counts.png')
plt.show()

In [None]:
# Number of certification rows for each value of 'Device Type'
count_per_product_type = certificationsData.groupby('Device Type').size()

# Bar chart with one bar per device type
fig, ax = plt.subplots(figsize=(15, 6))
bars = ax.bar(count_per_product_type.index, count_per_product_type.values)
ax.set_xlabel('Device type')
ax.set_ylabel('Count')
#plt.title('Number of certifications per device type')
# Vertical labels so the type names do not overlap
plt.xticks(rotation=90)

# Categorical bars sit at x = 0, 1, ...; clip half a unit around them
ax.set_xlim(-0.5, len(count_per_product_type) - 0.5)

# Write each bar's value centred just above it
for bar in bars:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height, f'{height}', ha='center', va='bottom')

# Write the figure to disk, then render it
fig.savefig('certified_product_types.png')
plt.show()

In [None]:
# Number of certification rows for each value of 'Device Type Group'
count_per_product_type_group = certificationsData.groupby('Device Type Group').size()

# Bar chart with one bar per device type group
fig, ax = plt.subplots(figsize=(15, 6))
bars = ax.bar(count_per_product_type_group.index, count_per_product_type_group.values)
ax.set_xlabel('Device type group')
ax.set_ylabel('Count')
#plt.title('Number of certifications per device type group')
# Vertical labels so the group names do not overlap
plt.xticks(rotation=90)

# Categorical bars sit at x = 0, 1, ...; clip half a unit around them
ax.set_xlim(-0.5, len(count_per_product_type_group) - 0.5)

# Write each bar's value centred just above it
for bar in bars:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height, f'{height}', ha='center', va='bottom')

# Write the figure to disk, then render it
fig.savefig('certified_products_per_type_group.png')
plt.show()

In [None]:
# types over time

def _daily_counts_for_group(certifications, calendar, group_name):
    """Count the certifications of one device type group per calendar day.

    Parameters
    ----------
    certifications : pandas.DataFrame
        Needs a 'Device Type Group' column and a datetime-like 'Datetime' column.
    calendar : pandas.DataFrame
        One row per day to report, with the day stored in a 'Datetime' column
        (same kind of values as ``certifications['Datetime'].dt.date``).
    group_name : str
        Value of 'Device Type Group' to select.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Datetime' with a single 'count' column; days of the
        calendar without a certification in the group hold 0.
    """
    in_group = certifications[certifications['Device Type Group'] == group_name]
    per_day = (
        in_group.groupby(in_group['Datetime'].dt.date)
        .size()
        .rename('count')
        .reset_index()
    )
    # Left-join onto the calendar so days without entries are present (as NaN)
    counts = pd.merge(calendar, per_day, on='Datetime', how='left').set_index('Datetime')
    counts['count'] = counts['count'].fillna(0)
    return counts


# Calculate cumulative sum of all entries
cumulative_count = count_per_date['count'].cumsum()

# Generate a DataFrame with all dates in the range
date_range = pd.date_range(start=certificationsData['Datetime'].min(), end=certificationsData['Datetime'].max(), freq='D')
all_dates_df = pd.DataFrame({'Datetime': date_range})
all_dates_df['Datetime'] = pd.to_datetime(all_dates_df['Datetime']).dt.date

# Zero-filled daily counts per device type group (one frame per group)
count_per_date_hvac = _daily_counts_for_group(certificationsData, all_dates_df, 'HVAC')
count_per_date_closures = _daily_counts_for_group(certificationsData, all_dates_df, 'Closures')
count_per_date_switches = _daily_counts_for_group(certificationsData, all_dates_df, 'Switches and controls')
count_per_date_smartplug = _daily_counts_for_group(certificationsData, all_dates_df, 'Smart plugs/outlets and other actuators')
count_per_date_lighting = _daily_counts_for_group(certificationsData, all_dates_df, 'Lighting')

# Running totals per device type group
cumulative_count_hvac = count_per_date_hvac['count'].cumsum()
cumulative_count_closures = count_per_date_closures['count'].cumsum()
cumulative_count_switches = count_per_date_switches['count'].cumsum()
cumulative_count_smartplug = count_per_date_smartplug['count'].cumsum()
cumulative_count_lighting = count_per_date_lighting['count'].cumsum()

# Whatever is not covered by the five groups above is reported as "others":
# subtract their running totals from the overall running total
cumulative_count_others=cumulative_count-cumulative_count_hvac-cumulative_count_closures-cumulative_count_switches-cumulative_count_smartplug-cumulative_count_lighting

# Line chart with one (non-stacked) running total per device type group
fig, ax = plt.subplots(figsize=(15, 6))
for group_series, line_colour, legend_label in (
    (cumulative_count_others, 'skyblue', 'Others'),
    (cumulative_count_hvac, 'orange', 'HVAC'),
    (cumulative_count_closures, 'grey', 'Closures'),
    (cumulative_count_switches, 'violet', 'Switches and controls'),
    (cumulative_count_smartplug, 'green', 'Smart plugs/outlets and other actuators'),
    (cumulative_count_lighting, 'yellow', 'Lighting'),
):
    ax.plot(group_series.index, group_series.values, linestyle='-', color=line_colour, label=legend_label)

ax.set_xlabel('Date')
ax.set_ylabel('Certification count')
#plt.title('Certifications per device type group over time')
# Tick every 10th day with vertical labels
plt.xticks(count_per_date.index[::10], rotation=90)
# Clip the x-axis to the covered date range
ax.set_xlim(count_per_date.index.min(), count_per_date.index.max())
ax.legend()

# Write the figure to disk, then render it
fig.savefig('certifications_per_date_and_type.png')
plt.show()


# Stack the per-group running totals on top of each other, bottom to top:
# lighting, smart plugs, switches, closures, HVAC. Each line adds the stack
# built so far (as a plain array) to the next group's series, so the order of
# these four statements matters.
# NOTE: this overwrites the per-group series; from here on these four names
# hold stacked totals, not the totals of the individual group.
cumulative_count_smartplug=cumulative_count_smartplug.values+cumulative_count_lighting
cumulative_count_switches=cumulative_count_smartplug.values+cumulative_count_switches
cumulative_count_closures=cumulative_count_switches.values+cumulative_count_closures
cumulative_count_hvac=cumulative_count_closures.values+cumulative_count_hvac

# Plot the stacked cumulative sums
plt.figure(figsize=(15, 6))
# Outlines of the overall total and of the bottom (lighting) band
plt.plot(cumulative_count.index, cumulative_count.values, linestyle='-')
plt.plot(cumulative_count_lighting.index, cumulative_count_lighting.values, linestyle='-')
# 'Others' is filled from zero up to the overall total; the group bands below
# are drawn on top of it, each between the previous stack level and its own
plt.fill_between(cumulative_count.index, cumulative_count.values,  color='skyblue', alpha=0.5, label='Others')
plt.fill_between(cumulative_count_hvac.index, cumulative_count_closures.values, cumulative_count_hvac.values,  color='orange', alpha=0.5, label='HVAC')
plt.fill_between(cumulative_count_closures.index, cumulative_count_switches.values, cumulative_count_closures.values,  color='grey', alpha=0.5, label='Closures')
plt.fill_between(cumulative_count_switches.index, cumulative_count_smartplug.values, cumulative_count_switches.values,  color='violet', alpha=0.5, label='Switches and controls')
plt.fill_between(cumulative_count_smartplug.index, cumulative_count_lighting.values, cumulative_count_smartplug.values,  color='green', alpha=0.5, label='Smart plugs/outlets and other actuators')
# Bottom band: from zero up to the lighting total
plt.fill_between(cumulative_count_lighting.index, cumulative_count_lighting.values,  color='yellow', alpha=0.5, label='Lighting')

plt.xlabel('Date')
plt.ylabel('Certification count')
#plt.title('Accumulated certifications per device type group over time')
# Tick every 10th day with vertical labels
plt.xticks(count_per_date.index[::10], rotation=90)
# Remove empty spaces on the left and right sides
plt.xlim(count_per_date.index.min(), count_per_date.index.max())
plt.legend(loc='upper left')

# Save the plot
plt.savefig('certifications_per_date_and_type_accumulated.png')
# Show the plot
plt.show()
# Final overall total
print(cumulative_count.values.max())




In [None]:
# Number of certification rows per vendor name, and how many vendors there are
count_per_vendor = certificationsData.groupby('vendorName').size()
print(len(count_per_vendor))

# Vendors with more than three certifications, largest first
filtered_count_per_vendor = count_per_vendor[count_per_vendor > 3].sort_values(ascending=False)
print(f'filtered_count_per_vendor > 3: {len(filtered_count_per_vendor)}')

# Bar chart with one bar per vendor
fig, ax = plt.subplots(figsize=(15, 3))
bars = ax.bar(filtered_count_per_vendor.index, filtered_count_per_vendor.values)
ax.set_xlabel('Vendor Name')
ax.set_ylabel('Count')
#plt.title('Number of Certifications per Vendor')
# Vertical labels so the vendor names do not overlap
plt.xticks(rotation=90)

# Categorical bars sit at x = 0, 1, ...; clip half a unit around them
ax.set_xlim(-0.5, len(filtered_count_per_vendor) - 0.5)

# Write each bar's value centred just above it
for bar in bars:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height, f'{height}', ha='center', va='bottom')

# Remember this chart's y-range; the following vendor charts reuse it
ylim = ax.get_ylim()

# Write the figure to disk, then render it
fig.savefig('certified_products_per_Vendor.png')
plt.show()

# All vendors with 2 or 3 entries (both bounds inclusive).
# A single mask on count_per_vendor avoids indexing an already filtered series
# with a mask that was built on the unfiltered one.
filtered_count_per_vendor = count_per_vendor[count_per_vendor.between(2, 3)]
print('filtered_count_per_vendor = 2 or 3: '+str(len(filtered_count_per_vendor)))
# Sort values in descending order
filtered_count_per_vendor = filtered_count_per_vendor.sort_values(ascending=False)

# Plot the data
plt.figure(figsize=(15, 3))
bars=plt.bar(filtered_count_per_vendor.index, filtered_count_per_vendor.values)
plt.xlabel('Vendor Name')
plt.ylabel('Count')
#plt.title('Number of Certifications per Vendor')
plt.xticks(rotation=90)

# Trim empty space on the left and right
plt.xlim(-0.5, len(filtered_count_per_vendor) - 0.5)

# Use the y-scale of the first vendor plot so the charts are directly comparable
plt.ylim(ylim)

# Annotate each bar with its value
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width() / 2, height, f'{height}', ha='center', va='bottom')


# Save the plot
plt.savefig('certified_products_per_Vendor2.png')
# Show the plot
plt.show()


# Vendors with exactly one certification
filtered_count_per_vendor = count_per_vendor[count_per_vendor == 1].sort_values(ascending=False)
print(f'filtered_count_per_vendor = 1: {len(filtered_count_per_vendor)}')

# Bar chart with one bar per vendor
fig, ax = plt.subplots(figsize=(15, 3))
bars2 = ax.bar(filtered_count_per_vendor.index, filtered_count_per_vendor.values)
ax.set_xlabel('Vendor Name')
ax.set_ylabel('Count')
#plt.title('Number of Certifications per Vendor')
# Vertical labels so the vendor names do not overlap
plt.xticks(rotation=90)

# Reuse the y-range remembered from the first vendor chart
ax.set_ylim(ylim)

# Categorical bars sit at x = 0, 1, ...; clip half a unit around them
ax.set_xlim(-0.5, len(filtered_count_per_vendor) - 0.5)

# Write each bar's value centred just above it
for bar in bars2:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height, f'{height}', ha='center', va='bottom')

# Write the figure to disk, then render it
fig.savefig('certified_products_per_Vendor3.png')
plt.show()

# Vendors with a count of zero (only the number of such vendors is printed)
filtered_count_per_vendor = count_per_vendor[count_per_vendor == 0]
print(f'filtered_count_per_vendor = 0: {len(filtered_count_per_vendor)}')

In [None]:
# Calculate sum of vendors over time.
# Keep only the earliest certification of every vendor id: sort by date first,
# then drop the later rows of each 'vid'. The de-duplication has to run on the
# sorted frame; running it on the unsorted data keeps whichever row happens to
# come first in the file, which is not necessarily the vendor's first date.
vendor_certificationsData = (
    certificationsData
    .sort_values(by=['Datetime'], kind='stable')
    .drop_duplicates(subset=['vid'])
)
# Number of vendors whose first certification falls on each date
vendor_count_per_date = vendor_certificationsData.groupby(vendor_certificationsData['Datetime'].dt.date).size()
vendor_count_per_date_df = pd.DataFrame(vendor_count_per_date, columns=['count'])
vendor_count_per_date_df = vendor_count_per_date_df.reset_index()

# Generate a DataFrame with all dates in the range
date_range = pd.date_range(start=certificationsData['Datetime'].min(), end=certificationsData['Datetime'].max(), freq='D')
all_dates_df = pd.DataFrame({'Datetime': date_range})
all_dates_df['Datetime'] = pd.to_datetime(all_dates_df['Datetime']).dt.date

# Merge the calendar with the per-date counts so that dates without a new vendor are present too
vendor_count_per_date = pd.merge(all_dates_df, vendor_count_per_date_df, on='Datetime', how='left')
vendor_count_per_date = vendor_count_per_date.set_index('Datetime')

# Fill dates without a new vendor with 0, then accumulate
vendor_count_per_date['count'] = vendor_count_per_date['count'].fillna(0)
cumulative_vendor_count = vendor_count_per_date['count'].cumsum()

# Area chart of the running number of vendors: outline plus translucent fill
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(cumulative_vendor_count.index, cumulative_vendor_count.values, linestyle='-')
ax.fill_between(vendor_count_per_date.index, cumulative_vendor_count.values, color='skyblue', alpha=0.5)

ax.set_xlabel('Date')
ax.set_ylabel('Cumulative vendors')
#plt.title('Accumulated vendors with certifications over time')
# Tick every 10th day with vertical labels
plt.xticks(vendor_count_per_date.index[::10], rotation=90)
# Clip the x-axis to the covered date range
ax.set_xlim(vendor_count_per_date.index.min(), vendor_count_per_date.index.max())

# Write the figure to disk, render it, then print the final number of vendors
fig.savefig('vendors_with_certification_over_time_accumulated.png')
plt.show()
print(cumulative_vendor_count.values.max())




# Same vendor area chart, extended by a reference line and release markers
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(cumulative_vendor_count.index, cumulative_vendor_count.values, linestyle='-')
ax.fill_between(vendor_count_per_date.index, cumulative_vendor_count.values, color='skyblue', alpha=0.5)

# "Trend line": a straight line from (first day, 0) to (last day, final total)
ax.plot(
    [cumulative_vendor_count.index.min(), cumulative_vendor_count.index.max()],
    [0, cumulative_vendor_count.values.max()],
    color='blue',
    linewidth=2,
    label='Trend line',
)

# Red dots on the x-axis at four fixed dates; only the first one carries the
# legend label so the legend shows a single entry for all of them
for position, release_day in enumerate(('2023-05-18', '2023-10-23', '2024-05-08', '2024-11-07')):
    legend_entry = {'label': 'Matter update released'} if position == 0 else {}
    ax.plot(pd.Timestamp(release_day), 0, marker='o', markersize=5, color='red', **legend_entry)

ax.set_xlabel('Date')
ax.set_ylabel('Cumulative vendors')
#plt.title('Accumulated vendors with certifications over time with trend line')
# Tick every 10th day with vertical labels
plt.xticks(vendor_count_per_date.index[::10], rotation=90)
# Clip the x-axis to the covered date range
ax.set_xlim(vendor_count_per_date.index.min(), vendor_count_per_date.index.max())

ax.legend()

# Write the figure to disk, then render it
fig.savefig('vendors_with_certification_over_time_accumulated_with_trend_line.png')
plt.show()

In [None]:
# Nested tally: vendor id -> {device type group -> number of certification rows}
deviceTypeGroupPerVendorDictionary = {}
for _, row in certificationsData.iterrows():
    groups_of_vendor = deviceTypeGroupPerVendorDictionary.setdefault(row['vid'], {})
    type_group = row['Device Type Group']
    groups_of_vendor[type_group] = groups_of_vendor.get(type_group, 0) + 1

# Vendors as rows and device type groups as columns; a cell is NaN where the
# vendor has no certification in that group
deviceTypeGroupPerVendor_df = pd.DataFrame(deviceTypeGroupPerVendorDictionary).T
# 'sum' = number of different device type groups per vendor (its non-NaN cells)
deviceTypeGroupPerVendor_df['sum'] = deviceTypeGroupPerVendor_df.notna().sum(axis=1)

# Horizontal boxplot of the number of device type groups per vendor (mean marked)
fig, ax = plt.subplots(figsize=(15, 1))
ax.boxplot(deviceTypeGroupPerVendor_df['sum'], vert=False, showmeans=True)

# Ticks at the occurring values, a grid for orientation, no y-axis ticks
ax.set_xticks(deviceTypeGroupPerVendor_df['sum'])
ax.grid(True)
#plt.title('Boxplot of device type groups per vendor')
ax.set_yticks([])
ax.set_xlabel('Number of different device type groups')

# Write the figure to disk, then render it
fig.savefig('devicetype_groups_per_vendor.png')
plt.show()


# How many vendors cover 1, 2, 3, ... different device type groups
deviceTypeGroupPerVendor_df_grouped = deviceTypeGroupPerVendor_df.groupby('sum').size()

# Bar chart of that distribution
fig, ax = plt.subplots(figsize=(15, 6))
bars = ax.bar(deviceTypeGroupPerVendor_df_grouped.index, deviceTypeGroupPerVendor_df_grouped.values)
ax.set_xlabel('Number of different device type groups')
ax.set_ylabel('Number of vendors')
#plt.title('Barchart of device type groups per vendor')

# Write each bar's value centred just above it
for bar in bars:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height, f'{height}', ha='center', va='bottom')

# Write the figure to disk, then render it
fig.savefig('devicetype_groups_per_vendor_barchart.png')
plt.show()




# Nested tally: vendor id -> {device type -> number of certification rows}
deviceTypePerVendorDictionary = {}
for _, row in certificationsData.iterrows():
    types_of_vendor = deviceTypePerVendorDictionary.setdefault(row['vid'], {})
    device_type = row['Device Type']
    types_of_vendor[device_type] = types_of_vendor.get(device_type, 0) + 1

# Vendors as rows and device types as columns; a cell is NaN where the vendor
# has no certification of that type
deviceTypePerVendor_df = pd.DataFrame(deviceTypePerVendorDictionary).T
# 'sum' = number of different device types per vendor (its non-NaN cells)
deviceTypePerVendor_df['sum'] = deviceTypePerVendor_df.notna().sum(axis=1)

# Horizontal boxplot of the number of device types per vendor (mean marked)
fig, ax = plt.subplots(figsize=(15, 1))
ax.boxplot(deviceTypePerVendor_df['sum'], vert=False, showmeans=True)

# Ticks at the occurring values, a grid for orientation, no y-axis ticks
ax.set_xticks(deviceTypePerVendor_df['sum'])
ax.grid(True)
#plt.title('Boxplot of device type per vendor')
ax.set_yticks([])
ax.set_xlabel('Number of different device types')

# Write the figure to disk, then render it
fig.savefig('devicetype_per_vendor.png')
plt.show()


# How many vendors cover 1, 2, 3, ... different device types
deviceTypePerVendor_df_grouped = deviceTypePerVendor_df.groupby(deviceTypePerVendor_df['sum']).size()
# Plot the data
plt.figure(figsize=(15, 6))
bars=plt.bar(deviceTypePerVendor_df_grouped.index, deviceTypePerVendor_df_grouped.values)
plt.xlabel('Number of different device types')
plt.ylabel('Number of vendors')
#plt.title('Barchart of device types per vendor')

# Annotate each bar with its value
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width() / 2, height, f'{height}', ha='center', va='bottom')

# One tick per integer from 1 up to the largest number of device types found.
# At least 1..9 is always shown (the range that used to be hard-coded), so the
# axis is unchanged for small data but bars at 10 or more now get a tick as well.
if len(deviceTypePerVendor_df_grouped):
    highest_type_count = int(deviceTypePerVendor_df_grouped.index.max())
else:
    highest_type_count = 0
plt.xticks(np.arange(1, max(9, highest_type_count) + 1, step=1))

# Save the plot
plt.savefig('devicetype_per_vendor_barchart.png')
# Show the plot
plt.show()

# Average number of different device types / device type groups per vendor
print('Mean Count device type per vendor: '+str(deviceTypePerVendor_df['sum'].mean()))
print('Mean Count device type group per vendor: '+str(deviceTypeGroupPerVendor_df['sum'].mean()))