In [None]:
# Imports
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime

In [None]:
# Date of the data export to analyse, formatted like the export file names
# (default: today; uncomment the override below to analyse another export).
date = datetime.today().strftime('%Y.%m.%d')
#date='2025.03.21'
print(f'Data analysis of uncertified devices started for date: {date}')

# Read the certified and uncertified device exports for that date from the
# current working directory. pd.read_csv raises FileNotFoundError if the
# export for the chosen date does not exist.
certificationsData = pd.read_csv(f'certified_devices_{date}.csv')
uncertifiedDataNotUnique = pd.read_csv(f'uncertified_devices_{date}.csv')
print(f'Uncertified Model Version: {len(uncertifiedDataNotUnique)}')

# Remove historical version models which won't get certified in the future:
# keep exactly one row per vendor/product id.
# NOTE(review): keep='last' retains the last row of each vidPid in file order;
# this assumes the export lists older versions first - confirm.
uncertifiedData = uncertifiedDataNotUnique.drop_duplicates(subset=['vidPid'], keep='last')
print(f'Unique uncertified vendor and product ids: {len(uncertifiedData)}')

In [None]:
def plot_count_bars(counts, xlabel, filename, figsize=(15, 6), title=None):
    """Draw an annotated bar chart of ``counts``, save it to ``filename`` and show it.

    Parameters
    ----------
    counts : pandas.Series
        Count per category; the index is used as the x values of the bars.
    xlabel : str
        Label of the x axis.
    filename : str
        Path of the image file written with ``plt.savefig``.
    figsize : tuple of float, optional
        Figure size in inches.
    title : str, optional
        Figure title; no title is drawn when omitted.

    Returns
    -------
    matplotlib.container.BarContainer
        The bars that were drawn.
    """
    plt.figure(figsize=figsize)
    bars = plt.bar(counts.index, counts.values)
    plt.xlabel(xlabel)
    plt.ylabel('Count')
    if title is not None:
        plt.title(title)
    plt.xticks(rotation=90)

    # Trim empty space on the left and right
    plt.xlim(-0.5, len(counts) - 0.5)

    # Annotate each bar with its value, centred above the bar
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2, height, f'{height}', ha='center', va='bottom')

    # bbox_inches='tight' fits the saved image to everything that was drawn.
    # Without it the rotated tick labels can be cut off at the figure edge.
    plt.savefig(filename, bbox_inches='tight')
    plt.show()
    return bars


# sum up the products per type group
count_per_product_type_group = uncertifiedData.groupby('Device Type Group').size()
bars = plot_count_bars(
    count_per_product_type_group,
    xlabel='Device type group',
    filename='uncertified_products_per_type_group.png',
    # title='Number of uncertified devices per device type group',
)

# sum up the products per type
count_per_product_type = uncertifiedData.groupby('Device Type').size()
bars = plot_count_bars(
    count_per_product_type,
    xlabel='Device type',
    filename='uncertified_products_per_type.png',
    # title='Number of uncertified devices per device type',
)

In [None]:
# Rows of the de-duplicated uncertified data that carry no vendor name.
# isna() recognises every pandas missing-value marker, unlike an `x != x` test.
vendor_name_missing = uncertifiedData['vendorName'].isna()
uncertified_nan_vendors = uncertifiedData.loc[vendor_name_missing, 'vendorName'].tolist()
print('uncertified vendor name nan rows: '+str(len(uncertified_nan_vendors)))

# unique vendors
certified_vendors = certificationsData['vendorName'].unique()
# Drop missing names first; unique() keeps the order of first appearance.
uncertified_vendors = uncertifiedData['vendorName'].dropna().unique().tolist()
print('uncertified vendors: '+str(len(uncertified_vendors)))

# Vendors that only appear in the uncertified data. A set gives constant-time
# lookups instead of scanning the certified vendor array for every name.
certified_vendor_names = set(certified_vendors)
new_vendors = [name for name in uncertified_vendors if name not in certified_vendor_names]
print(new_vendors)
print('new vendors: '+str(len(new_vendors)))

In [None]:
# Sum up the de-duplicated uncertified products per vendor, largest vendor first
count_per_vendor = (
    uncertifiedData
    .groupby('vendorName')
    .size()
    .sort_values(ascending=False)
)
print('vendor count: '+str(len(count_per_vendor)))

# Plot the data
plt.figure(figsize=(15, 3))
bars = plt.bar(count_per_vendor.index, count_per_vendor.values)
plt.xlabel('Vendor Name')
plt.ylabel('Count')
#plt.title('Number of Device software models of uncertified devices per Vendor')
plt.xticks(rotation=90)

# Mark the labels of new vendors bold (set lookup instead of scanning the
# new_vendors list once per tick label)
new_vendor_names = set(new_vendors)
for label in plt.gca().get_xticklabels():
    if label.get_text() in new_vendor_names:
        label.set_fontweight('bold')

# Trim empty space on the left and right
plt.xlim(-0.5, len(count_per_vendor) - 0.5)

# Annotate each bar with its value, centred above the bar
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width() / 2, height, f'{height}', ha='center', va='bottom')

# Scale of the first plot (to be used for the second plot)
ylim = plt.ylim()

# Save the plot. bbox_inches='tight' fits the saved image to everything that
# was drawn; without it the rotated vendor names can be cut off at the bottom
# of this 3 inch high figure.
plt.savefig('uncertified_products_per_Vendor.png', bbox_inches='tight')
# Show the plot
plt.show()

In [None]:
# Number of rows per vendor/product id in the raw (not de-duplicated) data;
# ids that occur more than once have several software version models.
multiplesoftwareVersionModels = uncertifiedDataNotUnique.groupby('vidPid').size()
multiplesoftwareVersionModels = multiplesoftwareVersionModels[multiplesoftwareVersionModels > 1]
multipleEntries = len(multiplesoftwareVersionModels)
multipleEntriesPercentage = multipleEntries * 100 / len(uncertifiedData)
print(multipleEntriesPercentage)
print('multiple entries: '+str(multipleEntries))

# Devices whose retained row has no software version
software_version_missing = uncertifiedData['softwareVersion'].isnull()
percent_missing = software_version_missing.sum() * 100 / len(uncertifiedData)
print('softwareVersion percent missing: '+str(percent_missing))

# Split the devices into three mutually exclusive groups so every device is
# counted in exactly one pie slice. A device with several rows whose retained
# row lacks a software version is counted as "not provided" only; subtracting
# both counts from the total would count it twice.
has_multiple_versions = uncertifiedData['vidPid'].isin(multiplesoftwareVersionModels.index)
total_count = len(uncertifiedData)
missing_count = int(software_version_missing.sum())
multiple_count = int((has_multiple_versions & ~software_version_missing).sum())
single_count = total_count - missing_count - multiple_count

labels = (
    f'{single_count} Software Version Model provided',
    f'{multiple_count} Multiple Software Version Models provided',
    f'{missing_count} Software Version Model not provided',
)
sizes = [count * 100 / total_count for count in (single_count, multiple_count, missing_count)]

fig, ax = plt.subplots()
color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Counts are drawn inside the wedges (labeldistance < 1), percentages outside
# (pctdistance > 1).
ax.pie(
    sizes,
    labels=labels,
    autopct='%1.1f%%',
    colors=[color_cycle[0], color_cycle[2], color_cycle[1]],
    pctdistance=1.15,
    labeldistance=.4,
)

# Save the plot
plt.savefig('uncertified_softwareVersionModel_provided_piechart.png')
# Show the plot
plt.show()


# Share of de-duplicated uncertified devices that come without an OTA URL
ota_missing_count = uncertifiedData['otaUrl'].isnull().sum()
ota_total_count = len(uncertifiedData)
percent_missing = ota_missing_count * 100 / ota_total_count
print(f'otaUrl percent missing: {percent_missing}')

# Wedge labels carry the absolute counts, the wedge sizes the percentages
labels = (
    f'{ota_total_count - ota_missing_count} OTA URLs provided',
    f'{ota_missing_count} OTA URLs not provided',
)
sizes = [100 - percent_missing, percent_missing]

fig, ax = plt.subplots()
ax.pie(
    sizes,
    labels=labels,
    autopct='%1.1f%%',
    colors=[color_cycle[0], color_cycle[1]],
    pctdistance=1.15,
    labeldistance=.4,
)

# Write the chart to disk, then display it
plt.savefig('uncertified_otaURL_provided_piechart.png')
plt.show()