Imports & File Structure

In [1]:
# Structure of CSV file
# F1: Date, Manufacturer, Type of Device, Exposed
# F2: Date, Manufacturer, Exposed, Number of Countries
# F3: Date, type, amount
# F4: Date, Exploit, Infected, Number of Countries

from os import listdir
import pandas as pd

## Get Exposed by Type

In [2]:
# Build exposed_by_type.csv: for each snapshot file in data/exposed, total the
# `count` column per device `type` and tag the rows with the file's date prefix.
# Sorted because listdir() returns names in arbitrary order; sorting keeps the
# output rows in a reproducible (filename) order.
files = sorted(listdir('data/exposed'))

# One aggregated frame per snapshot; concatenated once after the loop instead of
# growing a DataFrame row block by row block (DataFrame.append no longer exists
# in pandas >= 2.0).
frames = []

for f in files:
  df = pd.read_csv(f'data/exposed/{f}')

  if df.empty:
    # Snapshot with no rows: emit a zero placeholder so the date still appears.
    dftype = pd.DataFrame({'type': ['appliance'], 'count': [0]})
  else:
    # Rows without a vendor are excluded from the totals.
    df = df[pd.notna(df.vendor)]
    # Aggregate only `count`; the other columns were summed and then thrown
    # away in the previous version of this cell.
    dftype = df.groupby('type', as_index=False)['count'].sum()

  # The first 10 characters of the file name are used as the date
  # (presumably YYYY-MM-DD -- confirm against the files in data/exposed).
  dftype.insert(0, 'date', f[:10])

  if not dftype.empty:
    frames.append(dftype)

if frames:
  df_bytype = pd.concat(frames, ignore_index=True)[['date', 'type', 'count']]
else:
  # No usable snapshot at all: still write a header-only CSV.
  df_bytype = pd.DataFrame(columns=['date', 'type', 'count'])

df_bytype.to_csv('exposed_by_type.csv', index=False)

## Exposed by Vendor

In [3]:
# Build exposed_by_vendor.csv: for each snapshot file in data/exposed, total the
# `count` column per `vendor` and tag the rows with the file's date prefix.
# Sorted because listdir() returns names in arbitrary order; sorting keeps the
# output rows in a reproducible (filename) order.
files = sorted(listdir('data/exposed'))

# One aggregated frame per snapshot; concatenated once after the loop
# (DataFrame.append no longer exists in pandas >= 2.0).
frames = []

for f in files:
  df = pd.read_csv(f'data/exposed/{f}')

  if df.empty:
    # Snapshot with no rows: emit a zero placeholder so the date still appears.
    dfvendor = pd.DataFrame({'vendor': ['ASUS'], 'count': [0]})
  else:
    # Rows without a vendor cannot be attributed and are excluded.
    df = df[pd.notna(df.vendor)]
    # Aggregate only `count`; the other columns were summed and then thrown
    # away in the previous version of this cell.
    dfvendor = df.groupby('vendor', as_index=False)['count'].sum()

  # The first 10 characters of the file name are used as the date
  # (presumably YYYY-MM-DD -- confirm against the files in data/exposed).
  dfvendor.insert(0, 'date', f[:10])

  if not dfvendor.empty:
    frames.append(dfvendor)

# NOTE: the result keeps the historical name `df_bytype` (although it is grouped
# by vendor) in case other cells read it.
if frames:
  df_bytype = pd.concat(frames, ignore_index=True)[['date', 'vendor', 'count']]
else:
  # No usable snapshot at all: still write a header-only CSV.
  df_bytype = pd.DataFrame(columns=['date', 'vendor', 'count'])

df_bytype.to_csv('exposed_by_vendor.csv', index=False)

## Exposed by number of vulns

## Retrieve number of vulnerabilities by type

In [5]:
def count_values(values):
  """Return how many ';'-separated entries `values` holds.

  A string that is empty or only whitespace counts as 0 entries; any other
  string counts as the number of pieces produced by splitting on ';'.
  """
  stripped = values.strip()
  return 0 if stripped == '' else len(values.split(';'))

# Build exposed_by_type_num_of_vulns.csv: for each snapshot file in
# data/exposed, count the ';'-separated entries of `possible_vulnerability`
# on every row and total those counts per device `type`.
# Sorted because listdir() returns names in arbitrary order.
files = sorted(listdir('data/exposed'))

# One aggregated frame per snapshot; concatenated once after the loop
# (DataFrame.append no longer exists in pandas >= 2.0).
frames = []

for f in files:
  df = pd.read_csv(f'data/exposed/{f}')

  if df.empty:
    # Snapshot with no rows: emit a zero placeholder so the date still appears.
    # Built directly with the output columns so no unrelated columns leak into
    # the final CSV.
    df_data = pd.DataFrame({'type': ['appliance'], 'vulnerability_count': [0]})
  else:
    # Only rows that have both a vendor and a vulnerability list are counted.
    df = df[pd.notna(df.vendor) & pd.notna(df.possible_vulnerability)]
    # assign() returns a new frame, so nothing is written into a filtered slice.
    df = df.assign(
      vulnerability_count=df['possible_vulnerability'].map(count_values).astype(int)
    )
    df_data = df.groupby('type', as_index=False)['vulnerability_count'].sum()

  # The first 10 characters of the file name are used as the date
  # (presumably YYYY-MM-DD -- confirm against the files in data/exposed).
  df_data.insert(0, 'date', f[:10])

  if not df_data.empty:
    frames.append(df_data)

if frames:
  df_final = pd.concat(frames, ignore_index=True)[['date', 'type', 'vulnerability_count']]
else:
  # No usable snapshot at all: still write a header-only CSV.
  df_final = pd.DataFrame(columns=['date', 'type', 'vulnerability_count'])

df_final.to_csv('exposed_by_type_num_of_vulns.csv', index=False)

## Retrieve number of vulnerabilities by vendor

In [6]:
def count_values(values):
  """Count the ';'-delimited items in `values`.

  Returns 0 when the string is blank (empty or whitespace only); otherwise
  returns the number of segments obtained by splitting on ';'.
  """
  if values.strip() != '':
    return len(values.split(';'))
  return 0

# Build exposed_by_vendor_num_of_vulns.csv: for each snapshot file in
# data/exposed, count the ';'-separated entries of `possible_vulnerability`
# on every row and total those counts per `vendor`.
# Sorted because listdir() returns names in arbitrary order.
files = sorted(listdir('data/exposed'))

# One aggregated frame per snapshot; concatenated once after the loop
# (DataFrame.append no longer exists in pandas >= 2.0).
frames = []

for f in files:
  df = pd.read_csv(f'data/exposed/{f}')

  if df.empty:
    # Snapshot with no rows: emit a zero placeholder so the date still appears.
    # Built directly with the output columns so no unrelated columns leak into
    # the final CSV.
    df_data = pd.DataFrame({'vendor': ['ASUS'], 'vulnerability_count': [0]})
  else:
    # Only rows that have both a vendor and a vulnerability list are counted.
    df = df[pd.notna(df.vendor) & pd.notna(df.possible_vulnerability)]
    # assign() returns a new frame, so nothing is written into a filtered slice.
    df = df.assign(
      vulnerability_count=df['possible_vulnerability'].map(count_values).astype(int)
    )
    df_data = df.groupby('vendor', as_index=False)['vulnerability_count'].sum()

  # The first 10 characters of the file name are used as the date
  # (presumably YYYY-MM-DD -- confirm against the files in data/exposed).
  df_data.insert(0, 'date', f[:10])

  if not df_data.empty:
    frames.append(df_data)

if frames:
  df_final = pd.concat(frames, ignore_index=True)[['date', 'vendor', 'vulnerability_count']]
else:
  # No usable snapshot at all: still write a header-only CSV.
  df_final = pd.DataFrame(columns=['date', 'vendor', 'vulnerability_count'])

df_final.to_csv('exposed_by_vendor_num_of_vulns.csv', index=False)

## Retrieve exposed count by country

In [7]:
# Build exposed_by_country.csv: for each snapshot file in data/exposed (in
# sorted filename order), sum the remaining numeric data per `country`.
files = listdir('data/exposed')

# One aggregated frame per snapshot; concatenated once after the loop instead
# of repeatedly concatenating onto an initially empty frame.
frames = []

for f in sorted(files):
  df = pd.read_csv(f'data/exposed/{f}')
  df = df.drop(columns=['possible_vulnerability', 'geo', 'type', 'model', 'vendor'])

  # Snapshots without rows contribute nothing to this report.
  if not df.empty:
    # Build a single mask on the same frame. Chaining two boolean selections
    # applied a mask computed on the unfiltered frame to an already filtered
    # one, which makes pandas reindex the mask and emit a warning.
    has_values = pd.notna(df['country']) & pd.notna(df['count'])
    df_data = df[has_values].groupby('country').sum().reset_index()
    # The first 10 characters of the file name are used as the date
    # (presumably YYYY-MM-DD -- confirm against the files in data/exposed).
    df_data.insert(0, 'date', f[:10])

    if not df_data.empty:
      frames.append(df_data)

if frames:
  df_final = pd.concat(frames, ignore_index=True)
else:
  # No usable snapshot at all: still write a header-only CSV.
  df_final = pd.DataFrame(columns=['date', 'country', 'count'])

df_final.to_csv('exposed_by_country.csv', index=False)

In [None]:
# Display whichever frame was last bound to `df_final`.
# NOTE(review): the saved output under this cell has date/strain_count columns,
# which none of the cells visible here produce -- presumably stale output or a
# frame built in a cell outside this view; confirm and re-run.
df_final

Unnamed: 0,date,strain_count
0,2021-05-03,36
1,2021-05-04,35
2,2021-05-05,34
3,2021-05-06,62
4,2021-05-07,67
...,...,...
358,2022-04-30,42
359,2022-05-01,41
360,2022-05-02,41
361,2022-05-03,41
