In [1]:
import pandas as pd
import numpy as np
import os
import time
from collections import defaultdict
from sklearn import linear_model
from sklearn.metrics import accuracy_score,f1_score,recall_score,roc_curve
from sklearn import metrics
import seaborn as sns
import matplotlib.pyplot as plt

**Combine the symptoms and the demographic information**

In [2]:
directory = "./Data/With_Demographics/"

In [3]:
def read_file(filename):
    temp = pd.read_csv(directory+filename)
    temp['index'] = range(0,temp.shape[0])
    temp.set_index('index',inplace = True)
    return temp

In [4]:
def get_all_files(files_):
    data = defaultdict()
    for i in files_:
        print("Reading file : ",i)
        data[i[:-4]] = read_file(i)
    return data

In [5]:
files_ = ['nyumc.csv','goviral.csv','fluwatch.csv','hongkong.csv','hutterite.csv']

In [6]:
data = get_all_files(files_)

Reading file :  nyumc.csv
Reading file :  goviral.csv
Reading file :  fluwatch.csv
Reading file :  hongkong.csv
Reading file :  hutterite.csv


In [7]:
print("The read files are : ",data.keys())

The read files are :  dict_keys(['hongkong', 'nyumc', 'goviral', 'fluwatch', 'hutterite'])


*separate the symptoms and demographics information*

In [12]:
cols = list(data['nyumc'].columns)
cols = [i for i in cols if i!= 'virus']
demographic = ['age 0-4','age 5-15','age 16-44','age 45-64','age 65+','male','female']


In [13]:
symptoms = [i for i in cols if i not in demographic]
print(symptoms)

['fever_sorethroat', 'fever_cough', 'fever_muscle', 'fever_headache', 'fever_fatigue', 'fever_vomit', 'fever_nausea', 'fever_diarrhea', 'fever_chills', 'fever_sneeze', 'fever_shortness of breath', 'fever_phlegm', 'fever_blockednose', 'fever_earache', 'fever_leg pain', 'fever_runnynose', 'sorethroat_cough', 'sorethroat_muscle', 'sorethroat_headache', 'sorethroat_fatigue', 'sorethroat_vomit', 'sorethroat_nausea', 'sorethroat_diarrhea', 'sorethroat_chills', 'sorethroat_sneeze', 'sorethroat_shortness of breath', 'sorethroat_phlegm', 'sorethroat_blockednose', 'sorethroat_earache', 'sorethroat_leg pain', 'sorethroat_runnynose', 'cough_muscle', 'cough_headache', 'cough_fatigue', 'cough_vomit', 'cough_nausea', 'cough_diarrhea', 'cough_chills', 'cough_sneeze', 'cough_shortness of breath', 'cough_phlegm', 'cough_blockednose', 'cough_earache', 'cough_leg pain', 'cough_runnynose', 'muscle_headache', 'muscle_fatigue', 'muscle_vomit', 'muscle_nausea', 'muscle_diarrhea', 'muscle_chills', 'muscle_snee

In [14]:
def combine(symptoms,demographic):
    combined_features = [(p1, p2) for p1 in symptoms for p2 in demographic]
    return combined_features

In [15]:
combined_features = combine(symptoms,demographic)

In [16]:
def create_columns(combined_features):
    columns_ = [i[0]+"_"+i[1] for i in combined_features]
    return columns_

In [17]:
columns_ = create_columns(combined_features)

In [20]:
def create_new_dataframe(data_,combined_features,columns_):
    new_data = pd.DataFrame(columns=columns_)
    for i in combined_features:
        print(i)
        symp = i[0]
        dem  = i[1]
        new_data[symp+"_"+dem] = np.where((data_[symp] == 1) & (data_[dem] == 1), 1, 0)

    new_data['virus'] = data_['virus']
    return new_data
    
    

In [21]:
data_nyumc = create_new_dataframe(data['nyumc'],combined_features,columns_)
print("Original number of sample points : ",data['nyumc'].shape)
print("Final number of sample points : ",data_nyumc.shape)

('fever_sorethroat', 'age 0-4')
('fever_sorethroat', 'age 5-15')
('fever_sorethroat', 'age 16-44')
('fever_sorethroat', 'age 45-64')
('fever_sorethroat', 'age 65+')
('fever_sorethroat', 'male')
('fever_sorethroat', 'female')
('fever_cough', 'age 0-4')
('fever_cough', 'age 5-15')
('fever_cough', 'age 16-44')
('fever_cough', 'age 45-64')
('fever_cough', 'age 65+')
('fever_cough', 'male')
('fever_cough', 'female')
('fever_muscle', 'age 0-4')
('fever_muscle', 'age 5-15')
('fever_muscle', 'age 16-44')
('fever_muscle', 'age 45-64')
('fever_muscle', 'age 65+')
('fever_muscle', 'male')
('fever_muscle', 'female')
('fever_headache', 'age 0-4')
('fever_headache', 'age 5-15')
('fever_headache', 'age 16-44')
('fever_headache', 'age 45-64')
('fever_headache', 'age 65+')
('fever_headache', 'male')
('fever_headache', 'female')
('fever_fatigue', 'age 0-4')
('fever_fatigue', 'age 5-15')
('fever_fatigue', 'age 16-44')
('fever_fatigue', 'age 45-64')
('fever_fatigue', 'age 65+')
('fever_fatigue', 'male')
(

('cough_diarrhea', 'female')
('cough_chills', 'age 0-4')
('cough_chills', 'age 5-15')
('cough_chills', 'age 16-44')
('cough_chills', 'age 45-64')
('cough_chills', 'age 65+')
('cough_chills', 'male')
('cough_chills', 'female')
('cough_sneeze', 'age 0-4')
('cough_sneeze', 'age 5-15')
('cough_sneeze', 'age 16-44')
('cough_sneeze', 'age 45-64')
('cough_sneeze', 'age 65+')
('cough_sneeze', 'male')
('cough_sneeze', 'female')
('cough_shortness of breath', 'age 0-4')
('cough_shortness of breath', 'age 5-15')
('cough_shortness of breath', 'age 16-44')
('cough_shortness of breath', 'age 45-64')
('cough_shortness of breath', 'age 65+')
('cough_shortness of breath', 'male')
('cough_shortness of breath', 'female')
('cough_phlegm', 'age 0-4')
('cough_phlegm', 'age 5-15')
('cough_phlegm', 'age 16-44')
('cough_phlegm', 'age 45-64')
('cough_phlegm', 'age 65+')
('cough_phlegm', 'male')
('cough_phlegm', 'female')
('cough_blockednose', 'age 0-4')
('cough_blockednose', 'age 5-15')
('cough_blockednose', 'ag

('fatigue_chills', 'female')
('fatigue_sneeze', 'age 0-4')
('fatigue_sneeze', 'age 5-15')
('fatigue_sneeze', 'age 16-44')
('fatigue_sneeze', 'age 45-64')
('fatigue_sneeze', 'age 65+')
('fatigue_sneeze', 'male')
('fatigue_sneeze', 'female')
('fatigue_shortness of breath', 'age 0-4')
('fatigue_shortness of breath', 'age 5-15')
('fatigue_shortness of breath', 'age 16-44')
('fatigue_shortness of breath', 'age 45-64')
('fatigue_shortness of breath', 'age 65+')
('fatigue_shortness of breath', 'male')
('fatigue_shortness of breath', 'female')
('fatigue_phlegm', 'age 0-4')
('fatigue_phlegm', 'age 5-15')
('fatigue_phlegm', 'age 16-44')
('fatigue_phlegm', 'age 45-64')
('fatigue_phlegm', 'age 65+')
('fatigue_phlegm', 'male')
('fatigue_phlegm', 'female')
('fatigue_blockednose', 'age 0-4')
('fatigue_blockednose', 'age 5-15')
('fatigue_blockednose', 'age 16-44')
('fatigue_blockednose', 'age 45-64')
('fatigue_blockednose', 'age 65+')
('fatigue_blockednose', 'male')
('fatigue_blockednose', 'female')
(

('chills_phlegm', 'age 5-15')
('chills_phlegm', 'age 16-44')
('chills_phlegm', 'age 45-64')
('chills_phlegm', 'age 65+')
('chills_phlegm', 'male')
('chills_phlegm', 'female')
('chills_blockednose', 'age 0-4')
('chills_blockednose', 'age 5-15')
('chills_blockednose', 'age 16-44')
('chills_blockednose', 'age 45-64')
('chills_blockednose', 'age 65+')
('chills_blockednose', 'male')
('chills_blockednose', 'female')
('chills_earache', 'age 0-4')
('chills_earache', 'age 5-15')
('chills_earache', 'age 16-44')
('chills_earache', 'age 45-64')
('chills_earache', 'age 65+')
('chills_earache', 'male')
('chills_earache', 'female')
('chills_leg pain', 'age 0-4')
('chills_leg pain', 'age 5-15')
('chills_leg pain', 'age 16-44')
('chills_leg pain', 'age 45-64')
('chills_leg pain', 'age 65+')
('chills_leg pain', 'male')
('chills_leg pain', 'female')
('chills_runnynose', 'age 0-4')
('chills_runnynose', 'age 5-15')
('chills_runnynose', 'age 16-44')
('chills_runnynose', 'age 45-64')
('chills_runnynose', 'ag

('phlegm', 'age 45-64')
('phlegm', 'age 65+')
('phlegm', 'male')
('phlegm', 'female')
('blockednose', 'age 0-4')
('blockednose', 'age 5-15')
('blockednose', 'age 16-44')
('blockednose', 'age 45-64')
('blockednose', 'age 65+')
('blockednose', 'male')
('blockednose', 'female')
('earache', 'age 0-4')
('earache', 'age 5-15')
('earache', 'age 16-44')
('earache', 'age 45-64')
('earache', 'age 65+')
('earache', 'male')
('earache', 'female')
('leg pain', 'age 0-4')
('leg pain', 'age 5-15')
('leg pain', 'age 16-44')
('leg pain', 'age 45-64')
('leg pain', 'age 65+')
('leg pain', 'male')
('leg pain', 'female')
('runnynose', 'age 0-4')
('runnynose', 'age 5-15')
('runnynose', 'age 16-44')
('runnynose', 'age 45-64')
('runnynose', 'age 65+')
('runnynose', 'male')
('runnynose', 'female')
Original number of sample points :  (21907, 161)
Final number of sample points :  (21907, 1072)


In [22]:
data_nyumc.head()

Unnamed: 0,fever_sorethroat_age 0-4,fever_sorethroat_age 5-15,fever_sorethroat_age 16-44,fever_sorethroat_age 45-64,fever_sorethroat_age 65+,fever_sorethroat_male,fever_sorethroat_female,fever_cough_age 0-4,fever_cough_age 5-15,fever_cough_age 16-44,...,leg pain_male,leg pain_female,runnynose_age 0-4,runnynose_age 5-15,runnynose_age 16-44,runnynose_age 45-64,runnynose_age 65+,runnynose_male,runnynose_female,virus
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [23]:
data_gv = create_new_dataframe(data['goviral'],combined_features,columns_)
print("Original number of sample points : ",data['goviral'].shape)
print("Final number of sample points : ",data_gv.shape)

('fever_sorethroat', 'age 0-4')
('fever_sorethroat', 'age 5-15')
('fever_sorethroat', 'age 16-44')
('fever_sorethroat', 'age 45-64')
('fever_sorethroat', 'age 65+')
('fever_sorethroat', 'male')
('fever_sorethroat', 'female')
('fever_cough', 'age 0-4')
('fever_cough', 'age 5-15')
('fever_cough', 'age 16-44')
('fever_cough', 'age 45-64')
('fever_cough', 'age 65+')
('fever_cough', 'male')
('fever_cough', 'female')
('fever_muscle', 'age 0-4')
('fever_muscle', 'age 5-15')
('fever_muscle', 'age 16-44')
('fever_muscle', 'age 45-64')
('fever_muscle', 'age 65+')
('fever_muscle', 'male')
('fever_muscle', 'female')
('fever_headache', 'age 0-4')
('fever_headache', 'age 5-15')
('fever_headache', 'age 16-44')
('fever_headache', 'age 45-64')
('fever_headache', 'age 65+')
('fever_headache', 'male')
('fever_headache', 'female')
('fever_fatigue', 'age 0-4')
('fever_fatigue', 'age 5-15')
('fever_fatigue', 'age 16-44')
('fever_fatigue', 'age 45-64')
('fever_fatigue', 'age 65+')
('fever_fatigue', 'male')
(

('cough_blockednose', 'male')
('cough_blockednose', 'female')
('cough_earache', 'age 0-4')
('cough_earache', 'age 5-15')
('cough_earache', 'age 16-44')
('cough_earache', 'age 45-64')
('cough_earache', 'age 65+')
('cough_earache', 'male')
('cough_earache', 'female')
('cough_leg pain', 'age 0-4')
('cough_leg pain', 'age 5-15')
('cough_leg pain', 'age 16-44')
('cough_leg pain', 'age 45-64')
('cough_leg pain', 'age 65+')
('cough_leg pain', 'male')
('cough_leg pain', 'female')
('cough_runnynose', 'age 0-4')
('cough_runnynose', 'age 5-15')
('cough_runnynose', 'age 16-44')
('cough_runnynose', 'age 45-64')
('cough_runnynose', 'age 65+')
('cough_runnynose', 'male')
('cough_runnynose', 'female')
('muscle_headache', 'age 0-4')
('muscle_headache', 'age 5-15')
('muscle_headache', 'age 16-44')
('muscle_headache', 'age 45-64')
('muscle_headache', 'age 65+')
('muscle_headache', 'male')
('muscle_headache', 'female')
('muscle_fatigue', 'age 0-4')
('muscle_fatigue', 'age 5-15')
('muscle_fatigue', 'age 16

('vomit_earache', 'age 45-64')
('vomit_earache', 'age 65+')
('vomit_earache', 'male')
('vomit_earache', 'female')
('vomit_leg pain', 'age 0-4')
('vomit_leg pain', 'age 5-15')
('vomit_leg pain', 'age 16-44')
('vomit_leg pain', 'age 45-64')
('vomit_leg pain', 'age 65+')
('vomit_leg pain', 'male')
('vomit_leg pain', 'female')
('vomit_runnynose', 'age 0-4')
('vomit_runnynose', 'age 5-15')
('vomit_runnynose', 'age 16-44')
('vomit_runnynose', 'age 45-64')
('vomit_runnynose', 'age 65+')
('vomit_runnynose', 'male')
('vomit_runnynose', 'female')
('nausea_diarrhea', 'age 0-4')
('nausea_diarrhea', 'age 5-15')
('nausea_diarrhea', 'age 16-44')
('nausea_diarrhea', 'age 45-64')
('nausea_diarrhea', 'age 65+')
('nausea_diarrhea', 'male')
('nausea_diarrhea', 'female')
('nausea_chills', 'age 0-4')
('nausea_chills', 'age 5-15')
('nausea_chills', 'age 16-44')
('nausea_chills', 'age 45-64')
('nausea_chills', 'age 65+')
('nausea_chills', 'male')
('nausea_chills', 'female')
('nausea_sneeze', 'age 0-4')
('naus

('shortness of breath_leg pain', 'age 65+')
('shortness of breath_leg pain', 'male')
('shortness of breath_leg pain', 'female')
('shortness of breath_runnynose', 'age 0-4')
('shortness of breath_runnynose', 'age 5-15')
('shortness of breath_runnynose', 'age 16-44')
('shortness of breath_runnynose', 'age 45-64')
('shortness of breath_runnynose', 'age 65+')
('shortness of breath_runnynose', 'male')
('shortness of breath_runnynose', 'female')
('phlegm_blockednose', 'age 0-4')
('phlegm_blockednose', 'age 5-15')
('phlegm_blockednose', 'age 16-44')
('phlegm_blockednose', 'age 45-64')
('phlegm_blockednose', 'age 65+')
('phlegm_blockednose', 'male')
('phlegm_blockednose', 'female')
('phlegm_earache', 'age 0-4')
('phlegm_earache', 'age 5-15')
('phlegm_earache', 'age 16-44')
('phlegm_earache', 'age 45-64')
('phlegm_earache', 'age 65+')
('phlegm_earache', 'male')
('phlegm_earache', 'female')
('phlegm_leg pain', 'age 0-4')
('phlegm_leg pain', 'age 5-15')
('phlegm_leg pain', 'age 16-44')
('phlegm_l

In [24]:
data_gv.head()

Unnamed: 0,fever_sorethroat_age 0-4,fever_sorethroat_age 5-15,fever_sorethroat_age 16-44,fever_sorethroat_age 45-64,fever_sorethroat_age 65+,fever_sorethroat_male,fever_sorethroat_female,fever_cough_age 0-4,fever_cough_age 5-15,fever_cough_age 16-44,...,leg pain_male,leg pain_female,runnynose_age 0-4,runnynose_age 5-15,runnynose_age 16-44,runnynose_age 45-64,runnynose_age 65+,runnynose_male,runnynose_female,virus
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [25]:
data_fw = create_new_dataframe(data['fluwatch'],combined_features,columns_)
print("Original number of sample points : ",data['fluwatch'].shape)
print("Final number of sample points : ",data_fw.shape)

('fever_sorethroat', 'age 0-4')
('fever_sorethroat', 'age 5-15')
('fever_sorethroat', 'age 16-44')
('fever_sorethroat', 'age 45-64')
('fever_sorethroat', 'age 65+')
('fever_sorethroat', 'male')
('fever_sorethroat', 'female')
('fever_cough', 'age 0-4')
('fever_cough', 'age 5-15')
('fever_cough', 'age 16-44')
('fever_cough', 'age 45-64')
('fever_cough', 'age 65+')
('fever_cough', 'male')
('fever_cough', 'female')
('fever_muscle', 'age 0-4')
('fever_muscle', 'age 5-15')
('fever_muscle', 'age 16-44')
('fever_muscle', 'age 45-64')
('fever_muscle', 'age 65+')
('fever_muscle', 'male')
('fever_muscle', 'female')
('fever_headache', 'age 0-4')
('fever_headache', 'age 5-15')
('fever_headache', 'age 16-44')
('fever_headache', 'age 45-64')
('fever_headache', 'age 65+')
('fever_headache', 'male')
('fever_headache', 'female')
('fever_fatigue', 'age 0-4')
('fever_fatigue', 'age 5-15')
('fever_fatigue', 'age 16-44')
('fever_fatigue', 'age 45-64')
('fever_fatigue', 'age 65+')
('fever_fatigue', 'male')
(

('cough_phlegm', 'female')
('cough_blockednose', 'age 0-4')
('cough_blockednose', 'age 5-15')
('cough_blockednose', 'age 16-44')
('cough_blockednose', 'age 45-64')
('cough_blockednose', 'age 65+')
('cough_blockednose', 'male')
('cough_blockednose', 'female')
('cough_earache', 'age 0-4')
('cough_earache', 'age 5-15')
('cough_earache', 'age 16-44')
('cough_earache', 'age 45-64')
('cough_earache', 'age 65+')
('cough_earache', 'male')
('cough_earache', 'female')
('cough_leg pain', 'age 0-4')
('cough_leg pain', 'age 5-15')
('cough_leg pain', 'age 16-44')
('cough_leg pain', 'age 45-64')
('cough_leg pain', 'age 65+')
('cough_leg pain', 'male')
('cough_leg pain', 'female')
('cough_runnynose', 'age 0-4')
('cough_runnynose', 'age 5-15')
('cough_runnynose', 'age 16-44')
('cough_runnynose', 'age 45-64')
('cough_runnynose', 'age 65+')
('cough_runnynose', 'male')
('cough_runnynose', 'female')
('muscle_headache', 'age 0-4')
('muscle_headache', 'age 5-15')
('muscle_headache', 'age 16-44')
('muscle_hea

('vomit_chills', 'age 5-15')
('vomit_chills', 'age 16-44')
('vomit_chills', 'age 45-64')
('vomit_chills', 'age 65+')
('vomit_chills', 'male')
('vomit_chills', 'female')
('vomit_sneeze', 'age 0-4')
('vomit_sneeze', 'age 5-15')
('vomit_sneeze', 'age 16-44')
('vomit_sneeze', 'age 45-64')
('vomit_sneeze', 'age 65+')
('vomit_sneeze', 'male')
('vomit_sneeze', 'female')
('vomit_shortness of breath', 'age 0-4')
('vomit_shortness of breath', 'age 5-15')
('vomit_shortness of breath', 'age 16-44')
('vomit_shortness of breath', 'age 45-64')
('vomit_shortness of breath', 'age 65+')
('vomit_shortness of breath', 'male')
('vomit_shortness of breath', 'female')
('vomit_phlegm', 'age 0-4')
('vomit_phlegm', 'age 5-15')
('vomit_phlegm', 'age 16-44')
('vomit_phlegm', 'age 45-64')
('vomit_phlegm', 'age 65+')
('vomit_phlegm', 'male')
('vomit_phlegm', 'female')
('vomit_blockednose', 'age 0-4')
('vomit_blockednose', 'age 5-15')
('vomit_blockednose', 'age 16-44')
('vomit_blockednose', 'age 45-64')
('vomit_bloc

('sneeze_leg pain', 'age 0-4')
('sneeze_leg pain', 'age 5-15')
('sneeze_leg pain', 'age 16-44')
('sneeze_leg pain', 'age 45-64')
('sneeze_leg pain', 'age 65+')
('sneeze_leg pain', 'male')
('sneeze_leg pain', 'female')
('sneeze_runnynose', 'age 0-4')
('sneeze_runnynose', 'age 5-15')
('sneeze_runnynose', 'age 16-44')
('sneeze_runnynose', 'age 45-64')
('sneeze_runnynose', 'age 65+')
('sneeze_runnynose', 'male')
('sneeze_runnynose', 'female')
('shortness of breath_phlegm', 'age 0-4')
('shortness of breath_phlegm', 'age 5-15')
('shortness of breath_phlegm', 'age 16-44')
('shortness of breath_phlegm', 'age 45-64')
('shortness of breath_phlegm', 'age 65+')
('shortness of breath_phlegm', 'male')
('shortness of breath_phlegm', 'female')
('shortness of breath_blockednose', 'age 0-4')
('shortness of breath_blockednose', 'age 5-15')
('shortness of breath_blockednose', 'age 16-44')
('shortness of breath_blockednose', 'age 45-64')
('shortness of breath_blockednose', 'age 65+')
('shortness of breath_

In [26]:
data_fw.head()

Unnamed: 0,fever_sorethroat_age 0-4,fever_sorethroat_age 5-15,fever_sorethroat_age 16-44,fever_sorethroat_age 45-64,fever_sorethroat_age 65+,fever_sorethroat_male,fever_sorethroat_female,fever_cough_age 0-4,fever_cough_age 5-15,fever_cough_age 16-44,...,leg pain_male,leg pain_female,runnynose_age 0-4,runnynose_age 5-15,runnynose_age 16-44,runnynose_age 45-64,runnynose_age 65+,runnynose_male,runnynose_female,virus
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
data_hk = create_new_dataframe(data['hongkong'],combined_features,columns_)
print("Original number of sample points : ",data['hongkong'].shape)
print("Final number of sample points :",data_hk.shape)

('fever_sorethroat', 'age 0-4')
('fever_sorethroat', 'age 5-15')
('fever_sorethroat', 'age 16-44')
('fever_sorethroat', 'age 45-64')
('fever_sorethroat', 'age 65+')
('fever_sorethroat', 'male')
('fever_sorethroat', 'female')
('fever_cough', 'age 0-4')
('fever_cough', 'age 5-15')
('fever_cough', 'age 16-44')
('fever_cough', 'age 45-64')
('fever_cough', 'age 65+')
('fever_cough', 'male')
('fever_cough', 'female')
('fever_muscle', 'age 0-4')
('fever_muscle', 'age 5-15')
('fever_muscle', 'age 16-44')
('fever_muscle', 'age 45-64')
('fever_muscle', 'age 65+')
('fever_muscle', 'male')
('fever_muscle', 'female')
('fever_headache', 'age 0-4')
('fever_headache', 'age 5-15')
('fever_headache', 'age 16-44')
('fever_headache', 'age 45-64')
('fever_headache', 'age 65+')
('fever_headache', 'male')
('fever_headache', 'female')
('fever_fatigue', 'age 0-4')
('fever_fatigue', 'age 5-15')
('fever_fatigue', 'age 16-44')
('fever_fatigue', 'age 45-64')
('fever_fatigue', 'age 65+')
('fever_fatigue', 'male')
(

('cough_diarrhea', 'female')
('cough_chills', 'age 0-4')
('cough_chills', 'age 5-15')
('cough_chills', 'age 16-44')
('cough_chills', 'age 45-64')
('cough_chills', 'age 65+')
('cough_chills', 'male')
('cough_chills', 'female')
('cough_sneeze', 'age 0-4')
('cough_sneeze', 'age 5-15')
('cough_sneeze', 'age 16-44')
('cough_sneeze', 'age 45-64')
('cough_sneeze', 'age 65+')
('cough_sneeze', 'male')
('cough_sneeze', 'female')
('cough_shortness of breath', 'age 0-4')
('cough_shortness of breath', 'age 5-15')
('cough_shortness of breath', 'age 16-44')
('cough_shortness of breath', 'age 45-64')
('cough_shortness of breath', 'age 65+')
('cough_shortness of breath', 'male')
('cough_shortness of breath', 'female')
('cough_phlegm', 'age 0-4')
('cough_phlegm', 'age 5-15')
('cough_phlegm', 'age 16-44')
('cough_phlegm', 'age 45-64')
('cough_phlegm', 'age 65+')
('cough_phlegm', 'male')
('cough_phlegm', 'female')
('cough_blockednose', 'age 0-4')
('cough_blockednose', 'age 5-15')
('cough_blockednose', 'ag

('fatigue_shortness of breath', 'age 16-44')
('fatigue_shortness of breath', 'age 45-64')
('fatigue_shortness of breath', 'age 65+')
('fatigue_shortness of breath', 'male')
('fatigue_shortness of breath', 'female')
('fatigue_phlegm', 'age 0-4')
('fatigue_phlegm', 'age 5-15')
('fatigue_phlegm', 'age 16-44')
('fatigue_phlegm', 'age 45-64')
('fatigue_phlegm', 'age 65+')
('fatigue_phlegm', 'male')
('fatigue_phlegm', 'female')
('fatigue_blockednose', 'age 0-4')
('fatigue_blockednose', 'age 5-15')
('fatigue_blockednose', 'age 16-44')
('fatigue_blockednose', 'age 45-64')
('fatigue_blockednose', 'age 65+')
('fatigue_blockednose', 'male')
('fatigue_blockednose', 'female')
('fatigue_earache', 'age 0-4')
('fatigue_earache', 'age 5-15')
('fatigue_earache', 'age 16-44')
('fatigue_earache', 'age 45-64')
('fatigue_earache', 'age 65+')
('fatigue_earache', 'male')
('fatigue_earache', 'female')
('fatigue_leg pain', 'age 0-4')
('fatigue_leg pain', 'age 5-15')
('fatigue_leg pain', 'age 16-44')
('fatigue_l

('chills_blockednose', 'age 65+')
('chills_blockednose', 'male')
('chills_blockednose', 'female')
('chills_earache', 'age 0-4')
('chills_earache', 'age 5-15')
('chills_earache', 'age 16-44')
('chills_earache', 'age 45-64')
('chills_earache', 'age 65+')
('chills_earache', 'male')
('chills_earache', 'female')
('chills_leg pain', 'age 0-4')
('chills_leg pain', 'age 5-15')
('chills_leg pain', 'age 16-44')
('chills_leg pain', 'age 45-64')
('chills_leg pain', 'age 65+')
('chills_leg pain', 'male')
('chills_leg pain', 'female')
('chills_runnynose', 'age 0-4')
('chills_runnynose', 'age 5-15')
('chills_runnynose', 'age 16-44')
('chills_runnynose', 'age 45-64')
('chills_runnynose', 'age 65+')
('chills_runnynose', 'male')
('chills_runnynose', 'female')
('sneeze_shortness of breath', 'age 0-4')
('sneeze_shortness of breath', 'age 5-15')
('sneeze_shortness of breath', 'age 16-44')
('sneeze_shortness of breath', 'age 45-64')
('sneeze_shortness of breath', 'age 65+')
('sneeze_shortness of breath', 'm

Original number of sample points :  (4954, 161)
Final number of sample points : (4954, 1072)


In [29]:
data_hk.head()

Unnamed: 0,fever_sorethroat_age 0-4,fever_sorethroat_age 5-15,fever_sorethroat_age 16-44,fever_sorethroat_age 45-64,fever_sorethroat_age 65+,fever_sorethroat_male,fever_sorethroat_female,fever_cough_age 0-4,fever_cough_age 5-15,fever_cough_age 16-44,...,leg pain_male,leg pain_female,runnynose_age 0-4,runnynose_age 5-15,runnynose_age 16-44,runnynose_age 45-64,runnynose_age 65+,runnynose_male,runnynose_female,virus
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
data_ht = create_new_dataframe(data['hutterite'],combined_features,columns_)
print("Original number of sample points : ",data['hutterite'].shape)
print("Final number of sample points : ",data_ht.shape)

('fever_sorethroat', 'age 0-4')
('fever_sorethroat', 'age 5-15')
('fever_sorethroat', 'age 16-44')
('fever_sorethroat', 'age 45-64')
('fever_sorethroat', 'age 65+')
('fever_sorethroat', 'male')
('fever_sorethroat', 'female')
('fever_cough', 'age 0-4')
('fever_cough', 'age 5-15')
('fever_cough', 'age 16-44')
('fever_cough', 'age 45-64')
('fever_cough', 'age 65+')
('fever_cough', 'male')
('fever_cough', 'female')
('fever_muscle', 'age 0-4')
('fever_muscle', 'age 5-15')
('fever_muscle', 'age 16-44')
('fever_muscle', 'age 45-64')
('fever_muscle', 'age 65+')
('fever_muscle', 'male')
('fever_muscle', 'female')
('fever_headache', 'age 0-4')
('fever_headache', 'age 5-15')
('fever_headache', 'age 16-44')
('fever_headache', 'age 45-64')
('fever_headache', 'age 65+')
('fever_headache', 'male')
('fever_headache', 'female')
('fever_fatigue', 'age 0-4')
('fever_fatigue', 'age 5-15')
('fever_fatigue', 'age 16-44')
('fever_fatigue', 'age 45-64')
('fever_fatigue', 'age 65+')
('fever_fatigue', 'male')
(

('cough_phlegm', 'age 16-44')
('cough_phlegm', 'age 45-64')
('cough_phlegm', 'age 65+')
('cough_phlegm', 'male')
('cough_phlegm', 'female')
('cough_blockednose', 'age 0-4')
('cough_blockednose', 'age 5-15')
('cough_blockednose', 'age 16-44')
('cough_blockednose', 'age 45-64')
('cough_blockednose', 'age 65+')
('cough_blockednose', 'male')
('cough_blockednose', 'female')
('cough_earache', 'age 0-4')
('cough_earache', 'age 5-15')
('cough_earache', 'age 16-44')
('cough_earache', 'age 45-64')
('cough_earache', 'age 65+')
('cough_earache', 'male')
('cough_earache', 'female')
('cough_leg pain', 'age 0-4')
('cough_leg pain', 'age 5-15')
('cough_leg pain', 'age 16-44')
('cough_leg pain', 'age 45-64')
('cough_leg pain', 'age 65+')
('cough_leg pain', 'male')
('cough_leg pain', 'female')
('cough_runnynose', 'age 0-4')
('cough_runnynose', 'age 5-15')
('cough_runnynose', 'age 16-44')
('cough_runnynose', 'age 45-64')
('cough_runnynose', 'age 65+')
('cough_runnynose', 'male')
('cough_runnynose', 'fema

('fatigue_earache', 'age 0-4')
('fatigue_earache', 'age 5-15')
('fatigue_earache', 'age 16-44')
('fatigue_earache', 'age 45-64')
('fatigue_earache', 'age 65+')
('fatigue_earache', 'male')
('fatigue_earache', 'female')
('fatigue_leg pain', 'age 0-4')
('fatigue_leg pain', 'age 5-15')
('fatigue_leg pain', 'age 16-44')
('fatigue_leg pain', 'age 45-64')
('fatigue_leg pain', 'age 65+')
('fatigue_leg pain', 'male')
('fatigue_leg pain', 'female')
('fatigue_runnynose', 'age 0-4')
('fatigue_runnynose', 'age 5-15')
('fatigue_runnynose', 'age 16-44')
('fatigue_runnynose', 'age 45-64')
('fatigue_runnynose', 'age 65+')
('fatigue_runnynose', 'male')
('fatigue_runnynose', 'female')
('vomit_nausea', 'age 0-4')
('vomit_nausea', 'age 5-15')
('vomit_nausea', 'age 16-44')
('vomit_nausea', 'age 45-64')
('vomit_nausea', 'age 65+')
('vomit_nausea', 'male')
('vomit_nausea', 'female')
('vomit_diarrhea', 'age 0-4')
('vomit_diarrhea', 'age 5-15')
('vomit_diarrhea', 'age 16-44')
('vomit_diarrhea', 'age 45-64')
('v

('shortness of breath_blockednose', 'age 45-64')
('shortness of breath_blockednose', 'age 65+')
('shortness of breath_blockednose', 'male')
('shortness of breath_blockednose', 'female')
('shortness of breath_earache', 'age 0-4')
('shortness of breath_earache', 'age 5-15')
('shortness of breath_earache', 'age 16-44')
('shortness of breath_earache', 'age 45-64')
('shortness of breath_earache', 'age 65+')
('shortness of breath_earache', 'male')
('shortness of breath_earache', 'female')
('shortness of breath_leg pain', 'age 0-4')
('shortness of breath_leg pain', 'age 5-15')
('shortness of breath_leg pain', 'age 16-44')
('shortness of breath_leg pain', 'age 45-64')
('shortness of breath_leg pain', 'age 65+')
('shortness of breath_leg pain', 'male')
('shortness of breath_leg pain', 'female')
('shortness of breath_runnynose', 'age 0-4')
('shortness of breath_runnynose', 'age 5-15')
('shortness of breath_runnynose', 'age 16-44')
('shortness of breath_runnynose', 'age 45-64')
('shortness of bre

In [31]:
data_ht.head()

Unnamed: 0,fever_sorethroat_age 0-4,fever_sorethroat_age 5-15,fever_sorethroat_age 16-44,fever_sorethroat_age 45-64,fever_sorethroat_age 65+,fever_sorethroat_male,fever_sorethroat_female,fever_cough_age 0-4,fever_cough_age 5-15,fever_cough_age 16-44,...,leg pain_male,leg pain_female,runnynose_age 0-4,runnynose_age 5-15,runnynose_age 16-44,runnynose_age 45-64,runnynose_age 65+,runnynose_male,runnynose_female,virus
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,1,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


#### Save the new data files with combined symptoms and demographics

In [33]:
def save_file(dataframe_,filename_):
    print(len(list(dataframe_.columns)))
    directory = "./Data/Combined/"
    filename = directory+filename_
    if os.path.isfile(filename):
        print("File exists!")
    else:
        dataframe_.to_csv(filename,index=False)

In [34]:
save_file(data_nyumc,"nyumc.csv")

1072


In [35]:
save_file(data_gv,"goviral.csv")

1072


In [36]:
save_file(data_gv,"fluwatch.csv")

1072


In [37]:
save_file(data_hk,"hongkong.csv")

1072


In [38]:
save_file(data_ht,"hutterite.csv")

1072
