In [1]:
import pandas as pd
import numpy as np
import os
import time
from collections import defaultdict
from sklearn.metrics import f1_score,accuracy_score,precision_score,recall_score,roc_curve
from sklearn import metrics
from sklearn import linear_model

In [2]:
def read_file(filename):
    data = pd.read_csv(filename)
    return data

In [3]:
def model_lm(train_data,test_data):
    x_train = train_data.drop(['virus'],axis = 1)
    y_train = train_data['virus']
    x_test = test_data.drop(['virus'],axis = 1)
    y_true = list(test_data['virus'])
    lm = linear_model.LogisticRegression()
    lm.fit(x_train,y_train)
    y_observed = lm.predict(x_test)
    fpr,tpr,threshold = roc_curve(y_true,y_observed)
    auc_ = metrics.auc(fpr,tpr)
    accuracy = accuracy_score(y_observed,y_true)
    return auc_
    
    

### NYUMC Baseline

In [4]:
nyumc_train = read_file("../Data/Symptoms/Train/nyumc.csv")
nyumc_train.head()

Unnamed: 0,fever,sorethroat,cough,muscle,headache,fatigue,vomit,nausea,diarrhea,chills,sneeze,shortness of breath,phlegm,blockednose,earache,leg pain,runnynose,virus
0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [5]:
nyumc_test = read_file("../Data/Symptoms/Test/nyumc.csv")
nyumc_test.head()

Unnamed: 0,fever,sorethroat,cough,muscle,headache,fatigue,vomit,nausea,diarrhea,chills,sneeze,shortness of breath,phlegm,blockednose,earache,leg pain,runnynose,virus
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [6]:
nyumc_auc = model_lm(nyumc_train,nyumc_test)
nyumc_auc

0.5

### GoViral Baseline

In [7]:
goviral_train = read_file("../Data/Symptoms/Train/goviral.csv")
goviral_train.head()

Unnamed: 0,fever,sorethroat,cough,muscle,headache,fatigue,vomit,nausea,diarrhea,chills,sneeze,shortness of breath,phlegm,blockednose,earache,leg pain,runnynose,virus
0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0
1,1,1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1,0
2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [8]:
goviral_test = read_file("../Data/Symptoms/Test/goviral.csv")
goviral_test.head()

Unnamed: 0,fever,sorethroat,cough,muscle,headache,fatigue,vomit,nausea,diarrhea,chills,sneeze,shortness of breath,phlegm,blockednose,earache,leg pain,runnynose,virus
0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,1,0,1
1,1,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1
2,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,1
3,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,1,1,1
4,1,0,0,1,0,1,0,1,0,1,0,0,0,0,0,1,1,1


In [9]:
goviral_auc = model_lm(goviral_train,goviral_test)
goviral_auc

0.61969696969696964

### Fluwatch Baseline

In [10]:
fluwatch_train = read_file("../Data/Symptoms/Train/fluwatch.csv")
fluwatch_train.head()

Unnamed: 0,fever,sorethroat,cough,muscle,headache,fatigue,vomit,nausea,diarrhea,chills,sneeze,shortness of breath,phlegm,blockednose,earache,leg pain,runnynose,virus
0,1.0,1.0,1.0,1.0,1.0,0,0,0,0.0,0,1.0,0,0,1.0,0,0,1.0,1
1,0.0,0.0,1.0,0.0,1.0,0,0,0,0.0,0,0.0,0,0,0.0,0,0,1.0,0
2,0.0,1.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0,0.0,0,0,1.0,1
3,0.0,1.0,1.0,1.0,1.0,0,0,0,1.0,0,0.0,0,0,1.0,0,0,1.0,0
4,0.0,1.0,1.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0,0.0,0,0,1.0,1


In [11]:
fluwatch_test = read_file("../Data/Symptoms/Test/fluwatch.csv")
fluwatch_test.head()

Unnamed: 0,fever,sorethroat,cough,muscle,headache,fatigue,vomit,nausea,diarrhea,chills,sneeze,shortness of breath,phlegm,blockednose,earache,leg pain,runnynose,virus
0,1.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0,0.0,0,0,1.0,0
1,0.0,0.0,1.0,0.0,0.0,0,0,0,0.0,0,1.0,0,0,0.0,0,0,0.0,1
2,0.0,1.0,1.0,1.0,1.0,0,0,0,0.0,0,0.0,0,0,1.0,0,0,1.0,1
3,0.0,1.0,1.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0,1.0,0,0,0.0,0
4,0.0,0.0,1.0,0.0,0.0,0,0,0,0.0,0,1.0,0,0,0.0,0,0,1.0,0


In [12]:
fluwatch_auc = model_lm(fluwatch_train,fluwatch_test)
fluwatch_auc

0.62659862272503686

### HongKong Baseline

In [13]:
hongkong_train = read_file("../Data/Symptoms/Train/hongkong.csv")
hongkong_train.head()

Unnamed: 0,fever,sorethroat,cough,muscle,headache,fatigue,vomit,nausea,diarrhea,chills,sneeze,shortness of breath,phlegm,blockednose,earache,leg pain,runnynose,virus
0,0.0,0.0,0.0,1.0,0.0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0,0,0,0,0,0,0,1.0,0,0,0,1.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0
4,0.0,0.0,0.0,1.0,1.0,0,0,0,0,0,0,0,1.0,0,0,0,1.0,0.0


In [14]:
hongkong_test = read_file("../Data/Symptoms/Test/hongkong.csv")
hongkong_test.head()

Unnamed: 0,fever,sorethroat,cough,muscle,headache,fatigue,vomit,nausea,diarrhea,chills,sneeze,shortness of breath,phlegm,blockednose,earache,leg pain,runnynose,virus
0,0.0,1.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0
1,1.0,1.0,1.0,1.0,1.0,0,0,0,0,0,0,0,1.0,0,0,0,1.0,1.0
2,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0
3,0.0,1.0,1.0,1.0,1.0,0,0,0,0,0,0,0,1.0,0,0,0,1.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0,0,1.0,0.0


In [15]:
hongkong_auc = model_lm(hongkong_train,hongkong_test)
hongkong_auc

0.74881872111471337

### Hutterite Baseline

In [16]:
hutterite_train = read_file("../Data/Symptoms/Train/hutterite.csv")
hutterite_train.head()

Unnamed: 0,fever,sorethroat,cough,muscle,headache,fatigue,vomit,nausea,diarrhea,chills,sneeze,shortness of breath,phlegm,blockednose,earache,leg pain,runnynose,virus
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0,1,1.0
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0,1,1.0
2,0,0,0,1,1,1,0,0,0,1,0,0,0,0,0.0,0,0,0.0
3,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0.0,0,0,1.0
4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0,1,1.0


In [17]:
hutterite_test = read_file("../Data/Symptoms/Test/hutterite.csv")
hutterite_test.head()

Unnamed: 0,fever,sorethroat,cough,muscle,headache,fatigue,vomit,nausea,diarrhea,chills,sneeze,shortness of breath,phlegm,blockednose,earache,leg pain,runnynose,virus
0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0.0,0,1,0.0
1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0,1,1.0
2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0,1,1.0
3,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0,1,1.0


In [18]:
hutterite_auc = model_lm(hutterite_train,hutterite_test)
hutterite_auc

0.58313437404346491

### Baseline results

In [19]:
print("NYUMC : ",nyumc_auc)
print("Goviral : ",goviral_auc)
print("Fluwatch : ",fluwatch_auc)
print("HongKong : ",hongkong_auc)
print("Hutterite : ",hutterite_auc)

NYUMC :  0.5
Goviral :  0.619696969697
Fluwatch :  0.626598622725
HongKong :  0.748818721115
Hutterite :  0.583134374043
