In [1]:
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import json
import copy

In [2]:
from sklearn.linear_model import SGDClassifier, LogisticRegressionCV
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
from sklearn.feature_selection import RFE, RFECV

In [3]:
data_dir = '/Users/abdulkadirgokce/Documents/MSFeCare/MSF-eCARE-filedrop'

!ls $data_dir

clean_data.csv                          metadata_patient.json
cluster_points.csv                      metadata_patient_pred.json
clustered_data.csv                      [31mml-dataset-datadictionary_20211001.xlsx[m[m
clustering_model.pkl                    model_kp.pkl
[31mdatadictionary-workinprogress.xlsx[m[m      patient_data.csv
[31mfull-310k - Copy xl.xlsx[m[m                patient_pred_data.csv
[31mfull-310k - Copy.csv[m[m                    processed_data.csv
gps_coordinates.json                    ~$full-310k - Copy xl.xlsx
metadata.json


In [4]:
filename = 'patient_pred_data.csv'
file_dir = os.path.join(data_dir, filename)
df = pd.read_csv(file_dir)
df

Unnamed: 0,country,hf_town,latitude,longitude,week_number_sine,week_number_cosine,month_sine,month_cosine,year,datetime,...,hisdx_malaria_sev,hisdx_malnut,hisdx_measles,hisdx_anemia,hisdx_others,hisdx_ot_skin,hisdx_ot_uti,hisdx_ot_eye,hisdx_ot_mouth,hisdx_ot_fever_viral
0,RCA,Batangafo,0.648635,-0.788528,1.005444,0.954471,1.187091,0.742937,-2.104623,2017-02-15 12:11:47,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,RCA,Batangafo,0.648635,-0.788528,0.116275,1.420759,0.668856,1.260701,-2.104623,2017-01-05 12:39:43,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,RCA,Kambakota,0.629644,-0.829146,-1.114237,-0.921859,-1.265222,-0.671621,-2.104623,2017-08-16 10:05:44,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,RCA,Batangafo,0.648635,-0.788528,1.199349,-0.641170,1.187091,-0.671621,-2.104623,2017-04-28 19:19:26,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,RCA,Batangafo,0.648635,-0.788528,-0.712413,1.269021,-0.039066,1.450215,-3.037628,2016-12-02 10:22:03,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
310284,RCA,Kabo,0.698784,-0.756215,1.320391,0.354882,1.376778,0.035658,0.694394,2020-03-12 09:55:48,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
310285,RCA,Kabo,0.698784,-0.756215,0.284458,1.389964,0.668856,1.260701,0.694394,2020-01-08 08:26:21,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
310286,RCA,Kabo,0.698784,-0.756215,1.351212,-0.154226,1.187091,-0.671621,0.694394,2020-04-04 10:06:07,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
310287,RCA,Kabo,0.698784,-0.756215,-0.556493,-1.306526,-0.746987,-1.189385,0.694394,2020-07-16 14:42:21,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
df.info(True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 310289 entries, 0 to 310288
Data columns (total 218 columns):
 #    Column                               Dtype  
---   ------                               -----  
 0    country                              object 
 1    hf_town                              object 
 2    latitude                             float64
 3    longitude                            float64
 4    week_number_sine                     float64
 5    week_number_cosine                   float64
 6    month_sine                           float64
 7    month_cosine                         float64
 8    year                                 float64
 9    datetime                             object 
 10   a_age                                float64
 11   a_gender2_female                     float64
 12   a_gender2_male                       float64
 13   a_gender2_nan                        float64
 14   anaphyl_confirmed_0                  float64
 15   anaphyl_confirm

In [6]:
with open(os.path.join(data_dir, 'metadata_patient_pred.json'), 'r') as file:
    metadata = json.load(file)


In [7]:
features_for_pred = metadata['numerical_features'] + metadata['categorical_features']
features_for_pred += metadata['spatial_feats_for_pred'] + metadata['temporal_feats_for_pred']

In [8]:
metadata['diagnoses']

['hisdx_severe',
 'hisdx_urti',
 'hisdx_lrti',
 'hisdx_cough_persist',
 'hisdx_diarrhwatery',
 'hisdx_diarrhblood',
 'hisdx_otitis',
 'hisdx_malaria_simple',
 'hisdx_malaria_sev',
 'hisdx_malnut',
 'hisdx_measles',
 'hisdx_anemia',
 'hisdx_others',
 'hisdx_ot_skin',
 'hisdx_ot_uti',
 'hisdx_ot_eye',
 'hisdx_ot_mouth',
 'hisdx_ot_fever_viral']

In [9]:
prediction_data = df[features_for_pred].copy()
categorical_feature_indices = [prediction_data.columns.get_loc(c) for c in metadata['categorical_features']]

prediction_data

Unnamed: 0,a_age,a_gender2_female,a_gender2_male,a_gender2_nan,anaphyl_confirmed_0,anaphyl_confirmed_1,anaphyl_confirmed_nan,danger_sign_jaundice_0,danger_sign_jaundice_1,danger_sign_jaundice_nan,...,s_hematuria_0,s_hematuria_1,s_hematuria_nan,latitude,longitude,week_number_sine,week_number_cosine,month_sine,month_cosine,year
0,-0.222066,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.648635,-0.788528,1.005444,0.954471,1.187091,0.742937,-2.104623
1,-0.635496,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.648635,-0.788528,0.116275,1.420759,0.668856,1.260701,-2.104623
2,-0.635496,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.629644,-0.829146,-1.114237,-0.921859,-1.265222,-0.671621,-2.104623
3,-0.635496,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.648635,-0.788528,1.199349,-0.641170,1.187091,-0.671621,-2.104623
4,-1.324546,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.648635,-0.788528,-0.712413,1.269021,-0.039066,1.450215,-3.037628
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
310284,1.087129,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.698784,-0.756215,1.320391,0.354882,1.376778,0.035658,0.694394
310285,-1.117831,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.698784,-0.756215,0.284458,1.389964,0.668856,1.260701,0.694394
310286,0.329174,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.698784,-0.756215,1.351212,-0.154226,1.187091,-0.671621,0.694394
310287,-0.290971,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.698784,-0.756215,-0.556493,-1.306526,-0.746987,-1.189385,0.694394


In [12]:
a, b = df['hisdx_malaria_sev'].value_counts()
b/(a+b)

0.007086941528703886

In [13]:
a, b = df['hisdx_malaria_simple'].value_counts()
b/(a+b)

0.19222724621240198

In [15]:
labels = df[metadata['diagnoses']]
labels
metadata['diagnoses']

['hisdx_severe',
 'hisdx_urti',
 'hisdx_lrti',
 'hisdx_cough_persist',
 'hisdx_diarrhwatery',
 'hisdx_diarrhblood',
 'hisdx_otitis',
 'hisdx_malaria_simple',
 'hisdx_malaria_sev',
 'hisdx_malnut',
 'hisdx_measles',
 'hisdx_anemia',
 'hisdx_others',
 'hisdx_ot_skin',
 'hisdx_ot_uti',
 'hisdx_ot_eye',
 'hisdx_ot_mouth',
 'hisdx_ot_fever_viral']

## Prediction

Train binary a classifier for each diagnosis.

In [117]:
classifier_base = SGDClassifier(
    #loss='hinge', # -> Linear SVM
    loss='log',    # -> Logistic Regression
    penalty='l2', 
    alpha=0.0001, 
    l1_ratio=0.15,
    max_iter=1000, tol=0.001, 
    shuffle=True, 
    verbose=0, 
    epsilon=0.1, 
    n_jobs=None, 
    random_state=None, 
    learning_rate='optimal', 
    eta0=0.0, 
    power_t=0.5, 
    early_stopping=False, 
    validation_fraction=0.1, 
    n_iter_no_change=5, 
    average=False
)

In [101]:
for diag in metadata['diagnoses']:
    y = labels[diag] # Labels the current diagnosis
    
    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(prediction_data, y, test_size=0.2, random_state=None)
    
    # Classifier instance
    classifier = copy.deepcopy(classifier_base) # Copy classifier instance
    
    classifier.fit(X_train, y_train) # Train
    y_pred = classifier.predict(X_test) # Test
    
    # Quantify scores
    score = classifier.decision_function(X_test)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    rocauc = roc_auc_score(y_test, score)
    
    #Printing
    print(f'Acc:{acc:.3f},  F1:{f1:.3f},  ROC_AUC:{rocauc:.3f}, Diagnosis:{diag}')

Acc:0.978,  F1:0.556,  ROC_AUC:0.930, Diagnosis:hisdx_severe
Acc:0.971,  F1:0.964,  ROC_AUC:0.981, Diagnosis:hisdx_urti
Acc:0.972,  F1:0.875,  ROC_AUC:0.966, Diagnosis:hisdx_lrti
Acc:0.999,  F1:0.302,  ROC_AUC:1.000, Diagnosis:hisdx_cough_persist
Acc:0.997,  F1:0.995,  ROC_AUC:0.999, Diagnosis:hisdx_diarrhwatery
Acc:0.999,  F1:0.971,  ROC_AUC:0.994, Diagnosis:hisdx_diarrhblood
Acc:0.999,  F1:0.976,  ROC_AUC:0.990, Diagnosis:hisdx_otitis
Acc:0.995,  F1:0.987,  ROC_AUC:0.995, Diagnosis:hisdx_malaria_simple
Acc:0.996,  F1:0.579,  ROC_AUC:0.969, Diagnosis:hisdx_malaria_sev
Acc:0.944,  F1:0.106,  ROC_AUC:0.858, Diagnosis:hisdx_malnut
Acc:1.000,  F1:0.824,  ROC_AUC:0.990, Diagnosis:hisdx_measles
Acc:0.997,  F1:0.836,  ROC_AUC:0.996, Diagnosis:hisdx_anemia
Acc:0.973,  F1:0.943,  ROC_AUC:0.983, Diagnosis:hisdx_others
Acc:0.998,  F1:0.991,  ROC_AUC:0.998, Diagnosis:hisdx_ot_skin
Acc:0.996,  F1:0.806,  ROC_AUC:0.950, Diagnosis:hisdx_ot_uti
Acc:0.997,  F1:0.971,  ROC_AUC:0.996, Diagnosis:hisdx_ot

## Recursive Feature Elimination

Recursively remove weights of the linear classifier by selecting the most important features.

In [20]:
for diag in metadata['diagnoses']:
    y = labels[diag] # Labels the current diagnosis
    X_train, X_test, y_train, y_test = train_test_split(prediction_data, y, test_size=0.2, random_state=None)
    
    # Train a classifier with full features
    classifier = copy.deepcopy(classifier_base) # Copy classifier instance
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    score = classifier.decision_function(X_test)
    acc = accuracy_score(y_test, y_pred)
    f1_full = f1_score(y_test, y_pred)
    rocauc = roc_auc_score(y_test, score)
    
    # Recursively eliminate weights to reach a desired subset of features
    classifier = copy.deepcopy(classifier_base)
    rfe = RFE(classifier, n_features_to_select=4, step=10, verbose=1, importance_getter='auto')
    
    rfe.fit(X_train, y_train) # Train 
    y_pred = rfe.predict(X_test) # Test
    
    # Quantify scores
    score = rfe.decision_function(X_test)
    acc = accuracy_score(y_test, y_pred)
    f1_topK = f1_score(y_test, y_pred)
    rocauc = roc_auc_score(y_test, score)
    
    # Print results
    #print(f'Acc:{acc:.3f},  F1:{f1:.3f},  ROC_AUC:{rocauc:.3f}, Diagnosis:{diag}')
    print(f'Full features F1:{f1_full:.3f} | Top 4 features F1:{f1_topK:.3f} Diagnosis:{diag}')
    print('Top 4 features:', rfe.get_feature_names_out())
    print('')

Fitting estimator with 197 features.
Fitting estimator with 187 features.
Fitting estimator with 177 features.
Fitting estimator with 167 features.
Fitting estimator with 157 features.
Fitting estimator with 147 features.
Fitting estimator with 137 features.
Fitting estimator with 127 features.
Fitting estimator with 117 features.
Fitting estimator with 107 features.
Fitting estimator with 97 features.
Fitting estimator with 87 features.
Fitting estimator with 77 features.
Fitting estimator with 67 features.
Fitting estimator with 57 features.
Fitting estimator with 47 features.
Fitting estimator with 37 features.
Fitting estimator with 27 features.
Fitting estimator with 17 features.
Fitting estimator with 7 features.
Full features F1:0.509 | Top 4 features F1:0.348 Diagnosis:hisdx_severe
Top 4 features: ['s_pallor_none' 's_pallor_nan' 'hydration_neuro_lethargic'
 'bloody_stool_1']

Fitting estimator with 197 features.
Fitting estimator with 187 features.
Fitting estimator with 177 fe

KeyboardInterrupt: 

## Cross-Validation

#### Cross-Validation of Linear Classifier

In [None]:
kf = KFold(n_splits=5)

for diag in metadata['diagnoses']:
    y = labels[diag] # Labels the current diagnosis
    
    # Split dataset 5-Fold
    for train_index, test_index in kf.split(prediction_data):

        X_train, X_test = prediction_data[train_index], prediction_data[test_index]
        y_train, y_test = y[train_index], y[test_index]
        
        # Classifier instance
        classifier = copy.deepcopy(classifier_base) # Copy classifier instance
        
        classifier.fit(X_train, y_train) # Train
        y_pred = classifier.predict(X_test) # Predict
        
        score = classifier.decision_function(X_test)
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        rocauc = roc_auc_score(y_test, score)
        print(f'Acc:{acc:.3f},  F1:{f1:.3f},  ROC_AUC:{rocauc:.3f}, Diagnosis:{diag}')

#### Cross-Validation of Recursive Feature Elimination

In [19]:
for diag in metadata['diagnoses']:
    y = labels[diag]
    X_train, X_test, y_train, y_test = train_test_split(prediction_data, y, test_size=0.2, random_state=None)
    
    # Train a classifier with full features
    classifier = copy.deepcopy(classifier_base) # Copy classifier instance
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    score = classifier.decision_function(X_test)
    acc = accuracy_score(y_test, y_pred)
    f1_full = f1_score(y_test, y_pred)
    rocauc = roc_auc_score(y_test, score)
    
    # Recursively eliminate weights to reach a desired subset of features
    classifier = copy.deepcopy(classifier_base)
    rfecv = RFECV(classifier, step=10, min_features_to_select=4, cv=5, verbose=1) # 5-Folds CV
    rfecv.fit(X_train, y_train)
    y_pred = rfecv.predict(X_test)
    score = rfecv.decision_function(X_test)
    acc = accuracy_score(y_test, y_pred)
    f1_topK = f1_score(y_test, y_pred)
    rocauc = roc_auc_score(y_test, score)
    
    #print(f'Acc:{acc:.3f},  F1:{f1:.3f},  ROC_AUC:{rocauc:.3f}, Diagnosis:{diag}')
    print(f'Full features F1:{f1_full:.3f} | Top 4 features F1:{f1_topK:.3f} Diagnosis:{diag}')
    print('Top 4 features:', rfecv.get_feature_names_out())
    print('')

Fitting estimator with 197 features.
Fitting estimator with 187 features.
Fitting estimator with 177 features.
Fitting estimator with 167 features.
Fitting estimator with 157 features.
Fitting estimator with 147 features.
Fitting estimator with 137 features.
Fitting estimator with 127 features.
Fitting estimator with 117 features.
Fitting estimator with 107 features.
Fitting estimator with 97 features.
Fitting estimator with 87 features.
Fitting estimator with 77 features.
Fitting estimator with 67 features.
Fitting estimator with 57 features.
Fitting estimator with 47 features.
Fitting estimator with 37 features.
Fitting estimator with 27 features.
Fitting estimator with 17 features.
Fitting estimator with 7 features.
Fitting estimator with 197 features.
Fitting estimator with 187 features.
Fitting estimator with 177 features.
Fitting estimator with 167 features.
Fitting estimator with 157 features.
Fitting estimator with 147 features.
Fitting estimator with 137 features.
Fitting esti



Full features F1:0.554 | Top 4 features F1:0.553 Diagnosis:hisdx_severe
Top 4 features: ['mouth_trush_0' 's_oedema_0' 's_oedema_1' 's_pallor_moderate'
 's_pallor_none' 's_pallor_severe' 's_pallor_nan' 's_drepano_0'
 's_drepano_1' 's_mouthpb_1' 'wfa_less_than_neg3sd_1' 'ms_measles_1'
 'cough_malaise_his_0' 'cough_malaise_his_1' 'cough_wheez_rhinitis_0'
 'cough_wheezing_0' 'cough_wheezing_1' 'hydration_eye_1'
 'hydration_neuro_0' 'hydration_neuro_irritable'
 'hydration_neuro_lethargic' 'hydration_skin_0' 'hydration_skin_slow'
 'hydration_skin_very-slow' 'hydration_thirst_thirsty' 'bloody_stool_0'
 'bloody_stool_1' 's_diarr_0' 's_diarr_1' 's_vomit_1'
 'ear_tender_swelling_0' 'ear_tender_swelling_1' 'eye_sympt_nan'
 's_eyepb_1' 'lab_urine_pos_0' 'lab_urine_pos_1' 's_dysuria_0']

Fitting estimator with 197 features.
Fitting estimator with 187 features.
Fitting estimator with 177 features.
Fitting estimator with 167 features.
Fitting estimator with 157 features.
Fitting estimator with 147 fe



Full features F1:0.964 | Top 4 features F1:0.964 Diagnosis:hisdx_urti
Top 4 features: ['anaphyl_confirmed_nan' 's_pallor_severe' 's_drepano_0' 's_drepano_1'
 's_throat_0' 's_throat_1' 'sam_u6add_ocp_nan' 'wfa_less_than_neg3sd_nan'
 'ms_measles_0' 's_none_1' 'no_symptom_classifying_reported_0'
 'no_symptom_ecare_reported_1' 'cough_indrawing_0' 'cough_indrawing_1'
 'cough_malaise_his_0' 'cough_malaise_his_1' 'cough_prolonged_0'
 'cough_prolonged_1' 'cough_wheez_rhinitis_0' 'cough_wheez_rhinitis_1'
 'cough_wheezing_0' 'cough_wheezing_1' 'high_respiratory_rate_0'
 'high_respiratory_rate_1' 'high_respiratory_rate_nan' 's_cough_0'
 's_cough_1' 'hydration_assessed_1' 'hydration_neuro_0'
 'hydration_thirst_nan' 's_diarr_0' 's_diarr_1' 's_vomit_0' 'latitude'
 'longitude' 'week_number_cosine' 'month_cosine']

Fitting estimator with 197 features.
Fitting estimator with 187 features.
Fitting estimator with 177 features.
Fitting estimator with 167 features.
Fitting estimator with 157 features.
Fitt



Full features F1:0.877 | Top 4 features F1:0.876 Diagnosis:hisdx_lrti
Top 4 features: ['anaphyl_confirmed_0' 'anaphyl_confirmed_1' 'anaphyl_confirmed_nan'
 'danger_sign_jaundice_nan' 'fever_prolonged_1' 'lab_malaria_pos_1'
 's_oedema_0' 's_pallor_moderate' 's_pallor_none' 's_pallor_nan'
 's_drepano_0' 's_throat_0' 's_mouthpb_0' 's_joint_1' 'sam_u6add_ocp_0'
 'sam_u6add_ocp_nan' 'wfa_less_than_neg3sd_0' 'wfa_less_than_neg3sd_1'
 'wfa_less_than_neg3sd_nan' 'd_convuls_now_0' 'danger_sign_neck_nan'
 'danger_sign_interaction_nan' 'danger_sign_tonus_nan' 'ms_measles_0'
 'ms_measles_1' 'ms_measles_nan' 's_none_0' 's_none_1'
 'no_symptom_ecare_reported_0' 'no_symptom_ecare_reported_1'
 'cough_indrawing_0' 'cough_indrawing_1' 'cough_malaise_his_0'
 'cough_malaise_his_1' 'cough_prolonged_0' 'cough_prolonged_1'
 'cough_wheez_rhinitis_0' 'cough_wheez_rhinitis_1' 'cough_wheeze_rash_0'
 'cough_wheezing_0' 'cough_wheezing_1' 'high_respiratory_rate_0'
 'high_respiratory_rate_1' 'high_respiratory_rate_



Full features F1:0.200 | Top 4 features F1:0.308 Diagnosis:hisdx_cough_persist
Top 4 features: ['a_age' 'a_gender2_female' 'a_gender2_male' 'a_gender2_nan'
 'anaphyl_confirmed_0' 'anaphyl_confirmed_1' 'anaphyl_confirmed_nan'
 'danger_sign_jaundice_0' 'danger_sign_jaundice_1'
 'danger_sign_jaundice_nan' 'fever_prolonged_0' 'fever_prolonged_1'
 'fever_prolonged_nan' 'lab_malaria_pos_0' 'lab_malaria_pos_1'
 'lab_malaria_pos_nan' 'mouth_trush_0' 'mouth_trush_1' 'mouth_trush_nan'
 's_oedema_0' 's_oedema_1' 's_oedema_nan' 's_pallor_moderate'
 's_pallor_none' 's_pallor_severe' 's_pallor_nan' 's_drepano_0'
 's_drepano_1' 's_drepano_nan' 's_throat_0' 's_throat_1' 's_throat_nan'
 's_mouthpb_0' 's_mouthpb_1' 's_joint_0' 's_joint_1' 'sam_u6add_ocp_0'
 'sam_u6add_ocp_1' 'sam_u6add_ocp_nan' 'wfa_less_than_neg3sd_0'
 'wfa_less_than_neg3sd_1' 'wfa_less_than_neg3sd_nan' 'd_unconscious_0'
 'd_unconscious_1' 'd_convuls_his_0' 'd_convuls_his_1' 'd_convuls_now_0'
 'd_convuls_now_1' 'danger_sign_neck_0' 'da



Full features F1:0.996 | Top 4 features F1:0.995 Diagnosis:hisdx_diarrhwatery
Top 4 features: ['a_age' 'a_gender2_female' 'anaphyl_confirmed_0' 'anaphyl_confirmed_nan'
 'danger_sign_jaundice_0' 'danger_sign_jaundice_1'
 'danger_sign_jaundice_nan' 'fever_prolonged_0' 'lab_malaria_pos_0'
 'lab_malaria_pos_1' 'mouth_trush_0' 'mouth_trush_1' 'mouth_trush_nan'
 's_oedema_0' 's_oedema_1' 's_pallor_moderate' 's_pallor_none'
 's_pallor_severe' 's_pallor_nan' 's_drepano_0' 's_drepano_1' 's_throat_0'
 's_throat_1' 's_mouthpb_0' 's_mouthpb_1' 's_joint_0' 's_joint_1'
 'sam_u6add_ocp_0' 'sam_u6add_ocp_nan' 'wfa_less_than_neg3sd_0'
 'wfa_less_than_neg3sd_1' 'wfa_less_than_neg3sd_nan' 'd_unconscious_0'
 'd_unconscious_1' 'd_convuls_his_0' 'd_convuls_now_0'
 'danger_sign_neck_0' 'danger_sign_neck_nan' 'danger_sign_interaction_0'
 'danger_sign_interaction_1' 'danger_sign_interaction_nan'
 'danger_sign_tonus_0' 'danger_sign_tonus_1' 'danger_sign_tonus_nan'
 's_limp_0' 'ms_measles_0' 'ms_measles_1' 'ms_m

KeyboardInterrupt: 