# Symptom Classification

## Most Frequent Symptoms

In [1]:
from os import path
import pandas as pd
import numpy as np
import matplotlib as plt
import re
import pickle

### Symptom Data

In [20]:
sym = pd.read_csv('covid_adv_sym.csv')

Unnamed: 0,VAERS_ID,SYMPTOM1,SYMPTOMVERSION1,SYMPTOM2,SYMPTOMVERSION2,SYMPTOM3,SYMPTOMVERSION3,SYMPTOM4,SYMPTOMVERSION4,SYMPTOM5,SYMPTOMVERSION5
39251,896636,Arthralgia,24.0,Confusional state,24.0,Fatigue,24.0,Feeling abnormal,24.0,Head discomfort,24.0
39252,896636,Memory impairment,24.0,Pain in extremity,24.0,Peripheral swelling,24.0,Physiotherapy,24.0,Pyrexia,24.0
44580,902418,Hypoaesthesia,24.0,Injection site hypoaesthesia,24.0,,,,,,
44591,902440,Headache,23.1,,,,,,,,
44594,902446,Erythema,23.1,Feeling hot,23.1,Flushing,23.1,,,,
...,...,...,...,...,...,...,...,...,...,...,...
531237,1427468,Headache,24.0,Unresponsive to stimuli,24.0,,,,,,
531238,1427471,Dizziness,24.0,Syncope,24.0,Unresponsive to stimuli,24.0,,,,
531239,1427472,Chest discomfort,24.0,,,,,,,,
531240,1427475,Death,24.0,,,,,,,,


In [16]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /Users/jlee/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/jlee/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [17]:
symptom_1 = list(sym.SYMPTOM1)
symptom_2 = list(sym.SYMPTOM2)
symptom_3 = list(sym.SYMPTOM3)
symptom_4 = list(sym.SYMPTOM4)
symptom_5 = list(sym.SYMPTOM5)

total_symptom = symptom_1 + symptom_2 + symptom_3 + symptom_4 + symptom_5
total_symptom = [s for s in total_symptom if str(s) != 'nan']

In [18]:
total_symptom_t = []
for s in total_symptom:
    tokens = nltk.word_tokenize(s.lower())
    total_symptom_t.append(tokens)

In [19]:
def flatten(lst):
    return[item for sublist in lst for item in sublist]

total_symptom_lst = flatten(total_symptom_t)

In [20]:
freq = nltk.FreqDist(total_symptom_lst)

In [22]:
symptom_1 = list(sym.SYMPTOM1.unique())
symptom_2 = list(sym.SYMPTOM2.unique())
symptom_3 = list(sym.SYMPTOM3.unique())
symptom_4 = list(sym.SYMPTOM4.unique())
symptom_5 = list(sym.SYMPTOM5.unique())

symptom_lst_unique = symptom_1 + symptom_2 + symptom_3 + symptom_4 + symptom_5
symptom_lst_unique = set(symptom_lst_unique)
symptom_lst_unique = [s for s in symptom_lst_unique if str(s) != 'nan']

In [23]:
len(symptom_lst_unique)

8664

In [26]:
# string match function to extract symptoms

def symptomfind(symptom_category, symptom_column):
    index_list = []
    match_list = []
    for index, symp in symptom_column.items():
        if symp.lower() in symptom_category:
            match_list.append(symp)
            index_list.append(index)
            
    return index_list, match_list 

In [19]:
def symptomfind(symptom_category, symptom_column):
    index_list = []
    match_list = []
    for index, symp in symptom_column.items():
        if re.findall(r"(?=("+'|'.join(symptom_category)+r"))", str(symp).lower()):
            match_list.append(symp)
            index_list.append(index)
        
    return index_list, match_list 

In [25]:
# check for every symptom column to get unique set
def unique_symptoms(symptom_category):
    symptom_columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5']
    syms = []
    for cols in symptom_columns:
        ind_list, sym_list = symptomfind(symptom_category, sym[cols])
        syms.extend(sym_list)
        
    unique_syms = list(set(syms))
    return(unique_syms)  

In [155]:
def symptom_info(symptom_category):
    symptom_columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5']
    syms = []
    for cols in symptom_columns:
        ind_list, sym_list = symptomfind(symptom_category, sym[cols])
        syms.extend(sym_list)
        
    sym_dict = {s : syms.count(s) for s in syms}
    return(sym_dict)

#### Joint Issues

In [26]:
joint = ['joint', 'arthralgia', 'arthritis']
joint_symptom = unique_symptoms(joint)

In [27]:
symptom_lst_unique = [s for s in symptom_lst_unique if s not in joint_symptom]

In [28]:
remove_list = ['Magnetic resonance imaging joint', 'Ultrasound joint',
                   'Joint injection', 'Joint stabilisation', 'Joint arthroplasty',
                    'Physical examination of joints abnormal','Joint range of motion measurement',
                    'Temporomandibular joint surgery', 'Joint dislocation reduction']

joint_symptom = [i for i in joint_symptom if i not in remove_list]

In [29]:
symptom_dict = {'Joint issues': joint_symptom}

In [30]:
symptom_dict['Joint issues'].append('Rheumatic fever')

#### Pain

In [326]:
pain = ['pain']
pain_symptom = unique_symptoms(pain)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in pain_symptom]

In [327]:
remove_list = ['Pain in extremity','Pain assessment', 'Chest pain']
pain_symptom = [i for i in pain_symptom if i not in remove_list]

In [334]:
symptom_dict['Pain'] = pain_symptom

#### Pain in extremity

In [34]:
pain_ex = ['pain in extremity']
pain_ex_symptom = unique_symptoms(pain_ex)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in pain_ex_symptom]
symptom_dict['Pain_extreme']: pain_ex_symptom

#### Headache

In [35]:
headache = ['headache']
head_symptom = unique_symptoms(headache)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in head_symptom]
symptom_dict['Headache'] = head_symptom

#### Pyrexia

In [37]:
pyrexia = ['pyrexia', 'fever']
pyrexia_symptom = unique_symptoms(pyrexia)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in pyrexia_symptom]
remove_list = ['Rheumatic fever']
pyrexia_symptom = [i for i in pyrexia_symptom if i not in remove_list]
symptom_dict['Pyrexia'] = pyrexia_symptom

#### Fatigue

In [38]:
fatigue = ['fatigue', 'tireness', 'tire']
fatigue_symptom = unique_symptoms(fatigue)
remove_list = ['Respiratory fatigue']
fatigue_symptom = [i for i in fatigue_symptom if i not in remove_list]
symptom_lst_unique = [s for s in symptom_lst_unique if s not in fatigue_symptom]
symptom_dict['Fatigue'] = fatigue_symptom

#### Chills

In [39]:
chills = ['chill', 'cold']
chills_symptom = unique_symptoms(chills)
remove_list = ['Cold agglutinins','Cold agglutinins negative',
               'Cold-stimulus headache','Cold agglutinins positive',
               'Peripheral coldness','Cold urticaria','Cold burn', 
               'Reversed hot-cold sensation', 'Chillblains'
               ]
chills_symptom = [i for i in chills_symptom if i not in remove_list]
symptom_lst_unique = [s for s in symptom_lst_unique if s not in chills_symptom]
symptom_dict['Chills'] = chills_symptom

#### Swelling

In [40]:
swelling = ['swelling', 'swollen']
swelling_symptom = unique_symptoms(swelling)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in swelling_symptom]
symptom_dict['Swelling'] = swelling_symptom

#### Rash

In [41]:
rash = ['rash']
rash_symptom = unique_symptoms(rash)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in rash_symptom]
symptom_dict['Rash'] = rash_symptom

#### Dizziness

In [42]:
dizziness = ['dizziness', 'dizzy']
dizz_symptom = unique_symptoms(dizziness)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in dizz_symptom]
symptom_dict['Dizziness'] = dizz_symptom

#### Nausea

In [44]:
nausea = ['nausea', 'queasy stomach']
nausea_symptom = unique_symptoms(nausea)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in nausea_symptom]
symptom_dict['Nausea'] = nausea_symptom

#### Pruritus

In [138]:
pruritus = ['pruritus', 'skin itchness', 'itchy skin', 'pruritic']
pruritus_symptom = unique_symptoms(pruritus)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in pruritus_symptom]
symptom_dict['Pruritus'] = pruritus_symptom

#### Erythema

In [50]:
erythema = ['erythema']
erythema_symptom = unique_symptoms(erythema)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in erythema_symptom]
symptom_dict['Erythema'] = erythema_symptom

#### Chest pain

In [324]:
chest_pain = ['chest', 'cardiac discomfort']
chest_symptom = unique_symptoms(chest_pain)
remove_list = ['Ultrasound chest', 'Flat chest', 'Chest X-ray normal',
               'Chest scan', 'Chest X-ray', 'Biopsy chest wall', 
               'Chest wall mass', 'Chest wall abscess', 'Chest wall haematoma',
               'Chest X-ray abnormal', 'Chest tube insertion', 'Chest expansion decreased',
              'Chest injury']
chest_symptom = [i for i in chest_symptom if i not in remove_list]
symptom_lst_unique = [s for s in symptom_lst_unique if s not in chest_symptom]


symptom_dict['Chest_pain'] = chest_symptom

#### Myalgia

In [72]:
myalgia = ['myalgia']
myalgia_symptom = unique_symptoms(myalgia)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in myalgia_symptom]

symptom_dict['Myalgia'] = myalgia_symptom

#### Dyspnoea

In [75]:
dyspnoea = ['dyspnoea', 'short breath']
dyspnoea_symptom = unique_symptoms(dyspnoea)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in dyspnoea_symptom]

symptom_dict['Dyspnoea'] = dyspnoea_symptom

#### Paraesthesia

In [78]:
paraesthesia = ['paraesthesia']
paraesthesia_symptom = unique_symptoms(paraesthesia)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in paraesthesia_symptom]

symptom_dict['Paraesthesia'] = paraesthesia_symptom

#### Hypoaesthesia

In [82]:
hypoaesthesia = ['hypoaesthesia']
hypoaesthesia_symptom = unique_symptoms(hypoaesthesia)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in hypoaesthesia_symptom]

symptom_dict['Hypoaesthesia'] = hypoaesthesia_symptom

#### Asthenia

In [88]:
asthenia = ['asthenia']
asthenia_symptom = unique_symptoms(asthenia)
remove_list = ['Myasthenia gravis', 'Ocular myasthenia',
              'Myasthenia gravis crisis']
asthenia_symptom = [i for i in asthenia_symptom if i not in remove_list]
symptom_lst_unique = [s for s in symptom_lst_unique if s not in asthenia_symptom]

symptom_dict['Asthenia'] = asthenia_symptom

#### Muscle weakness

In [91]:
muscle_weakness = ['myasthenia', 'muscle weakness']
muscle_weakness_symptom = unique_symptoms(muscle_weakness)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in muscle_weakness_symptom]

symptom_dict['Muscle_weakness'] = muscle_weakness_symptom

#### Muscle tightness

In [241]:
muscle_tightness = ['muscle tightness']
muscle_tightness_symptom = unique_symptoms(muscle_tightness)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in muscle_tightness_symptom]

symptom_dict['Muscle_tightness'] = muscle_tightness_symptom

#### Vomiting

In [95]:
vomiting = ['vomiting', 'throw up']
vomiting_symptom = unique_symptoms(vomiting)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in vomiting_symptom]

symptom_dict['Vomiting'] = vomiting_symptom

#### Hyperhidrosis

In [103]:
hyperhidrosis = ['hyperhidrosis', 'sweating']
hyperhidrosis_symptom = unique_symptoms(hyperhidrosis)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in hyperhidrosis_symptom]

symptom_dict['Hyperhidrosis'] = hyperhidrosis_symptom

#### Urticaria

In [108]:
urticaria = ['urticaria', 'hives', 'weals', 'nettle rash']
urticaria_symptom = unique_symptoms(urticaria)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in urticaria_symptom]

symptom_dict['Urticaria'] = urticaria_symptom

#### Diarrhoea

In [112]:
diarrhoea = ['diarrhoea', 'loose stool']
diarrhoea_symptom = unique_symptoms(diarrhoea)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in diarrhoea_symptom]

symptom_dict['Diarrhoea'] = diarrhoea_symptom

#### Abdominal issues

In [115]:
abdominal = ['abdominal']
abdominal_symptom = unique_symptoms(abdominal)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in abdominal_symptom]
remove_list = ['Abdominal X-ray','Biopsy abdominal wall', 'Intra-abdominal fluid collection',
              'Abdominal exploration', 'Biopsy abdominal wall normal',
              'Abdominal operation', 'Magnetic resonance imaging abdominal',
              'Magnetic resonance imaging abdominal normal', 'Abdominal cavity drainage']
abdominal_symptom = [i for i in abdominal_symptom if i not in remove_list]

symptom_dict['Abdominal'] = abdominal_symptom

#### Malaise

In [123]:
malaise = ['malaise']
malaise_symptom = unique_symptoms(malaise)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in malaise_symptom]

symptom_dict['Malaise'] = malaise_symptom

#### Cough

In [124]:
cough = ['cough']
cough_symptom = unique_symptoms(cough)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in cough_symptom]

symptom_dict['Cough'] = cough_symptom

#### Tinnitus

In [129]:
tinnitus = ['tinnitus']
tinnitus_symptom = unique_symptoms(tinnitus)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in tinnitus_symptom]

symptom_dict['Tinnitus'] = tinnitus_symptom

#### Palpitations

In [132]:
palpitations = ['palpitations']
palpitations_symptom = unique_symptoms(palpitations)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in palpitations_symptom]

symptom_dict['Palpitations'] = palpitations_symptom

In [141]:
len(symptom_dict.keys())

26

#### Flushing

In [395]:
flushing = ['flushing', 'blushing', 'hot flush']
flushing_symptom = unique_symptoms(flushing)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in flushing_symptom]

symptom_dict['Flushing'] = flushing_symptom

#### Blood pressure increased

In [176]:
blood_increased = ['blood']
blood_increased_symptom = unique_symptoms(blood_increased)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in blood_increased_symptom]
remove_list = [s for s in blood_increased_symptom if s != 'Blood pressure increased']
blood_increased_symptom = [i for i in blood_increased_symptom if i not in remove_list]
#blood_symptom = symptom_info(blood_increased)

symptom_dict['Blood_pressure_increased'] = blood_increased_symptom

#### Feeling abnormal

In [188]:
feeling_abnormal = ['feeling abnormal']
feeling_abnormal_symptom = unique_symptoms(feeling_abnormal)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in feeling_abnormal_symptom]

symptom_dict['Feeling_abnormal'] = feeling_abnormal_symptom

#### Feeling hot

In [191]:
feeling_hot = ['feeling hot']
feeling_hot_symptom = unique_symptoms(feeling_hot)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in feeling_hot_symptom]

symptom_dict['Feeling_hot'] = feeling_hot_symptom

#### Tremor

In [199]:
tremor = ['tremor']
tremor_symptom = unique_symptoms(tremor)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in tremor_symptom]

symptom_dict['Tremor'] = tremor_symptom

#### Lymphadenopathy

In [202]:
lymphadenopathy = ['lymphadenopathy']
lymphadenopathy_symptom = unique_symptoms(lymphadenopathy)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in lymphadenopathy_symptom]

symptom_dict['Lymphadenopathy'] = lymphadenopathy_symptom

#### Loss of consciousness

In [211]:
syncope = ['syncope', 'loss of consciousness']
syncope_symptom = unique_symptoms(syncope)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in syncope_symptom]

symptom_dict['Syncope'] = syncope_symptom

#### Throat irritation

In [356]:
throat = ['throat', 'oropharyngeal pain', 'oropharyngeal discomfort']
throat_symptom = unique_symptoms(throat)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in throat_symptom]
remove_list = ['Ear, nose and throat examination normal',
               'Culture throat negative','Throat lesion',
               'Ear, nose and throat examination','Foreign body in throat',
               'Ear, nose and throat examination abnormal','Culture throat positive']
throat_symptom = [i for i in throat_symptom if i not in remove_list]

symptom_dict['Throat_irritation'] = throat_symptom

#### Unresponsive to Stimuli (include slow response)

In [218]:
stimuli = ['stimuli']
stimuli_symptom = unique_symptoms(stimuli)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in stimuli_symptom]

symptom_dict['Unresponsive_stimuli'] = stimuli_symptom

#### Burning sensation

In [230]:
burning = ['burning sensation', 'skin burning sensation', 'burning']
burning_symptom = unique_symptoms(burning)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in burning_symptom]

symptom_dict['Burning_sensation'] = burning_symptom

#### Other Skin issues

In [257]:
skin = ['skin']
skin_symptom = unique_symptoms(skin)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in skin_symptom]
remove_list = ['Dyskinesia', 'Exposure via skin contact',
              'Skin lesion', 'Skin exfoliation', 'Biopsy skin', 
              'Biliary dyskinesia', 'Biopsy skin normal', 'Skin test positive',
              'Skin test','Skin test negative', 'Skin cosmetic procedure', 'Skin implant',
               'Congenital skin dimples','Tardive dyskinesia', 'Skin wound',
               'Foreign body in skin or subcutaneous tissue', 'Artificial skin graft',
               'Biliary dyskinesia',  'Ventricular dyskinesia',  'Skin laxity',
               'Target skin lesion', 'Chemical burn of skin', 'Skin injury',
              'Skin graft']
skin_symptom = [i for i in skin_symptom if i not in remove_list]

symptom_dict['Skin_issues'] = skin_symptom

#### Dyskinesia

In [261]:
dyskinesia = ['dyskinesia']
dyskinesia_symptom = unique_symptoms(dyskinesia)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in dyskinesia_symptom]

symptom_dict['Dyskinesia'] = dyskinesia_symptom

#### Heart rate increased 

In [271]:
heart_increased = ['heart rate increased']
heart_increased_symptom = unique_symptoms(heart_increased)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in heart_increased_symptom]
remove_list = ['Foetal heart rate increased']
heart_increased_symptom = [i for i in heart_increased_symptom if i not in remove_list]

symptom_dict['Heart_rate_increased'] = heart_increased_symptom

#### Heart rate decreased

In [275]:
heart_decreased = ['heart rate decreased']
heart_decreased_symptom = unique_symptoms(heart_decreased)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in heart_decreased_symptom]
remove_list = ['Foetal heart rate decreased']
heart_decreased_symptom = [i for i in heart_increased_symptom if i not in remove_list]

symptom_dict['Heart_rate_decreased'] = heart_decreased_symptom

#### Heart rate irregular

In [302]:
heart_irregular = ['heart rate irregular', 'cardiac flutter' ]
heart_irregular_symptom = unique_symptoms(heart_irregular)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in heart_irregular_symptom]

symptom_dict['Heart_rate_irregular'] = heart_irregular_symptom 

#### Paralysis (mainly Facial Paralysis)

In [283]:
paralysis = ['paralysis']
paralysis_symptom = unique_symptoms(paralysis)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in paralysis_symptom]

symptom_dict['Paralysis'] = paralysis_symptom 

#### Tachycardia

In [288]:
tachycardia = ['tachycardia']
tachycardia_symptom = unique_symptoms(tachycardia)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in tachycardia_symptom]
remove_list = ['Tachycardia foetal']
tachycardia_symptom = [i for i in tachycardia_symptom if i not in remove_list]

symptom_dict['Tachycardia'] = tachycardia_symptom 

#### Anaphylactic reaction

In [294]:
anaphylactic_reaction = ['anaphylactic reaction']
reaction_symptom = unique_symptoms(anaphylactic_reaction)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in reaction_symptom]

symptom_dict['Anaphylactic_reaction'] = reaction_symptom 

#### Cardiac event (Cardiac arrest, Cardiac failure congestive, Cardiac failure..)

In [319]:
cardiac_event = ['cardiac arrest', 'cardiac failure congestive',
                 'acute cardiac event','cardiac failure chronic',
                 'cardiac failure', 'cardiac failure acute']
cardiac_event_symptom = unique_symptoms(cardiac_event)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in cardiac_event_symptom]

symptom_dict['Cardiac_event'] = cardiac_event_symptom

#### Mobility decreased

In [339]:
mobility = ['mobility decreased']
mobility_symptom = unique_symptoms(mobility)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in mobility_symptom]

symptom_dict['Mobility_decreased'] = mobility_symptom

#### Stroke

In [345]:
stroke = ['cerebrovascular accident']
stroke_symptom = unique_symptoms(stroke)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in stroke_symptom]

symptom_dict['Stroke'] = stroke_symptom

#### Influenza like ilness

In [360]:
influenza = ['influenza', 'influenza like illness']
influenza_symptom = unique_symptoms(influenza)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in influenza_symptom]

symptom_dict['Influenza'] = influenza_symptom

#### SARS-COV-2

In [378]:
sars_cov_2 = ['sars-cov-2 test positive']
covid_symptom = unique_symptoms(sars_cov_2)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in covid_symptom]

symptom_dict['SARS-COV-2'] = covid_symptom

#### Appetite decreased

In [382]:
appetite = ['decreased appetite']
appetite_symptom = unique_symptoms(appetite)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in appetite_symptom]

symptom_dict['Appetite_decreased'] = appetite_symptom

#### Induration

In [391]:
induration = ['induration']
induration_symptom = unique_symptoms(induration)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in induration_symptom]

symptom_dict['Induration'] = induration_symptom

#### Sleep disorder

In [403]:
sleep = ['sleep disorder']
sleep_symptom = unique_symptoms(sleep)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in sleep_symptom]

symptom_dict['Sleep disorder'] = sleep_symptom

#### Condition aggravated

In [418]:
condition = ['condition aggravated']
condition_symptom = unique_symptoms(condition)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in condition_symptom]

symptom_dict['Condition_aggravated'] = condition_symptom

In [428]:
# Saving dict
p = open('symptom.pkl', 'wb')
pickle.dump(symptom_dict, p)
p.close()

In [429]:
# Loading the dict 
p = open('symptom.pkl', 'rb')
output = pickle.load(p)

output
p.close()

In [416]:
test = ['loss']
test_symptom = symptom_info(test)

In [None]:
test_symptom = unique_symptoms(test)
symptom_lst_unique = [s for s in symptom_lst_unique if s not in test_symptom]

In [417]:
test_symptom

{'Loss of consciousness': 7173,
 'Loss of personal independence in daily activities': 2483,
 'Glossitis': 91,
 'Glossodynia': 351,
 'Sensory loss': 310,
 'Trichoglossia': 5,
 'Abnormal loss of weight': 33,
 'Macroglossia': 2,
 'Sudden hearing loss': 157,
 'Ankyloglossia congenital': 2,
 'Tooth loss': 6,
 'Loss of control of legs': 40,
 'Central vision loss': 4,
 'Loss of bladder sensation': 11,
 'Loss of proprioception': 10,
 'Sudden visual loss': 11,
 'Thyroglossal cyst': 2,
 'Hypoglossal nerve paresis': 1,
 'Blood loss anaemia': 17,
 'Loss of libido': 8,
 'Glossopharyngeal neuralgia': 1,
 'Loss of dreaming': 1,
 'Loss of employment': 1,
 'Hypoglossal nerve paralysis': 1}

## Creating Symtom dataset for Statistical Analysis

In [17]:
# Loading the symptom dictionary  
p = open('symptom.pkl', 'rb')
symptom_dict = pickle.load(p)
p.close()

In [78]:
symptom_columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5']
symptom_idx = {}
for key in symptom_dict:
    idx_lsts = []
    for cols in symptom_columns:
        idx_lst, sym_lst = symptomfind(list(map(str.lower, symptom_dict[key])), sym[cols])
        idx_lsts.extend(idx_lst)
    
    symptom_idx[key] = idx_lsts

In [79]:
symptom_ID = {}

for key in symptom_idx:
    symptom_ID[key] = [sym.iloc[i].VAERS_ID for i in symptom_idx[key]]

In [80]:
symptom_ID['Joint_issues'] = symptom_ID['Joint issues']

In [81]:
del symptom_ID['Joint issues']

In [None]:
for key in symptom_ID:
    lst = []
    for ID in covid_adv.VAERS_ID:
        if ID in symptom_ID[key]:
            lst.append('Yes')
        else:
            lst.append('No')
    covid_adv[key] = lst

In [86]:
for key in symptom_ID:
    lst = ['Yes' if ID in symptom_ID[key] else 'No' for ID in covid_adv.VAERS_ID]
    covid_adv[key] = lst

In [87]:
covid_adv

Unnamed: 0,VAERS_ID,AGE_YRS,SEX,DIED,VAX_MANU,Headache,Pyrexia,Fatigue,Chills,Swelling,...,Pain,Mobility_decreased,Stroke,Influenza,SARS-COV-2,Appetite_decreased,Induration,Sleep disorder,Condition_aggravated,Joint_issues
0,1410490,54.0,F,No,PFIZER\BIONTECH,No,No,No,No,No,...,No,No,No,No,No,No,No,No,No,No
1,1413866,67.0,M,No,MODERNA,No,No,No,No,Yes,...,No,No,No,No,No,No,Yes,No,No,No
2,896636,47.0,F,No,MODERNA,No,No,No,No,No,...,No,No,No,No,No,No,No,No,No,No
3,902418,56.0,F,No,PFIZER\BIONTECH,No,No,No,No,No,...,Yes,No,No,No,No,No,No,No,No,No
4,902440,35.0,F,No,PFIZER\BIONTECH,No,No,Yes,No,No,...,No,Yes,No,No,No,No,No,No,No,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
404844,1427468,17.0,F,No,PFIZER\BIONTECH,No,No,No,No,No,...,No,No,No,No,No,No,No,No,No,No
404845,1427471,18.0,M,No,MODERNA,No,No,No,No,No,...,No,No,No,No,No,No,No,No,No,No
404846,1427472,54.0,F,No,MODERNA,No,No,No,No,No,...,No,No,No,No,No,No,No,No,No,No
404847,1427475,87.0,F,Yes,MODERNA,No,No,No,No,No,...,No,No,No,No,No,No,No,No,No,No


In [92]:
covid_adv.to_csv('covid_adv_symptom.csv', index = False)