In [136]:
import pandas as pd
import numpy as np


In [137]:
df = pd.read_csv("disease_symptoms.csv")


In [138]:
df.rename(columns={col: col.lower() for col in df.columns}, inplace=True)
df.rename(columns={col: col.replace('ptom_', '')
          for col in df.columns}, inplace=True)
df.drop_duplicates(inplace=True)


In [139]:
sym_lst = np.array([])

temp_df = df.iloc[:, 1:]

for sym in temp_df:
    sym_lst = np.hstack((sym_lst, temp_df[sym]))

symptoms = pd.DataFrame(sym_lst)
symptoms.drop_duplicates(inplace=True)
del(sym_lst)
del(temp_df)


In [140]:
sym_sev = pd.read_csv('../symptom_severity.csv', index_col='Symptom')
sym_sev.drop_duplicates(inplace=True)


In [141]:
sym_sev.head()


Unnamed: 0_level_0,weight
Symptom,Unnamed: 1_level_1
itching,1
skin_rash,3
nodal_skin_eruptions,4
shivering,5
burning_micturition,6


In [142]:
symptoms.rename(columns={0: 'symptom'}, inplace=True)
symptoms.dropna(inplace=True)
symptoms['symptom'] = symptoms['symptom'].apply(lambda x: x.replace(' ', ''))
symptoms.set_index('symptom', inplace=True)
symptoms.head()


itching
skin_rash
continuous_sneezing
shivering
stomach_pain


In [143]:
symptoms['weights'] = np.nan


In [144]:
avg_weight = sym_sev['weight'].mean()


In [145]:
for index, item in symptoms.iterrows():
    try:
        symptoms.loc[index, 'weights'] = sym_sev.loc[index]['weight']
    except:
        symptoms.loc[index, 'weights'] = avg_weight


In [146]:
symptoms.head()


Unnamed: 0_level_0,weights
symptom,Unnamed: 1_level_1
itching,1.0
skin_rash,3.0
continuous_sneezing,4.0
shivering,5.0
stomach_pain,4.0


In [147]:
df.head(10)


Unnamed: 0,disease,sym1,sym2,sym3,sym4,sym5,sym6,sym7,sym8,sym9,sym10,sym11,sym12,sym13,sym14,sym15,sym16,sym17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic_patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic_patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic_patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic_patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,
10,Allergy,continuous_sneezing,shivering,chills,watering_from_eyes,,,,,,,,,,,,,
11,Allergy,shivering,chills,watering_from_eyes,,,,,,,,,,,,,,
12,Allergy,continuous_sneezing,chills,watering_from_eyes,,,,,,,,,,,,,,
13,Allergy,continuous_sneezing,shivering,watering_from_eyes,,,,,,,,,,,,,,
14,Allergy,continuous_sneezing,shivering,chills,,,,,,,,,,,,,,


In [148]:
def clean_word(string):
	s=string.lower().split(' ')
	s = [x for x in s if x != '']
	return ' '.join(s)

In [149]:
df.reset_index(inplace=True)
df['disease']=df['disease'].apply(clean_word)
df.set_index('disease',inplace=True)

In [150]:
%matplotlib inline


In [151]:
discription_df = pd.read_csv('disease_description.csv')
discription_df['Disease'] = discription_df['Disease'].apply(clean_word)
discription_df

Unnamed: 0,Disease,Description
0,drug reaction,An adverse drug reaction (ADR) is an injury ca...
1,malaria,An infectious disease caused by protozoan para...
2,allergy,An allergy is an immune system response to a f...
3,hypothyroidism,"Hypothyroidism, also called underactive thyroi..."
4,psoriasis,Psoriasis is a common skin disorder that forms...
5,gerd,"Gastroesophageal reflux disease, or GERD, is a..."
6,chronic cholestasis,"Chronic cholestatic diseases, whether occurrin..."
7,hepatitis a,Hepatitis A is a highly contagious liver infec...
8,osteoarthristis,Osteoarthritis is the most common form of arth...
9,(vertigo) paroymsal positional vertigo,Benign paroxysmal positional vertigo (BPPV) is...


In [152]:
discription_df.rename(columns={"Description": "description","Disease": "disease"}, inplace=True)
discription_df.set_index('disease', inplace=True)
discription_df.sort_index(inplace=True)

df = discription_df

df.head()

Unnamed: 0_level_0,description
disease,Unnamed: 1_level_1
(vertigo) paroymsal positional vertigo,Benign paroxysmal positional vertigo (BPPV) is...
acne,"Acne vulgaris is the formation of comedones, p..."
aids,Acquired immunodeficiency syndrome (AIDS) is a...
alcoholic hepatitis,"Alcoholic hepatitis is a diseased, inflammator..."
allergy,An allergy is an immune system response to a f...


In [153]:
precaution_df = pd.read_csv('disease_precaution.csv')
precaution_df['Disease'] = precaution_df['Disease'].apply(clean_word)
precaution_df.set_index('Disease',inplace=True)
precaution_df.sort_index(inplace=True)
precaution_df.head()

Unnamed: 0_level_0,Precaution_1,Precaution_2,Precaution_3,Precaution_4
Disease,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
(vertigo) paroymsal positional vertigo,lie down,avoid sudden change in body,avoid abrupt head movment,relax
acne,bath twice,avoid fatty spicy food,drink plenty of water,avoid too many products
aids,avoid open cuts,wear ppe if possible,consult doctor,follow up
alcoholic hepatitis,stop alcohol consumption,consult doctor,medication,follow up
allergy,apply calamine,cover area with bandage,,use ice to compress itching


In [154]:
def combine(data):
	s=''
	for x in data:
		if type(x) != float:
			s = s + "," + x
	return s[1:]

In [155]:
df['precautions'] = precaution_df.agg(combine,axis=1)
df

Unnamed: 0_level_0,description,precautions
disease,Unnamed: 1_level_1,Unnamed: 2_level_1
(vertigo) paroymsal positional vertigo,Benign paroxysmal positional vertigo (BPPV) is...,"lie down,avoid sudden change in body,avoid abr..."
acne,"Acne vulgaris is the formation of comedones, p...","bath twice,avoid fatty spicy food,drink plenty..."
aids,Acquired immunodeficiency syndrome (AIDS) is a...,"avoid open cuts,wear ppe if possible,consult d..."
alcoholic hepatitis,"Alcoholic hepatitis is a diseased, inflammator...","stop alcohol consumption,consult doctor,medica..."
allergy,An allergy is an immune system response to a f...,"apply calamine,cover area with bandage,use ice..."
arthritis,Arthritis is the swelling and tenderness of on...,"exercise,use hot and cold therapy,try acupunct..."
bronchial asthma,Bronchial asthma is a medical condition which ...,"switch to loose cloothing,take deep breaths,ge..."
cervical spondylosis,Cervical spondylosis is a general term for age...,"use heating pad or cold pack,exercise,take otc..."
chicken pox,Chickenpox is a highly contagious disease caus...,"use neem in bathing ,consume neem leaves,take ..."
chronic cholestasis,"Chronic cholestatic diseases, whether occurrin...","cold baths,anti itch medicine,consult doctor,e..."


In [156]:
df.dtypes


description    object
precautions    object
dtype: object

In [157]:
df.iloc[13,1] = "avoid fatty spicy food,consume witch hazel,warm bath with epsom salt,consume alovera juice"

In [158]:
df.isna().sum()

description    0
precautions    0
dtype: int64

In [159]:
df

Unnamed: 0_level_0,description,precautions
disease,Unnamed: 1_level_1,Unnamed: 2_level_1
(vertigo) paroymsal positional vertigo,Benign paroxysmal positional vertigo (BPPV) is...,"lie down,avoid sudden change in body,avoid abr..."
acne,"Acne vulgaris is the formation of comedones, p...","bath twice,avoid fatty spicy food,drink plenty..."
aids,Acquired immunodeficiency syndrome (AIDS) is a...,"avoid open cuts,wear ppe if possible,consult d..."
alcoholic hepatitis,"Alcoholic hepatitis is a diseased, inflammator...","stop alcohol consumption,consult doctor,medica..."
allergy,An allergy is an immune system response to a f...,"apply calamine,cover area with bandage,use ice..."
arthritis,Arthritis is the swelling and tenderness of on...,"exercise,use hot and cold therapy,try acupunct..."
bronchial asthma,Bronchial asthma is a medical condition which ...,"switch to loose cloothing,take deep breaths,ge..."
cervical spondylosis,Cervical spondylosis is a general term for age...,"use heating pad or cold pack,exercise,take otc..."
chicken pox,Chickenpox is a highly contagious disease caus...,"use neem in bathing ,consume neem leaves,take ..."
chronic cholestasis,"Chronic cholestatic diseases, whether occurrin...","cold baths,anti itch medicine,consult doctor,e..."


In [160]:
df.to_csv("../data.csv")