# Importing Dependences

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix ,classification_report
from sklearn.ensemble import RandomForestClassifier

# Loading Data

In [2]:
data  = pd.read_csv("dataset.csv")
data_sevrity = pd.read_csv("Symptom-severity.csv")

In [3]:
data_sevrity.set_index('Symptom').head()

Unnamed: 0_level_0,weight
Symptom,Unnamed: 1_level_1
itching,1
skin_rash,3
nodal_skin_eruptions,4
continuous_sneezing,4
shivering,5


# Convert data_severity to Dictionary

In [4]:
# Setting 'Symptom' as the index 
# Using .T.to_dict() to create a nested dictionary

data_dict = data_sevrity.set_index('Symptom').T.to_dict()

  data_dict = data_sevrity.set_index('Symptom').T.to_dict()


In [5]:
data_dict.keys()

dict_keys(['itching', 'skin_rash', 'nodal_skin_eruptions', 'continuous_sneezing', 'shivering', 'chills', 'joint_pain', 'stomach_pain', 'acidity', 'ulcers_on_tongue', 'muscle_wasting', 'vomiting', 'burning_micturition', 'spotting_urination', 'fatigue', 'weight_gain', 'anxiety', 'cold_hands_and_feets', 'mood_swings', 'weight_loss', 'restlessness', 'lethargy', 'patches_in_throat', 'irregular_sugar_level', 'cough', 'high_fever', 'sunken_eyes', 'breathlessness', 'sweating', 'dehydration', 'indigestion', 'headache', 'yellowish_skin', 'dark_urine', 'nausea', 'loss_of_appetite', 'pain_behind_the_eyes', 'back_pain', 'constipation', 'abdominal_pain', 'diarrhoea', 'mild_fever', 'yellow_urine', 'yellowing_of_eyes', 'acute_liver_failure', 'fluid_overload', 'swelling_of_stomach', 'swelled_lymph_nodes', 'malaise', 'blurred_and_distorted_vision', 'phlegm', 'throat_irritation', 'redness_of_eyes', 'sinus_pressure', 'runny_nose', 'congestion', 'chest_pain', 'weakness_in_limbs', 'fast_heart_rate', 'pain_d

In [6]:
data.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,


# Removing space between words 

In [7]:
# use strip() to remove any unwanted spaces or enters before or after the word 

def remove_space_between_word(dataset):
    for col in dataset.columns:
        for i in range(len(dataset[col])):
            if (type(dataset[col][i]) == str ):
                dataset[col][i] = dataset[col][i].strip()
                dataset[col][i] = dataset[col][i].replace(" ", "_")
    return data

In [8]:
new_df = remove_space_between_word(data)
new_df.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal_infection,itching,skin_rash,nodal_skin_eruptions,dischromic__patches,,,,,,,,,,,,,
1,Fungal_infection,skin_rash,nodal_skin_eruptions,dischromic__patches,,,,,,,,,,,,,,
2,Fungal_infection,itching,nodal_skin_eruptions,dischromic__patches,,,,,,,,,,,,,,
3,Fungal_infection,itching,skin_rash,dischromic__patches,,,,,,,,,,,,,,
4,Fungal_infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,


# Replacing the symptoms by their weight

In [9]:
def encode_data(dataset , data_dict_weigth):
    cols = dataset.columns
    for columnName in cols:
        for i in range(len(dataset[columnName])):
            try:
            #print(data_dict[data2[columnName][i]]["weight"])
                dataset[columnName][i] = data_dict[dataset[columnName][i]]["weight"]
            except:
                pass
    dataset = dataset.fillna(0) # put empty cell to 0
    dataset = dataset.replace("foul_smell_of_urine" , 5)
    dataset = dataset.replace("dischromic__patches" , 6)
    dataset = dataset.replace("spotting__urination" , 6)
    return dataset

In [35]:
df = encode_data(new_df , data_dict)
df.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal_infection,1,3,4,6,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Fungal_infection,3,4,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Fungal_infection,1,4,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Fungal_infection,1,3,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Fungal_infection,1,3,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Check if all the Symptoms are replace by their weigth

In [36]:
df.dtypes

Disease       object
Symptom_1      int64
Symptom_2      int64
Symptom_3      int64
Symptom_4      int64
Symptom_5      int64
Symptom_6      int64
Symptom_7      int64
Symptom_8      int64
Symptom_9      int64
Symptom_10     int64
Symptom_11     int64
Symptom_12     int64
Symptom_13     int64
Symptom_14     int64
Symptom_15     int64
Symptom_16     int64
Symptom_17     int64
dtype: object

### All the columns are 'int' expect Disease, meaning all the Symptoms have been replace by their weights

# Splitng the Data

In [37]:
x = df.drop('Disease' , axis =1)
y = data["Disease"]

In [38]:
x_train, x_test, y_train, y_test = train_test_split(x, y, shuffle=True, train_size = 0.70)

# Scaling the Data

In [40]:
scaler = StandardScaler()

scaler.fit(x_train)
x_train = scaler.transform(x_train)

x_test = scaler.transform(x_test)


# Training the Model

In [41]:
randomFC = RandomForestClassifier()
randomFC.fit(x_train, y_train)
result = randomFC.predict(x_test)

# Testing the Model

In [42]:
print(classification_report(y_test, result))
print('---------------------------------------------------------------------------------')
print('F1-score% =', f1_score(y_test, result, average='macro')*100, '|', 'Accuracy% =', accuracy_score(y_test, result)*100)

                                         precision    recall  f1-score   support

(vertigo)_Paroymsal__Positional_Vertigo       1.00      1.00      1.00        35
                                   AIDS       1.00      1.00      1.00        35
                                   Acne       1.00      1.00      1.00        39
                    Alcoholic_hepatitis       1.00      1.00      1.00        42
                                Allergy       1.00      0.76      0.87        34
                              Arthritis       1.00      1.00      1.00        32
                       Bronchial_Asthma       1.00      1.00      1.00        37
                   Cervical_spondylosis       0.94      1.00      0.97        29
                            Chicken_pox       1.00      1.00      1.00        27
                    Chronic_cholestasis       1.00      1.00      1.00        37
                            Common_Cold       1.00      1.00      1.00        34
                           

# Testing for individual data

### Scaled Input Passed to the Model

In [43]:
Input = [[-0.30754719,  2.34972433, -0.13732875, -1.06139671, -0.10295526,
       -1.12685907, -0.88774374, -0.77166929, -0.69583021, -0.63920226,
       -0.52509095, -0.39356317, -0.32484926, -0.24829141, -0.21977579,
       -0.19079031, -0.12013738]]
print(Input)

[[-0.30754719, 2.34972433, -0.13732875, -1.06139671, -0.10295526, -1.12685907, -0.88774374, -0.77166929, -0.69583021, -0.63920226, -0.52509095, -0.39356317, -0.32484926, -0.24829141, -0.21977579, -0.19079031, -0.12013738]]


### Final Output from the model

In [44]:
Output = randomFC.predict(Input)
Output

array(['Impetigo'], dtype=object)

# Providing Solution for the Disease

In [45]:
sol = pd.read_csv('solution.csv')
sol.head()

Unnamed: 0,Disease,Precaution_1,Precaution_2,Precaution_3,Precaution_4,Description
0,Drug Reaction,stop irritation,consult nearest hospital,stop taking drug,follow up,An adverse drug reaction (ADR) is an injury ca...
1,Malaria,Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out,An infectious disease caused by protozoan para...
2,Allergy,apply calamine,cover area with bandage,,use ice to compress itching,An allergy is an immune system response to a f...
3,Hypothyroidism,reduce stress,exercise,eat healthy,get proper sleep,"Hypothyroidism, also called underactive thyroi..."
4,Psoriasis,wash hands with warm soapy water,stop bleeding using pressure,consult doctor,salt baths,Psoriasis is a common skin disorder that forms...


In [46]:
def Description(Output):
    Model_Output = ' '.join(Output)
    for i in range(0, 41):
        if sol['Disease'][i] == Model_Output:
            return sol['Description'][i]
            

In [48]:
def Precaution(Output):
    Model_Output = ' '.join(Output)
    for i in range(0, 41):
        if sol['Disease'][i] == Model_Output:
            return  sol['Precaution_1'][i], sol['Precaution_2'][i], sol['Precaution_3'][i], sol['Precaution_4'][i]

### Description for the disease

In [25]:
Description(Output)

"Impetigo (im-puh-TIE-go) is a common and highly contagious skin infection that mainly affects infants and children. Impetigo usually appears as red sores on the face, especially around a child's nose and mouth, and on hands and feet. The sores burst and develop honey-colored crusts."

### Treatment for the Disease

In [49]:
Precaution(Output)

('soak affected area in warm water',
 'use antibiotics',
 'remove scabs with wet compressed cloth',
 'consult doctor')

# Saving the model for deploynment 

In [14]:
import joblib

### Save the Scaling function

In [28]:
# joblib.dump(scaler,'Scaler.pkl')

['Scaler.pkl']

### Save the Model 

In [29]:
# joblib.dump(randomFC,'Model.pkl')

['Model.pkl']

### Saving the Symptoms

In [15]:
# joblib.dump(list(data_sevrity['Symptom']),'Symptoms.pkl')

['Symptoms.pkl']

### Saving Input transformer

In [21]:
# joblib.dump(data_sevrity,'input_transformer.pkl')

['input_transformer.pkl']