# Importing Modules

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn

# Importing Dataset

In [None]:

# Read the training set from the working directory and preview its first five rows.
TRAINING_CSV = "training.csv"
data = pd.read_csv(TRAINING_CSV)
data.head()

In [None]:
# Column labels of the loaded frame; the display also reports how many there are.
data.columns

In [None]:
# Drop the column "Unnamed: 133" because it is not needed for modelling.
# `columns=` already names the axis, so the contradictory `axis=0` is gone, and
# `errors="ignore"` lets the cell be re-run after the column has been removed.
data = data.drop(columns="Unnamed: 133", errors="ignore")
data.columns

In [None]:
# Summary statistics of the frame, as produced by pandas' default `describe`.
data.describe()

In [None]:
# Distinct target labels found in the "prognosis" column, then how many there are.
prognosis = data["prognosis"]
print(list(prognosis.unique()))
print(prognosis.nunique())

In [None]:
# Dimensions of the frame as (rows, columns).
data.shape

# Data Preprocessing 

In [None]:
# Count the missing entries in every column.
data.isna().sum()

In [None]:
# Give every distinct prognosis label an integer code, numbered in order of first appearance.
a = list(data["prognosis"].unique())
mapped = {label: code for code, label in enumerate(a)}
mapped

In [None]:
# Replace the string labels in "prognosis" with their integer codes from `mapped`.
# The guard keeps the cell safe to re-run: pushing an already-encoded column through
# the string-keyed dictionary again would match nothing and turn every label into NaN.
if not pd.api.types.is_numeric_dtype(data["prognosis"]):
    data["prognosis"] = data["prognosis"].map(mapped)
# Fail loudly if any label was missing from the dictionary.
assert data["prognosis"].notna().all(), "prognosis contains labels missing from `mapped`"

In [None]:
# Display the frame to inspect the "prognosis" column after the mapping step.
data

In [None]:
from collections import defaultdict
# Registry of model name -> accuracy, filled in by the model sections that follow.
# NOTE(review): the visible cells store one float per key, so the `list` default
# factory is never exercised; a plain dict may express the intent better — confirm.
accuracy = defaultdict(list) #to map ml algorithms with their accuracies

# Data Visualisation

In [None]:
# Distribution of the values in the "itching" column.
sns.histplot(data=data, x="itching")

In [None]:
# Heatmap of the pairwise correlations between the numeric columns.
# Feeding the raw frame to `heatmap` with annot=True draws one annotated cell per
# (row, column) entry of the dataset, which is unreadable and extremely slow;
# the correlation matrix is the summary a heatmap can actually convey.
corr = data.select_dtypes("number").corr()
sns.heatmap(data=corr, cmap="coolwarm", center=0)

# Using Decision Tree 

In [None]:
# Features are every column except the target. Selecting by name instead of
# "all but the last column" keeps "prognosis" out of X whatever the column order is.
X = data.drop(columns="prognosis").values
y = data['prognosis'].values
X

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for validation. The fixed seed makes the split, and every
# score computed from it, identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Seed the estimator so that the fitted tree is reproducible between runs.
dt = DecisionTreeClassifier(random_state=42)
# `fit` returns the estimator itself, so `model` and `dt` are the same object.
model = dt.fit(X_train,y_train)
model

In [None]:
# Predict labels for the held-out validation split with the current `model`.
y_pred = model.predict(X_test)
y_pred

In [None]:
from sklearn.metrics import accuracy_score, classification_report
# Per-class precision / recall / F1 on the validation split.
validation_report = classification_report(y_true=y_test, y_pred=y_pred)
print(validation_report)

In [None]:
# Load the separate test file that is used to score every model.
datatest = pd.read_csv("testing.csv")
# Every column but the last is treated as a feature.
dtst = datatest.iloc[:,:-1].values
# The classifiers are fitted on integer-encoded labels, so the known test labels must
# go through the same `mapped` dictionary before they can be compared with predictions.
resknown = datatest['prognosis'].map(mapped).values
assert not pd.isna(resknown).any(), "testing.csv has a prognosis label that is missing from `mapped`"
datatest.head()

In [None]:
# Predict labels for the feature matrix built from testing.csv.
result = model.predict(dtst)

result

In [None]:
# `result` holds predictions for testing.csv, so it must be scored against that file's
# labels (`resknown`); `y_test` belongs to the validation split and has a different length.
print(classification_report(resknown,result))

In [None]:
# Test-set accuracy of the decision tree, stored in the shared registry and displayed.
ac = accuracy_score(resknown, result)
accuracy["decision tree"] = ac
ac

# Using Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
# A random forest is stochastic (bootstrap samples, feature subsets); seeding it
# keeps the reported accuracy stable from run to run.
rf = RandomForestClassifier(random_state=42)
model = rf.fit(X_train,y_train)
# Predictions on the validation split.
y_pred = model.predict(X_test)
y_pred

In [None]:
# Score the random forest on the test file and record its accuracy.
result = model.predict(dtst)
ac = accuracy_score(resknown, result)
accuracy["Random Forest"] = ac
ac

# Using Naive Bayes Algorithm

In [None]:
from sklearn.naive_bayes import GaussianNB
# Fit a Gaussian naive Bayes classifier on the training split.
gs = GaussianNB()
gs.fit(X_train, y_train)
# `fit` returns the estimator itself, so `model` is the fitted `gs`.
model = gs
model

In [None]:
# Predict labels for the validation split with the current `model`.
y_pred = model.predict(X_test)
y_pred

In [None]:
# Per-class precision / recall / F1 for the validation predictions.
print(classification_report(y_test,y_pred))

                                         precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        24
                                   AIDS       1.00      1.00      1.00        22
                                   Acne       1.00      1.00      1.00        22
                    Alcoholic hepatitis       1.00      1.00      1.00        21
                                Allergy       1.00      1.00      1.00        19
                              Arthritis       1.00      1.00      1.00        22
                       Bronchial Asthma       1.00      1.00      1.00        24
                   Cervical spondylosis       1.00      1.00      1.00        28
                            Chicken pox       1.00      1.00      1.00        21
                    Chronic cholestasis       1.00      1.00      1.00        33
                            Common Cold       1.00      1.00      1.00        25
                           

In [None]:
from sklearn import metrics
# Fraction of validation rows whose predicted label equals the true label.
acc = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
acc

1.0

In [None]:
#printing the confusion matrix
from sklearn.metrics import confusion_matrix
# scikit-learn expects (y_true, y_pred): rows are the true classes and columns the
# predicted ones. Passing them the other way round yields the transposed matrix.
con = confusion_matrix(y_test, y_pred)
print(con)


[[24  0  0 ...  0  0  0]
 [ 0 22  0 ...  0  0  0]
 [ 0  0 22 ...  0  0  0]
 ...
 [ 0  0  0 ... 21  0  0]
 [ 0  0  0 ...  0 24  0]
 [ 0  0  0 ...  0  0 22]]


In [None]:
#calculating the accuracy from confusion matrix
# Correct predictions sit on the main diagonal; every other entry is a misclassification.
# np.trace / sum replace the hand-written double loop, and the cell no longer
# overwrites the global `a` defined by the label-mapping cell.
correct_count = np.trace(con)
total_count = con.sum()
false_count = total_count - correct_count
print("Correct Prediction",correct_count)
print("Wrong Prediction",false_count)
print("Accuracy Score ",correct_count/total_count)

Correct Prediction 984
Wrong Prediction 0
Accuracy Score  1.0


In [None]:
# Predict labels for the testing.csv feature matrix with the current `model`.
result= model.predict(dtst)
result



array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Jaundice',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Typhoid', 'Typhoid', 'Hepatitis D',
       'Hepatitis D', 'Hepatitis C', 'Hepatitis D', 'Hepatitis D',
       'Alcoholic hepatitis', 'Hepatitis D', 'Typhoid',
       'Bronchial Asthma', 'Dimorphic hemmorhoids(piles)', 'Heart attack',
       'Varicose veins', 'Hepatitis D', 'Jaundice', 'Hepatitis D',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo', 'Jaundice'],
      dtype='<U39')

In [None]:
# Accuracy of the naive Bayes predictions on the test file, recorded in the registry.
ac = metrics.accuracy_score(y_true=resknown, y_pred=result)
accuracy["Naive Bayes"] = ac
ac

0.7142857142857143

# Using Support Vector Machine

In [None]:
from sklearn.svm import SVC
# Fit a support vector classifier with scikit-learn's default settings.
s = SVC()
s.fit(X_train, y_train)
# `fit` returns the estimator itself, so `model` is the fitted `s`.
model = s
# Predictions on the validation split.
y_pred = model.predict(X_test)

In [None]:
# Per-class precision / recall / F1 for the validation predictions.
print(classification_report(y_test,y_pred))

                                         precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        24
                                   AIDS       1.00      1.00      1.00        22
                                   Acne       1.00      1.00      1.00        22
                    Alcoholic hepatitis       1.00      1.00      1.00        21
                                Allergy       1.00      1.00      1.00        19
                              Arthritis       1.00      1.00      1.00        22
                       Bronchial Asthma       1.00      1.00      1.00        24
                   Cervical spondylosis       1.00      1.00      1.00        28
                            Chicken pox       1.00      1.00      1.00        21
                    Chronic cholestasis       1.00      1.00      1.00        33
                            Common Cold       1.00      1.00      1.00        25
                           

In [None]:
# Score the SVM on the test file and record its accuracy.
result = model.predict(dtst)
ac = accuracy_score(resknown,result)
accuracy["SVM"] = ac
# Only the last expression of a cell is displayed, so the value is shown here
# rather than on a bare line in the middle of the cell where it had no effect.
ac

In [None]:
# Show every accuracy recorded in the registry, keyed by model name.
print(accuracy)

defaultdict(<class 'list'>, {'Decision tree': 1.0, 'decision tree': 0.9761904761904762, 'Random Forest': 0.9761904761904762, 'Naive Bayes': 0.7142857142857143, 'SVM': 0.21428571428571427})
