In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score , confusion_matrix 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
df= pd.read_csv('migraine_symptom_classification.csv')
df.shape

(400, 24)

In [15]:
x = df.drop('Type' , axis = 1)
y = df['Type']
#data splitting 
X_train , X_test , Y_train , Y_test = train_test_split(
    x, y ,test_size=0.2 , random_state=42
)

In [16]:
#building Random forest model
rf_model = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42
)
# training the model 
rf_model.fit(X_train,Y_train)

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [17]:
#prediction for the testing data
Y_pred = rf_model.predict(X_test)

In [18]:
#accuracy evaluation for Random forest 
accuracy = accuracy_score(Y_test,Y_pred) * 100
conf_matrix = confusion_matrix(Y_test, Y_pred)
print(f"Random forest accuracy {accuracy:.4f}")
print("Confusion Matrix\n", conf_matrix)

Random forest accuracy 92.5000
Confusion Matrix
 [[ 5  1  0  0  0  0  0]
 [ 0  2  1  0  0  0  0]
 [ 0  0 13  0  0  0  0]
 [ 0  0  1  3  0  0  0]
 [ 0  0  0  0  0  2  0]
 [ 0  0  0  0  1 48  0]
 [ 0  0  0  0  0  0  3]]


In [19]:
#Standard scalar for KNN model
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)

X_test_scaled = scaler.transform(X_test)

X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)

X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

print(X_train_scaled.head())

        Age  Duration  Frequency  Location  Character  Intensity    Nausea  \
0  1.085305  1.824081   1.588069  0.114859   0.101136   0.671163  0.097282   
1  1.003240  1.824081   1.588069  0.114859   0.101136   0.671163  0.097282   
2  1.905951 -0.817122  -0.826098  0.114859   0.101136  -0.607243  0.097282   
3 -1.376634 -0.817122  -0.826098  0.114859   0.101136  -0.607243  0.097282   
4  0.264658 -0.817122  -0.826098 -3.560632  -3.494823  -3.164054  0.097282   

      Vomit  Phonophobia  Photophobia  ...  Dysarthria   Vertigo  Tinnitus  \
0 -0.693889     0.138233     0.125988  ...   -0.055989  2.765619 -0.258199   
1 -0.693889     0.138233     0.125988  ...   -0.055989 -0.361583 -0.258199   
2  1.441153     0.138233     0.125988  ...   -0.055989 -0.361583 -0.258199   
3 -0.693889     0.138233     0.125988  ...   -0.055989 -0.361583 -0.258199   
4  1.441153     0.138233     0.125988  ...   -0.055989 -0.361583 -0.258199   

   Hypoacusis  Diplopia    Defect  Ataxia  Conscience  Paresth

In [20]:
#KNN model 
k_value = 5
knn_model = KNeighborsClassifier(n_neighbors=k_value)
knn_model.fit(X_train_scaled , Y_train)

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [21]:
#prediction and evaluation
Y_pred = knn_model.predict(X_test_scaled)
accuracy = accuracy_score(Y_test,Y_pred) * 100
conf = confusion_matrix(Y_test,Y_pred)
print(f"KNN accuracy {accuracy:.4f}")
print("Confusion Matrix\n", conf_matrix)

KNN accuracy 83.7500
Confusion Matrix
 [[ 5  1  0  0  0  0  0]
 [ 0  2  1  0  0  0  0]
 [ 0  0 13  0  0  0  0]
 [ 0  0  1  3  0  0  0]
 [ 0  0  0  0  0  2  0]
 [ 0  0  0  0  1 48  0]
 [ 0  0  0  0  0  0  3]]


In [22]:
#Making and training naive bayes model
gnb_model = GaussianNB()
gnb_model.fit(X_train,Y_train)

0,1,2
,priors,
,var_smoothing,1e-09


In [23]:
#accuracy and evaluation for naive bayes
Y_pred = gnb_model.predict(X_test)
acuuracy = accuracy_score(Y_test , Y_pred) * 100
conf = confusion_matrix(Y_test,Y_pred)
print(f"Naive Bayes accuracy {accuracy:.4f}")
print("Confusion Matrix\n", conf_matrix)

Naive Bayes accuracy 83.7500
Confusion Matrix
 [[ 5  1  0  0  0  0  0]
 [ 0  2  1  0  0  0  0]
 [ 0  0 13  0  0  0  0]
 [ 0  0  1  3  0  0  0]
 [ 0  0  0  0  0  2  0]
 [ 0  0  0  0  1 48  0]
 [ 0  0  0  0  0  0  3]]


In [24]:
# this is desicion tree
dt_model=DecisionTreeClassifier(
    criterion='gini' ,
    max_depth=5,
    random_state=42
)
dt_model.fit(X_train,Y_train)

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [25]:
#evaluation for desicion tree
Y_pred = dt_model.predict(X_test)

accuracy = accuracy_score(Y_test, Y_pred) * 100
conf_matrix = confusion_matrix(Y_test, Y_pred)
print(f"Acurracy is {acuuracy:.4f}")
print("Confusion matrix\n",conf_matrix)

Acurracy is 96.2500
Confusion matrix
 [[ 3  1  0  0  0  2  0]
 [ 0  1  1  0  0  1  0]
 [ 0  0 13  0  0  0  0]
 [ 0  1  2  0  0  1  0]
 [ 1  1  0  0  0  0  0]
 [ 0  0  0  0  1 48  0]
 [ 0  0  0  0  0  0  3]]
