In [82]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score , confusion_matrix 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

In [128]:
df= pd.read_csv('migraine_balanced.csv')

In [129]:
x = df.drop('Type' , axis = 1)
y = df['Type']
#data splitting 
X_train , X_test , Y_train , Y_test = train_test_split(x, y ,test_size=0.2 , random_state=42)

In [130]:
#building Random forest model
rf_model = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42
)
# training the model 
rf_model.fit(X_train,Y_train)

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [131]:
#prediction for the testing data
Y_pred = rf_model.predict(X_test)
#accuracy evaluation for Random forest 
accuracy_rt = accuracy_score(Y_test,Y_pred) * 100
conf_matrix_rt = confusion_matrix(Y_test, Y_pred)
print(f"Random forest accuracy {accuracy_rt:.4f}")
print("Confusion Matrix\n", conf_matrix_rt)

Random forest accuracy 96.2428
Confusion Matrix
 [[58  0  1  0  1  2  0]
 [ 0 47  0  0  0  1  0]
 [ 0  0 44  0  0  0  0]
 [ 0  0  1 49  0  0  0]
 [ 0  0  0  0 53  0  0]
 [ 0  2  1  0  4 37  0]
 [ 0  0  0  0  0  0 45]]


In [132]:
#Standard scalar for KNN model
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)

X_test_scaled = scaler.transform(X_test)

X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)

X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

In [133]:
#KNN model 
k_value = 5
knn_model = KNeighborsClassifier(n_neighbors=k_value)
knn_model.fit(X_train_scaled , Y_train)

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [134]:
#prediction and evaluation
Y_pred = knn_model.predict(X_test_scaled)
accuracy_knn = accuracy_score(Y_test,Y_pred) * 100
conf_knn = confusion_matrix(Y_test,Y_pred)
print(f"KNN accuracy {accuracy_knn:.4f}")
print("Confusion Matrix\n", conf_knn)

KNN accuracy 92.4855
Confusion Matrix
 [[59  0  0  0  1  2  0]
 [ 2 45  0  0  1  0  0]
 [ 1  0 43  0  0  0  0]
 [ 0  0  0 50  0  0  0]
 [ 4  0  0  0 48  1  0]
 [ 4  4  1  0  5 30  0]
 [ 0  0  0  0  0  0 45]]


In [135]:
#Making and training naive bayes model
gnb_model = GaussianNB()
gnb_model.fit(X_train,Y_train)

0,1,2
,priors,
,var_smoothing,1e-09


In [136]:
#accuracy and evaluation for naive bayes
Y_pred = gnb_model.predict(X_test)
accuracy_nb = accuracy_score(Y_test , Y_pred) * 100
conf_nb = confusion_matrix(Y_test,Y_pred)
print(f"Naive Bayes accuracy {accuracy_nb:.4f}")
print("Confusion Matrix\n", conf_nb)

Naive Bayes accuracy 71.6763
Confusion Matrix
 [[ 7 12  5  0 13 25  0]
 [ 0 17  2  0  0 29  0]
 [ 0  0 44  0  0  0  0]
 [ 0  0  1 49  0  0  0]
 [ 0  0  0  0 45  8  0]
 [ 1  0  0  0  2 41  0]
 [ 0  0  0  0  0  0 45]]


In [137]:
# this is desicion tree
dt_model=DecisionTreeClassifier(
    criterion='gini' ,
    max_depth=5,
    random_state=42
)
dt_model.fit(X_train,Y_train)

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [138]:
#evaluation for desicion tree
Y_pred = dt_model.predict(X_test)

accuracy_dt = accuracy_score(Y_test, Y_pred) * 100
conf_matrix_dt = confusion_matrix(Y_test, Y_pred)
print(f"Acurracy is {accuracy_dt:.4f}")
print("Confusion matrix\n",conf_matrix_dt)

Acurracy is 73.9884
Confusion matrix
 [[15 15  9  0  4 19  0]
 [ 0 48  0  0  0  0  0]
 [ 0  0 44  0  0  0  0]
 [ 0  3  1 44  0  2  0]
 [ 0  0  8  0 40  5  0]
 [11  3  1  0  9 20  0]
 [ 0  0  0  0  0  0 45]]
