# Six Classifier Models of Supervised ML

# 1.Decision Tree 

In [2]:
# import libraries
import numpy as np
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree 
from sklearn.metrics import classification_report, confusion_matrix

# Load Data for ML

In [4]:
# Load the Titanic dataset through seaborn
df = sns.load_dataset('titanic')

# Predictor columns fed to the models, and the target column
feature_columns = ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']
X = df.loc[:, feature_columns]
y = df.loc[:, 'survived']

In [5]:
# One-hot encode the categorical 'sex' column so that it becomes numeric indicator columns
X = pd.get_dummies(data=X, columns=['sex'])

In [6]:
# Show the encoded feature matrix as the cell output (note the missing age in row 888)
X

Unnamed: 0,pclass,age,sibsp,parch,fare,sex_female,sex_male
0,3,22.0,1,0,7.2500,False,True
1,1,38.0,1,0,71.2833,True,False
2,3,26.0,0,0,7.9250,True,False
3,1,35.0,1,0,53.1000,True,False
4,3,35.0,0,0,8.0500,False,True
...,...,...,...,...,...,...,...
886,2,27.0,0,0,13.0000,False,True
887,1,19.0,0,0,30.0000,True,False
888,3,,1,2,23.4500,True,False
889,1,26.0,0,0,30.0000,False,True


In [None]:
# Fill missing ages with the mean age.
# The result is assigned back to the column: calling fillna(inplace=True) on
# `X.age` is chained assignment, which recent pandas versions warn about and
# which does not modify X when Copy-on-Write is enabled.
X['age'] = X['age'].fillna(X['age'].mean())

# Last expression of the cell so the per-column missing-value counts are
# actually displayed (as a non-final statement the result was thrown away).
X.isnull().sum()

# Train Test Split Function

In [None]:
# Hold out 30% of the rows for testing; random_state=42 pins the shuffle
# so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Create and Train Model

In [None]:
# Seed the classifier: scikit-learn permutes the candidate features at every
# split, so an unseeded tree (and every score derived from it) can change
# from one run to the next.
model = DecisionTreeClassifier(random_state=42)
# Train on the training split only; the fitted estimator is the cell output
model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test rows
y_pred = model.predict(X=X_test)

# Check Score of Model 

## 1.Precision measures the proportion of true positive predictions among all positive predictions
## 2.Recall measures the proportion of true positive predictions among all actual positive instances

In [None]:
# Score the predictions against the true test labels
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

# (label, scoring function) pairs, printed in this order.
# F1 is the harmonic mean of precision and recall.
scorers = [
    ('Accuracy Score : ', accuracy_score),
    ('Precision Score : ', precision_score),
    ('Recall Score : ', recall_score),
    ('F1 Score : ', f1_score),
]
for label, scorer in scorers:
    print(label, scorer(y_test, y_pred))

# To Print Decision Tree Plot 

In [None]:
# Plot the tree that was actually trained and evaluated. The fitted `model`
# is passed as-is: refitting it on the full X, y here would train on the
# test rows and replace the model whose scores were just reported.
tree.plot_tree(model, filled=True)
# Save BEFORE plt.show(): once the figure has been shown it may be closed,
# and a later savefig would write an empty image.
plt.savefig('tree.png')
plt.show()

In [None]:
# Larger canvas for the tree: 20 inches wide, 10 inches tall
fig, ax = plt.subplots(figsize=(20, 10))
# Draw the already fitted model; refitting on the full X, y would train on
# the test rows and replace the evaluated model.
tree.plot_tree(model, filled=True, ax=ax)
# Save BEFORE plt.show(): once the figure has been shown it may be closed,
# and a later savefig would write an empty file.
fig.savefig('Decision Tree.pdf', format='pdf', dpi=300)
plt.show()

## Create Confusion Matrix

In [None]:
# Cross-tabulate the true test labels against the predicted labels
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Draw the confusion matrix as an annotated heatmap and label both axes
# through the Axes object that seaborn returns.
class_labels = ['Not Survived', 'Survived']
tick_positions = [0.5, 1.5]

ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Greens')
ax.set_xticks(tick_positions)
ax.set_xticklabels(class_labels)
ax.set_xlabel('Predicted Label')
ax.set_yticks(tick_positions)
ax.set_yticklabels(class_labels)
ax.set_ylabel('True Label')

# 2.K-Nearest Neighbours (KNN)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Build a 5-neighbour classifier and train it on the training split
model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict the test rows and print the raw predictions
y_pred = model.predict(X_test)
print(f'y_pred : {y_pred}')

# Confusion matrix of the KNN predictions, shown as the cell output
cm = confusion_matrix(y_test, y_pred)
cm

In [None]:
# Annotated heatmap of the confusion matrix, configured through the Axes
# object that seaborn returns.
ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Greens')
ax.set_title('Confusion Matrix Heatmap')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_xticks([0.5, 1.5])
ax.set_xticklabels(['Not Survived', 'Survived'])
ax.set_yticks([0.5, 1.5])
# rotation=0 keeps the row labels horizontal
ax.set_yticklabels(['Not Survived', 'Survived'], rotation=0)
plt.show()

# 3.Random Forest 

In [None]:
# Random forest: rebuild the features from the raw data, re-split 80/20 and train
from sklearn.ensemble import RandomForestClassifier

# Reload the raw dataset. The result must be assigned: a bare
# sns.load_dataset(...) call discards the frame and the cell would silently
# keep using whatever `df` an earlier cell left behind.
df = sns.load_dataset('titanic')

# Set X and y
X = df[['pclass','sex','age','sibsp','parch','fare']]
y = df['survived']

# One-hot encode the categorical 'sex' column
X = pd.get_dummies(X, columns=['sex'])

# Fill missing ages with the mean age. Assign the result back instead of
# calling fillna(inplace=True) on `X.age`: that is chained assignment, which
# recent pandas versions warn about and which does not modify X when
# Copy-on-Write is enabled.
X['age'] = X['age'].fillna(X['age'].mean())

# Train/test split: 20% of the rows are held out, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model; the seed makes the bootstrap samples and
# feature sub-sampling (and therefore the scores) reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict the held-out rows
y_pred = model.predict(X_test)


In [None]:
# Confusion matrix first, then the four summary scores in a fixed order
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix : ', cm)

scorers = (
    ('Accuracy Score : ', accuracy_score),
    ('Precision Score : ', precision_score),
    ('Recall Score : ', recall_score),
    ('F1 Score : ', f1_score),
)
for label, scorer in scorers:
    print(label, scorer(y_test, y_pred))

# 4.Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Train a logistic regression with default settings on the current split
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report the confusion matrix followed by the four summary scores
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix : ', cm)

scorers = (
    ('Accuracy Score : ', accuracy_score),
    ('Precision Score : ', precision_score),
    ('Recall Score : ', recall_score),
    ('F1 Score : ', f1_score),
)
for label, scorer in scorers:
    print(label, scorer(y_test, y_pred))

# 5.Support Vector Machine

In [None]:
from sklearn.svm import SVC

# Train a support vector classifier with default settings
model = SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Compute every score up front, then print them in the original order
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)

print(cm)
print(f'accuracy_score : {accuracy}')
print(f'f1 : {f1}')
print(f'precision_score : {precision}')

# 6.Naive Bayes

In [43]:
from sklearn.naive_bayes import GaussianNB

# Train a Gaussian naive Bayes classifier and predict the test rows
model = GaussianNB().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Compute every score up front, then print them in the original order
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)

print(cm)
print(f'accuracy_score : {accuracy}')
print(f'f1 : {f1}')
print(f'precision_score : {precision}')

[[88 17]
 [21 53]]
accuracy_score : 0.7877094972067039
f1 : 0.7361111111111112
precision_score : 0.7571428571428571
