In [1]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, cohen_kappa_score
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
def classify_report(ytest,y_pred):
    cm = confusion_matrix(ytest,y_pred)
    print("Confusion Matrix:\n",cm)
    error_rate = 1 - accuracy_score(ytest,y_pred)
    print("classification report:\n")
    print(classification_report(ytest,y_pred))
    print('Accuracy:\n',accuracy_score(ytest,y_pred))
    print('ErrorRate:\n',error_rate)
    print('Cohen Kappa score:\n',cohen_kappa_score(ytest,y_pred))


url ='C:/Users/Ahmadi/OneDrive/Desktop/datasets/1.1 titanic.csv'
df = pd.read_csv(url).fillna(0)
#data cleaning , dropping column
df = df.drop('Name',axis=1)
df = df.drop('SexCode',axis=1)
#create mapper
PClass_mapper = {'1st':1,'2nd':2,'3rd':3}
gender_mapper = {'female':1,'male':2}

#replacing feature values with scale
df['PClass'] = df['PClass'].replace(PClass_mapper)
df['Sex'] = df['Sex'].replace(gender_mapper)
#replacing missing values of age with mean of age
df['Age'] = np.where(df['Age']==0,np.mean(df['Age']),df['Age'])
#input features
x = df.iloc[:,:3].values
#output class
y = df.iloc[:, 3].values

#splitting the data into training and test
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.2,random_state=0)


In [2]:
#using LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

lda = LDA(n_components=None)
xtrain = lda.fit_transform(xtrain,ytrain)
xtest = lda.transform(xtest)
print('original features:',x.shape[1])
print('reduced number of features:',xtrain.shape[1])
print('variance ratio:',lda.explained_variance_ratio_)

original features: 3
reduced number of features: 1
variance ratio: [1.]


In [3]:
#prediction with LDA
from sklearn.neighbors import KNeighborsClassifier
# knn model creation 
knn = KNeighborsClassifier(n_neighbors=10)

#fit model
knn.fit(xtrain,ytrain)
#perform predictions
y_pred_knn = knn.predict(xtest)

classify_report(ytest,y_pred_knn)

Confusion Matrix:
 [[158  13]
 [ 36  56]]
classification report:

              precision    recall  f1-score   support

           0       0.81      0.92      0.87       171
           1       0.81      0.61      0.70        92

    accuracy                           0.81       263
   macro avg       0.81      0.77      0.78       263
weighted avg       0.81      0.81      0.81       263

Accuracy:
 0.8136882129277566
ErrorRate:
 0.18631178707224338
Cohen Kappa score:
 0.5653185819813135


In [4]:
# using PCA
from sklearn.decomposition import PCA
#splitting the data into training and test
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.2,random_state=0)

pca = PCA(n_components=0.99)
print('original features:',xtrain.shape[1])#(variance = 0.99,0.95) or features = 3)
print('original test features:',xtest.shape[1])
xtrain = pca.fit_transform(xtrain)
xtest = pca.transform(xtest)
print('reduced number of features:',xtrain.shape[1])
print('reduced number of test features:',xtest.shape[1])

original features: 3
original test features: 3
reduced number of features: 1
reduced number of test features: 1


In [5]:
#predicting with PCA
from sklearn.neighbors import KNeighborsClassifier
# KNN model creation 
knn = KNeighborsClassifier(n_neighbors=10)

#fit model
knn.fit(xtrain,ytrain)
#perform predictions
y_pred_knn = knn.predict(xtest)

classify_report(ytest,y_pred_knn)

Confusion Matrix:
 [[153  18]
 [ 69  23]]
classification report:

              precision    recall  f1-score   support

           0       0.69      0.89      0.78       171
           1       0.56      0.25      0.35        92

    accuracy                           0.67       263
   macro avg       0.63      0.57      0.56       263
weighted avg       0.64      0.67      0.63       263

Accuracy:
 0.6692015209125475
ErrorRate:
 0.3307984790874525
Cohen Kappa score:
 0.165992345544013


In [6]:
# using NMF
from sklearn.decomposition import NMF
#splitting data
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.2,random_state=0)

print('original number of features:',xtrain.shape[1])
nmf = NMF(n_components=2,init='random',random_state=0)
xtrain = nmf.fit_transform(xtrain)
xtest = nmf.transform(xtest)
print('reduced number of features:',xtrain.shape[1])
print('reduced number of test features:',xtest.shape[1])

original number of features: 3
reduced number of features: 2
reduced number of test features: 2




In [7]:
#predicting with NMF
from sklearn.neighbors import KNeighborsClassifier
# KNN model creation 
knn = KNeighborsClassifier(n_neighbors=10)

#fit model
knn.fit(xtrain,ytrain)
#perform predictions
y_pred_knn = knn.predict(xtest)

classify_report(ytest,y_pred_knn)

Confusion Matrix:
 [[135  36]
 [ 26  66]]
classification report:

              precision    recall  f1-score   support

           0       0.84      0.79      0.81       171
           1       0.65      0.72      0.68        92

    accuracy                           0.76       263
   macro avg       0.74      0.75      0.75       263
weighted avg       0.77      0.76      0.77       263

Accuracy:
 0.7642585551330798
ErrorRate:
 0.23574144486692017
Cohen Kappa score:
 0.494450300737893
