In [30]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score, fbeta_score

In [2]:
social_df = pd.read_csv('Assignment Datasets/Social_Network_Ads.csv')

In [4]:
social_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [5]:
social_df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [6]:
social_df['Purchased'].value_counts()

0    257
1    143
Name: Purchased, dtype: int64

In [15]:
social_df = pd.get_dummies(social_df, columns=['Gender'])

In [16]:
from sklearn.preprocessing import StandardScaler

In [17]:
stand = StandardScaler()

In [18]:
X = social_df.drop(['Purchased', 'User ID'], axis=1)
y = social_df['Purchased']

In [19]:
from sklearn.model_selection import train_test_split


In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=132)

In [21]:
stand.fit(X_train)
X_train = stand.transform(X_train)
X_test = stand.transform(X_test)

In [23]:
social_df.head()

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Gender_Female,Gender_Male
0,15624510,19,19000,0,0,1
1,15810944,35,20000,0,0,1
2,15668575,26,43000,0,1,0
3,15603246,27,57000,0,1,0
4,15804002,19,76000,0,0,1


In [26]:
X_train

array([[ 0.42674811,  0.15779102,  1.01892912, -1.01892912],
       [-0.79504965,  0.30525927, -0.98142253,  0.98142253],
       [-0.41911188, -0.78600575, -0.98142253,  0.98142253],
       ...,
       [ 0.14479478, -0.8154994 , -0.98142253,  0.98142253],
       [ 0.14479478,  0.27576562,  1.01892912, -1.01892912],
       [-0.0431741 , -0.37309466,  1.01892912, -1.01892912]])

In [38]:
models = {
    "LR" : LogisticRegression(),
    "KNN": KNeighborsClassifier(),
    'DT' : DecisionTreeClassifier(),
    'SVC' : SVC(),
    'GNB' : GaussianNB(),
    'XGC' : XGBClassifier(use_label_encoder=False),
    'RF' : RandomForestClassifier(),
}

In [39]:
for name, model in models.items():
    print(f'Using : {name}')
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f'Training Accuracy : {accuracy_score(y_train, model.predict(X_train))}')
    print(f'Testing Accuracy : {accuracy_score(y_test, y_pred)}')
    print(f'Confusion Matrix :\n{confusion_matrix(y_test, y_pred)}')
    print(f'Recall : {recall_score(y_test, y_pred)}')
    print(f'Precision : {precision_score(y_test, y_pred)}')
    print(f'F1 Score : {f1_score(y_test, y_pred)}')
    print(f'Fbeta_Score : {fbeta_score(y_test, y_pred, beta=0.5)}')
        

Using : LR
Training Accuracy : 0.85625
Testing Accuracy : 0.8375
Confusion Matrix :
[[47  6]
 [ 7 20]]
Recall : 0.7407407407407407
Precision : 0.7692307692307693
F1 Score : 0.7547169811320754
Fbeta_Score : 0.7633587786259542
Using : KNN
Training Accuracy : 0.9375
Testing Accuracy : 0.85
Confusion Matrix :
[[45  8]
 [ 4 23]]
Recall : 0.8518518518518519
Precision : 0.7419354838709677
F1 Score : 0.7931034482758621
Fbeta_Score : 0.7615894039735099
Using : DT
Training Accuracy : 1.0
Testing Accuracy : 0.875
Confusion Matrix :
[[47  6]
 [ 4 23]]
Recall : 0.8518518518518519
Precision : 0.7931034482758621
F1 Score : 0.8214285714285715
Fbeta_Score : 0.8041958041958043
Using : SVC
Training Accuracy : 0.925
Testing Accuracy : 0.8375
Confusion Matrix :
[[45  8]
 [ 5 22]]
Recall : 0.8148148148148148
Precision : 0.7333333333333333
F1 Score : 0.7719298245614035
Fbeta_Score : 0.7482993197278911
Using : GNB
Training Accuracy : 0.9
Testing Accuracy : 0.8875
Confusion Matrix :
[[47  6]
 [ 3 24]]
Recall :

In [54]:
titanic_df =pd.read_csv('Assignment Datasets/Titanic.csv')

In [55]:
titanic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [56]:
titanic_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [57]:
from sklearn.impute import KNNImputer
from sklearn.impute import SimpleImputer

In [58]:
knn_impute = KNNImputer()
simp_impute = SimpleImputer(strategy='most_frequent')

In [59]:
titanic_df['Age'] = knn_impute.fit_transform(titanic_df[['Age']])
titanic_df['Embarked'] = simp_impute.fit_transform(titanic_df[['Embarked']])
titanic_df['Cabin'] = simp_impute.fit_transform(titanic_df[['Cabin']])

In [60]:
titanic_df = pd.get_dummies(titanic_df, columns=['Sex', 'Cabin', 'Embarked'])

In [61]:
titanic_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Age,SibSp,Parch,Ticket,Fare,Sex_female,Sex_male,Cabin_A10,Cabin_A14,Cabin_A16,Cabin_A19,Cabin_A20,Cabin_A23,Cabin_A24,Cabin_A26,Cabin_A31,Cabin_A32,Cabin_A34,Cabin_A36,Cabin_A5,Cabin_A6,Cabin_A7,Cabin_B101,Cabin_B102,Cabin_B18,Cabin_B19,Cabin_B20,Cabin_B22,Cabin_B28,Cabin_B3,Cabin_B30,Cabin_B35,Cabin_B37,Cabin_B38,Cabin_B39,Cabin_B4,Cabin_B41,Cabin_B42,Cabin_B49,Cabin_B5,Cabin_B50,Cabin_B51 B53 B55,Cabin_B57 B59 B63 B66,Cabin_B58 B60,Cabin_B69,Cabin_B71,Cabin_B73,Cabin_B77,Cabin_B78,Cabin_B79,Cabin_B80,Cabin_B82 B84,Cabin_B86,Cabin_B94,Cabin_B96 B98,Cabin_C101,...,Cabin_D,Cabin_D10 D12,Cabin_D11,Cabin_D15,Cabin_D17,Cabin_D19,Cabin_D20,Cabin_D21,Cabin_D26,Cabin_D28,Cabin_D30,Cabin_D33,Cabin_D35,Cabin_D36,Cabin_D37,Cabin_D45,Cabin_D46,Cabin_D47,Cabin_D48,Cabin_D49,Cabin_D50,Cabin_D56,Cabin_D6,Cabin_D7,Cabin_D9,Cabin_E10,Cabin_E101,Cabin_E12,Cabin_E121,Cabin_E17,Cabin_E24,Cabin_E25,Cabin_E31,Cabin_E33,Cabin_E34,Cabin_E36,Cabin_E38,Cabin_E40,Cabin_E44,Cabin_E46,Cabin_E49,Cabin_E50,Cabin_E58,Cabin_E63,Cabin_E67,Cabin_E68,Cabin_E77,Cabin_E8,Cabin_F E69,Cabin_F G63,Cabin_F G73,Cabin_F2,Cabin_F33,Cabin_F38,Cabin_F4,Cabin_G6,Cabin_T,Embarked_C,Embarked_Q,Embarked_S
0,1,0,3,"Braund, Mr. Owen Harris",22.0,1,0,A/5 21171,7.25,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0,1,0,PC 17599,71.2833,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,3,1,3,"Heikkinen, Miss. Laina",26.0,0,0,STON/O2. 3101282,7.925,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0,1,0,113803,53.1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,5,0,3,"Allen, Mr. William Henry",35.0,0,0,373450,8.05,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [62]:
x = titanic_df.drop(['Name', 'Ticket', 'Survived', 'PassengerId'], axis=1)
y = titanic_df['Survived']

In [63]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=123)

In [64]:
stand.fit(x_train)
x_train = stand.transform(x_train)
x_test = stand.transform(x_test)

In [65]:
for name, model in models.items():
    print(f'Using : {name}')
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    print(f'Training Accuracy : {accuracy_score(y_train, model.predict(x_train))}')
    print(f'Testing Accuracy : {accuracy_score(y_test, y_pred)}')
    print(f'Confusion Matrix :\n{confusion_matrix(y_test, y_pred)}')
    print(f'Recall : {recall_score(y_test, y_pred)}')
    print(f'Precision : {precision_score(y_test, y_pred)}')
    print(f'F1 Score : {f1_score(y_test, y_pred)}')
    print(f'Fbeta_Score : {fbeta_score(y_test, y_pred, beta=0.5)}')

Using : LR
Training Accuracy : 0.8426966292134831
Testing Accuracy : 0.8100558659217877
Confusion Matrix :
[[94 20]
 [14 51]]
Recall : 0.7846153846153846
Precision : 0.7183098591549296
F1 Score : 0.75
Fbeta_Score : 0.7306590257879657
Using : KNN
Training Accuracy : 0.851123595505618
Testing Accuracy : 0.8100558659217877
Confusion Matrix :
[[93 21]
 [13 52]]
Recall : 0.8
Precision : 0.7123287671232876
F1 Score : 0.7536231884057971
Fbeta_Score : 0.7282913165266106
Using : DT
Training Accuracy : 0.9845505617977528
Testing Accuracy : 0.7877094972067039
Confusion Matrix :
[[92 22]
 [16 49]]
Recall : 0.7538461538461538
Precision : 0.6901408450704225
F1 Score : 0.7205882352941175
Fbeta_Score : 0.7020057306590258
Using : SVC
Training Accuracy : 0.8258426966292135
Testing Accuracy : 0.7988826815642458
Confusion Matrix :
[[97 17]
 [19 46]]
Recall : 0.7076923076923077
Precision : 0.7301587301587301
F1 Score : 0.7187500000000001
Fbeta_Score : 0.7255520504731862
Using : GNB
Training Accuracy : 0.73