In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [57]:
spaceship_titanic_df = pd.read_csv('train.csv')

In [58]:
spaceship_titanic_df.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True


In [59]:
print('Length of datasets', len(spaceship_titanic_df))

Length of datasets 8693


In [60]:
print('Columns: ', spaceship_titanic_df.columns)

Columns:  Index(['PassengerId', 'HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'Age',
       'VIP', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck',
       'Name', 'Transported'],
      dtype='object')


In [61]:
print("Shape: ", spaceship_titanic_df.shape)

Shape:  (8693, 14)


In [62]:
spaceship_titanic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8693 entries, 0 to 8692
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   PassengerId   8693 non-null   object 
 1   HomePlanet    8492 non-null   object 
 2   CryoSleep     8476 non-null   object 
 3   Cabin         8494 non-null   object 
 4   Destination   8511 non-null   object 
 5   Age           8514 non-null   float64
 6   VIP           8490 non-null   object 
 7   RoomService   8512 non-null   float64
 8   FoodCourt     8510 non-null   float64
 9   ShoppingMall  8485 non-null   float64
 10  Spa           8510 non-null   float64
 11  VRDeck        8505 non-null   float64
 12  Name          8493 non-null   object 
 13  Transported   8693 non-null   bool   
dtypes: bool(1), float64(6), object(7)
memory usage: 891.5+ KB


In [63]:
spaceship_titanic_df = spaceship_titanic_df.drop(columns=['Name'], axis=1)
spaceship_titanic_df.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,True


In [64]:
missing_records_series = spaceship_titanic_df.isnull().sum()
missing_records_series = missing_records_series[missing_records_series > 0]
missing_records_series

HomePlanet      201
CryoSleep       217
Cabin           199
Destination     182
Age             179
VIP             203
RoomService     181
FoodCourt       183
ShoppingMall    208
Spa             183
VRDeck          188
dtype: int64

In [65]:
missing_records_array = missing_records_series.index.to_numpy()
missing_records_array

array(['HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'Age', 'VIP',
       'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck'],
      dtype=object)

In [66]:
categorical_columns = spaceship_titanic_df.select_dtypes(['object']).columns
categorical_columns = categorical_columns.to_numpy()
categorical_columns

array(['PassengerId', 'HomePlanet', 'CryoSleep', 'Cabin', 'Destination',
       'VIP'], dtype=object)

In [67]:
missing_categorical_columns = missing_records_array[np.isin(missing_records_array, categorical_columns)]
missing_categorical_columns

array(['HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'VIP'],
      dtype=object)

In [68]:
print('Column\t\t\tMode')
for column in missing_categorical_columns:
  mode_value = spaceship_titanic_df[column].mode().iloc[0]
  print(column+'\t\t\t'+str(mode_value))
  spaceship_titanic_df[column].fillna(mode_value, inplace=True)

Column			Mode
HomePlanet			Earth
CryoSleep			False
Cabin			G/734/S
Destination			TRAPPIST-1e
VIP			False


In [69]:
spaceship_titanic_df[missing_categorical_columns].isnull().sum()

HomePlanet     0
CryoSleep      0
Cabin          0
Destination    0
VIP            0
dtype: int64

In [70]:
missing_numerical_record = missing_records_array[~np.isin(missing_records_array, categorical_columns)]
missing_numerical_record

array(['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck'],
      dtype=object)

In [71]:
print('Column\t\t\tMean')
for column in missing_numerical_record:
  mean_value = int(spaceship_titanic_df[column].mean())
  print(column+'\t\t\t'+str(mean_value))
  spaceship_titanic_df[column].fillna(mean_value, inplace=True)

Column			Mean
Age			28
RoomService			224
FoodCourt			458
ShoppingMall			173
Spa			311
VRDeck			304


In [72]:
spaceship_titanic_df[missing_numerical_record].isnull().sum()

Age             0
RoomService     0
FoodCourt       0
ShoppingMall    0
Spa             0
VRDeck          0
dtype: int64

In [73]:
spaceship_titanic_df[['DeckName','DeckNumber','DeckSide']] = spaceship_titanic_df['Cabin'].str.split('/',expand=True)
spaceship_titanic_df[['group','id']] = spaceship_titanic_df['PassengerId'].str.split('_',expand=True)
spaceship_titanic_df.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,DeckName,DeckNumber,DeckSide,group,id
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,False,B,0,P,1,1
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,True,F,0,S,2,1
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,False,A,0,S,3,1
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,False,A,0,S,3,2
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,True,F,1,S,4,1


In [74]:
spaceship_titanic_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,8693.0,28.810882,14.339536,0.0,20.0,27.0,37.0,79.0
RoomService,8693.0,224.6733,659.739371,0.0,0.0,0.0,78.0,14327.0
FoodCourt,8693.0,458.075578,1594.434978,0.0,0.0,0.0,118.0,29813.0
ShoppingMall,8693.0,173.711722,597.41745,0.0,0.0,0.0,45.0,23492.0
Spa,8693.0,311.135856,1124.675871,0.0,0.0,0.0,89.0,22408.0
VRDeck,8693.0,304.836305,1133.259056,0.0,0.0,0.0,71.0,24133.0


In [75]:
spaceship_titanic_df[spaceship_titanic_df.duplicated()]

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,DeckName,DeckNumber,DeckSide,group,id


In [76]:
spaceship_titanic_df.nunique()

PassengerId     8693
HomePlanet         3
CryoSleep          2
Cabin           6560
Destination        3
Age               80
VIP                2
RoomService     1273
FoodCourt       1507
ShoppingMall    1116
Spa             1327
VRDeck          1306
Transported        2
DeckName           8
DeckNumber      1817
DeckSide           2
group           6217
id                 8
dtype: int64

In [77]:
from sklearn import preprocessing

ordinal_category = ['HomePlanet', 'Destination', 'VIP', 'Transported', 'CryoSleep','DeckName', 'DeckSide']
label_encoding = preprocessing.LabelEncoder()

for col in ordinal_category:
  spaceship_titanic_df[col] = label_encoding.fit_transform(spaceship_titanic_df[col])

spaceship_titanic_df.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,DeckName,DeckNumber,DeckSide,group,id
0,0001_01,1,0,B/0/P,2,39.0,0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,1,1
1,0002_01,0,0,F/0/S,2,24.0,0,109.0,9.0,25.0,549.0,44.0,1,5,0,1,2,1
2,0003_01,1,0,A/0/S,2,58.0,1,43.0,3576.0,0.0,6715.0,49.0,0,0,0,1,3,1
3,0003_02,1,0,A/0/S,2,33.0,0,0.0,1283.0,371.0,3329.0,193.0,0,0,0,1,3,2
4,0004_01,0,0,F/1/S,2,16.0,0,303.0,70.0,151.0,565.0,2.0,1,5,1,1,4,1


In [79]:
categorical_columns = spaceship_titanic_df.select_dtypes(['object']).columns.to_numpy()
for col in categorical_columns:
  spaceship_titanic_df[col] = pd.to_numeric(spaceship_titanic_df[col], errors='coerce')

spaceship_titanic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8693 entries, 0 to 8692
Data columns (total 18 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   PassengerId   0 non-null      float64
 1   HomePlanet    8693 non-null   int64  
 2   CryoSleep     8693 non-null   int64  
 3   Cabin         0 non-null      float64
 4   Destination   8693 non-null   int64  
 5   Age           8693 non-null   float64
 6   VIP           8693 non-null   int64  
 7   RoomService   8693 non-null   float64
 8   FoodCourt     8693 non-null   float64
 9   ShoppingMall  8693 non-null   float64
 10  Spa           8693 non-null   float64
 11  VRDeck        8693 non-null   float64
 12  Transported   8693 non-null   int64  
 13  DeckName      8693 non-null   int64  
 14  DeckNumber    8693 non-null   int64  
 15  DeckSide      8693 non-null   int64  
 16  group         8693 non-null   int64  
 17  id            8693 non-null   int64  
dtypes: float64(8), int64(10)
mem

In [80]:
spaceship_titanic_df.drop(columns=['PassengerId', 'Cabin'], axis=1,inplace=True)
spaceship_titanic_df.head()

Unnamed: 0,HomePlanet,CryoSleep,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,DeckName,DeckNumber,DeckSide,group,id
0,1,0,2,39.0,0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,1,1
1,0,0,2,24.0,0,109.0,9.0,25.0,549.0,44.0,1,5,0,1,2,1
2,1,0,2,58.0,1,43.0,3576.0,0.0,6715.0,49.0,0,0,0,1,3,1
3,1,0,2,33.0,0,0.0,1283.0,371.0,3329.0,193.0,0,0,0,1,3,2
4,0,0,2,16.0,0,303.0,70.0,151.0,565.0,2.0,1,5,1,1,4,1


In [81]:
y = spaceship_titanic_df['Transported']
x = spaceship_titanic_df.drop('Transported', axis=1)

In [82]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.33, random_state=42)

print(x_train)
print(x_test)
print(y_train)
print(y_test)

      HomePlanet  CryoSleep  Destination   Age  VIP  RoomService  FoodCourt  \
4696           2          0            2  35.0    0       1337.0       49.0   
5946           0          0            2  28.0    0          0.0      152.0   
227            2          1            2  43.0    0          0.0        0.0   
3950           1          1            2  65.0    0          0.0        0.0   
7674           0          0            1  18.0    0          0.0        0.0   
...          ...        ...          ...   ...  ...          ...        ...   
5734           0          0            2  18.0    0         14.0        2.0   
5191           2          0            2  50.0    0        690.0        0.0   
5390           0          0            1  22.0    0        158.0        0.0   
860            2          0            2  34.0    0        379.0        0.0   
7270           1          0            0  28.0    0          7.0      489.0   

      ShoppingMall    Spa  VRDeck  DeckName  DeckNu

In [83]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split, GridSearchCV,RandomizedSearchCV
from sklearn.preprocessing import normalize, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,classification_report, confusion_matrix
from xgboost import XGBClassifier
import xgboost as xgb
from lightgbm import LGBMClassifier
import lightgbm as lgb
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

print("Ensemble")
results_classifier={}

base_rf_model = RandomForestClassifier(random_state=42)

adaboost_model = AdaBoostClassifier(base_rf_model, n_estimators=50, random_state=42)

adaboost_model.fit(x_train, y_train)

y_pred_adaboost_model = adaboost_model.predict(x_test)

print(classification_report(y_test, y_pred_adaboost_model))

print(confusion_matrix(y_pred_adaboost_model,y_test))

accuracy = accuracy_score(y_test, y_pred_adaboost_model)

precision = precision_score(y_test, y_pred_adaboost_model,average='weighted')

recall = recall_score(y_test, y_pred_adaboost_model, average='macro')

f1 = f1_score(y_test, y_pred_adaboost_model, average='macro')

results_classifier['Ensemble'] = {'accuracy':accuracy, 'precision':precision, 'recall': recall, 'f1': f1}

Ensemble
              precision    recall  f1-score   support

           0       0.79      0.79      0.79      1424
           1       0.80      0.79      0.80      1445

    accuracy                           0.79      2869
   macro avg       0.79      0.79      0.79      2869
weighted avg       0.79      0.79      0.79      2869

[[1132  297]
 [ 292 1148]]


In [84]:
from sklearn.neighbors import KNeighborsClassifier

print("KNeighborsClassifier")

knn_model = KNeighborsClassifier()

knn_model.fit(x_train, y_train)

y_pred_knn = knn_model.predict(x_test)

print(classification_report(y_test, y_pred_knn))

print(confusion_matrix(y_pred_knn,y_test))

accuracy = accuracy_score(y_test, y_pred_knn)

precision = precision_score(y_test, y_pred_knn,average='weighted')

recall = recall_score(y_test, y_pred_knn, average='macro')

f1 = f1_score(y_test, y_pred_knn, average='macro')

results_classifier['knn'] = {'accuracy':accuracy, 'precision':precision, 'recall': recall, 'f1': f1}

KNeighborsClassifier
              precision    recall  f1-score   support

           0       0.77      0.71      0.74      1424
           1       0.73      0.79      0.76      1445

    accuracy                           0.75      2869
   macro avg       0.75      0.75      0.75      2869
weighted avg       0.75      0.75      0.75      2869

[[1004  302]
 [ 420 1143]]


In [85]:
print("tunnedknneighbour")

# defining tunning parameters
knn_param_grid  = {
      'n_neighbors' : range(1, 30, 2),
      'weights' : ['uniform','distance'],
      'metric' : ['minkowski','euclidean','manhattan'],
      'algorithm': ['auto'],
      'leaf_size': [5,6,7,8],
      'p':[1,2]
}

knn_model_tunnded = KNeighborsClassifier()

knn_grid_search = GridSearchCV(knn_model_tunnded, knn_param_grid, cv=5,scoring='accuracy', n_jobs=-1)
knn_grid_search.fit(x_train, y_train)

# selecting best hyperparameter combination
best_knn_params = knn_grid_search.best_params_
print(best_knn_params)

  # Use the best models to make predictions
best_knn_model = knn_grid_search.best_estimator_

# predicting the y_test
y_pred_tunned_knn = best_knn_model.predict(x_test)

# displaying classification report
print(classification_report(y_test, y_pred_tunned_knn))

print(confusion_matrix(y_pred_tunned_knn, y_test))

# computing the accuracy
accuracy = accuracy_score(y_test, y_pred_tunned_knn)
# computing the precision
precision = precision_score(y_test, y_pred_tunned_knn,average='weighted')
# computing the recall
recall = recall_score(y_test, y_pred_tunned_knn, average='macro')
 # computing the f1 score
f1 = f1_score(y_test, y_pred_tunned_knn, average='macro')

results_classifier['tunnedknn'] = {'accuracy':accuracy, 'precision':precision, 'recall': recall, 'f1': f1}

tunnedknneighbour
{'algorithm': 'auto', 'leaf_size': 5, 'metric': 'minkowski', 'n_neighbors': 19, 'p': 2, 'weights': 'uniform'}
              precision    recall  f1-score   support

           0       0.82      0.68      0.74      1424
           1       0.73      0.85      0.79      1445

    accuracy                           0.77      2869
   macro avg       0.77      0.76      0.76      2869
weighted avg       0.77      0.77      0.76      2869

[[ 966  215]
 [ 458 1230]]


In [86]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=41, max_depth=20)

rf.fit(x_train, y_train)

y_pred_rf = rf.predict(x_test)

print(classification_report(y_test, y_pred_rf))

print(confusion_matrix(y_pred_rf, y_test))

accuracy = accuracy_score(y_test, y_pred_rf)

precision = precision_score(y_test, y_pred_rf,average='weighted')

recall = recall_score(y_test, y_pred_rf, average='macro')

f1 = f1_score(y_test, y_pred_rf, average='macro')

results_classifier['random']  = {'accuracy':accuracy, 'precision':precision, 'recall': recall, 'f1': f1}

              precision    recall  f1-score   support

           0       0.79      0.79      0.79      1424
           1       0.79      0.79      0.79      1445

    accuracy                           0.79      2869
   macro avg       0.79      0.79      0.79      2869
weighted avg       0.79      0.79      0.79      2869

[[1121  303]
 [ 303 1142]]


In [87]:
print("LogisticRegression")

lr_model = LogisticRegression()

lr_model.fit(x_train, y_train)

y_pred_lr = lr_model.predict(x_test)

print(classification_report(y_test, y_pred_lr))

print(confusion_matrix(y_pred_lr,y_test))

accuracy = accuracy_score(y_test, y_pred_lr)

precision = precision_score(y_test, y_pred_lr,average='weighted')

recall = recall_score(y_test, y_pred_lr, average='macro')

f1 = f1_score(y_test, y_pred_lr, average='macro')

results_classifier['lr'] = {'accuracy':accuracy, 'precision':precision, 'recall': recall, 'f1': f1}

LogisticRegression
              precision    recall  f1-score   support

           0       0.82      0.68      0.74      1424
           1       0.73      0.85      0.79      1445

    accuracy                           0.77      2869
   macro avg       0.78      0.77      0.77      2869
weighted avg       0.78      0.77      0.77      2869

[[ 969  212]
 [ 455 1233]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [88]:
print("DecisionTreeClassifier")

dt_model = DecisionTreeClassifier()

dt_model.fit(x_train, y_train)

y_pred_dt = dt_model.predict(x_test)

print(classification_report(y_test, y_pred_dt))

print(confusion_matrix(y_pred_dt,y_test))

accuracy = accuracy_score(y_test, y_pred_dt)

precision = precision_score(y_test, y_pred_dt,average='weighted')

recall = recall_score(y_test, y_pred_dt, average='macro')

f1 = f1_score(y_test, y_pred_dt, average='macro')

results_classifier['dt'] = {'accuracy':accuracy, 'precision':precision, 'recall': recall, 'f1': f1}

DecisionTreeClassifier
              precision    recall  f1-score   support

           0       0.76      0.74      0.75      1424
           1       0.75      0.77      0.76      1445

    accuracy                           0.75      2869
   macro avg       0.75      0.75      0.75      2869
weighted avg       0.75      0.75      0.75      2869

[[1050  332]
 [ 374 1113]]


In [89]:
print("LGBMClassifier")

lgbmc_model = LGBMClassifier()

lgbmc_model.fit(x_train, y_train)

y_pred_lgbmc = lgbmc_model.predict(x_test)

print(classification_report(y_test, y_pred_lgbmc))

print(confusion_matrix(y_pred_lgbmc,y_test))

accuracy = accuracy_score(y_test, y_pred_lgbmc)

precision = precision_score(y_test, y_pred_lgbmc,average='weighted')

recall = recall_score(y_test, y_pred_lgbmc, average='macro')

f1 = f1_score(y_test, y_pred_lgbmc, average='macro')

results_classifier['lgbmc'] = {'accuracy':accuracy, 'precision':precision, 'recall': recall, 'f1': f1}

LGBMClassifier
[LightGBM] [Info] Number of positive: 2933, number of negative: 2891
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001575 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1892
[LightGBM] [Info] Number of data points in the train set: 5824, number of used features: 15
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.503606 -> initscore=0.014423
[LightGBM] [Info] Start training from score 0.014423
              precision    recall  f1-score   support

           0       0.81      0.79      0.80      1424
           1       0.80      0.82      0.81      1445

    accuracy                           0.80      2869
   macro avg       0.80      0.80      0.80      2869
weighted avg       0.80      0.80      0.80      2869

[[1123  267]
 [ 301 1178]]


In [90]:
print("XGBClassifier")

xgb_model = XGBClassifier()

xgb_model.fit(x_train, y_train)

y_pred_xgb = xgb_model.predict(x_test)

print(classification_report(y_test, y_pred_xgb))

print(confusion_matrix(y_pred_xgb,y_test))

accuracy = accuracy_score(y_test, y_pred_xgb)

precision = precision_score(y_test, y_pred_xgb,average='weighted')

recall = recall_score(y_test, y_pred_xgb, average='macro')

f1 = f1_score(y_test, y_pred_xgb, average='macro')

results_classifier['xgb'] = {'accuracy':accuracy, 'precision':precision, 'recall': recall, 'f1': f1}

XGBClassifier
              precision    recall  f1-score   support

           0       0.79      0.79      0.79      1424
           1       0.80      0.79      0.79      1445

    accuracy                           0.79      2869
   macro avg       0.79      0.79      0.79      2869
weighted avg       0.79      0.79      0.79      2869

[[1128  297]
 [ 296 1148]]


In [99]:
# Create a DataFrame
evaluation_df_task_2 = pd.DataFrame(results_classifier)

numeric_columns = evaluation_df_task_2.columns.difference(['Input Type', 'Model'])
evaluation_df_task_2[numeric_columns] = evaluation_df_task_2[numeric_columns].round(3)

# Display the DataFrame
evaluation_df_task_2.head()

Unnamed: 0,Ensemble,knn,tunnedknn,random,lr,dt,lgbmc,xgb
accuracy,0.795,0.748,0.765,0.789,0.768,0.754,0.802,0.793
precision,0.795,0.75,0.773,0.789,0.775,0.754,0.802,0.793
recall,0.795,0.748,0.765,0.789,0.767,0.754,0.802,0.793
f1,0.795,0.748,0.763,0.789,0.766,0.754,0.802,0.793
