# <center> **Kaggle’s Spaceship Titanic Competition**
# <center> **Machine Learning**

# **Libraries**

In [60]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

import functions
import importlib
importlib.reload(functions)

import lazypredict
from lazypredict.Supervised import LazyClassifier

import time

# **Load Data**

In [62]:
data = pd.read_csv(
    r"C:\Users\Dell\Documents\AI\Titanic\Data\Data\data.csv",
    index_col=False
)

train = pd.read_csv(
    r"C:\Users\Dell\Documents\AI\Titanic\Data\train.csv",
    index_col=False
)

test = pd.read_csv(
    r"C:\Users\Dell\Documents\AI\Titanic\Data\test.csv",
    index_col=False
)


random_state = 101
target = 'Transported'

# **Machine Learning**

## **Split Data Back to Train and Test**

In [63]:
train=data[data['PassengerId'].isin(train['PassengerId'].values)].copy()
test=data[data['PassengerId'].isin(test['PassengerId'].values)].copy()

## **Drop Unneeded Features**

In [64]:
train.drop(['Group', 'CabinNumber'], axis=1, inplace=True)
test.drop(['Group', 'CabinNumber'], axis=1, inplace=True)

## **Pre-Processing**

In [65]:
X = train.drop(target, axis=1)
y = train[target]
y = y.astype(bool)

numerical_cols = [cname for cname in X.columns if X[cname].dtype in ['int64', 'float64']]
categorical_cols = [cname for cname in X.columns if X[cname].dtype in ["object", "bool"]]

scaler = StandardScaler()
scaled_numerical = scaler.fit_transform(X[numerical_cols])

encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)  # Set sparse_output=False to get dense output
encoded_categorical = encoder.fit_transform(X[categorical_cols])

scaled_numerical_df = pd.DataFrame(scaled_numerical, columns=numerical_cols)

encoded_categorical_df = pd.DataFrame(encoded_categorical, columns=encoder.get_feature_names_out(categorical_cols))

scaled_numerical_df.reset_index(drop=True, inplace=True)
encoded_categorical_df.reset_index(drop=True, inplace=True)

X = pd.concat([scaled_numerical_df, encoded_categorical_df], axis=1)

## **Train Test Split**

In [66]:
X, y = shuffle(X, y, random_state=random_state)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=random_state)

## **LazyPredict**

In [67]:
clf = LazyClassifier(verbose=0,ignore_warnings=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
models

 97%|█████████▋| 30/31 [4:20:08<20:27, 1227.44s/it]  

[LightGBM] [Info] Number of positive: 3502, number of negative: 3452
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002517 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1740
[LightGBM] [Info] Number of data points in the train set: 6954, number of used features: 43
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.503595 -> initscore=0.014380
[LightGBM] [Info] Start training from score 0.014380


100%|██████████| 31/31 [4:20:21<00:00, 503.90s/it] 


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LGBMClassifier,0.8,0.8,0.8,0.8,12.69
XGBClassifier,0.79,0.79,0.79,0.79,34.05
BaggingClassifier,0.78,0.78,0.78,0.78,32.24
DecisionTreeClassifier,0.77,0.77,0.77,0.77,8.47
RandomForestClassifier,0.76,0.76,0.76,0.76,40.65
AdaBoostClassifier,0.76,0.76,0.76,0.76,54.39
ExtraTreesClassifier,0.74,0.74,0.74,0.74,78.37
BernoulliNB,0.73,0.73,0.73,0.73,10.29
LinearDiscriminantAnalysis,0.73,0.73,0.73,0.73,537.8
NearestCentroid,0.73,0.73,0.73,0.73,8.63


### **Insights**

> **1. LazyPredict Time to Execute:** 4 Hours. </BR>
> **2. Top 3 Modles:** LGBM Classifier, XGB Classifier, Bagging Classifier </BR>
> **3. Best Accuracy Score:** 0.80 </BR>
> **4. Conclusion:** Results are comparable to results from previous notebook </BR>