# Importing Tools

In [1]:
# Standard library
import re
import warnings

# Core scientific stack
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
from scipy import stats
import sklearn

# sklearn library
# 1.model_selection
from sklearn.model_selection import train_test_split

# 2.preprocessing
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder

# 3.metrics
from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score

# 4.model
from sklearn.ensemble import AdaBoostClassifier as adbc
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier as rfc
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from xgboost import XGBClassifier as xgb

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation/convergence warnings raised by the
# libraries used below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Loading the Data

In [None]:
# Read the training split from the Kaggle input directory. `df` is an
# independent copy of the loaded frame and is the one previewed here.
train = pd.read_csv(filepath_or_buffer="/kaggle/input/titanic/train.csv")
df = train.copy(deep=True)
df.head()

In [None]:
# Read the test split from the Kaggle input directory. `df_test` is an
# independent copy of the loaded frame and is the one previewed here.
test = pd.read_csv(filepath_or_buffer="/kaggle/input/titanic/test.csv")
df_test = test.copy(deep=True)
df_test.head()

# Quick dataset information 🤓

In [None]:
# Column dtypes and non-null counts for the copy of the training data.
df.info()

In [None]:
# Column dtypes and non-null counts for the copy of the test data.
df_test.info()

In [None]:
# Summary statistics for the copy of the training data.
df.describe()

In [None]:
# Summary statistics for the copy of the test data.
df_test.describe()

# 🧹 Data Processing
1. Deal with Null values
2. Encode categorical values as integers (`Sex` and `Embarked` are mapped by hand rather than one-hot encoded)
3. Feature Scaling


In [None]:
# Drop the identifier / free-text columns (PassengerId, Ticket, Name, Cabin)
# that are not used as model features.
# Both frames are rebuilt from the untouched copies (`df`, `df_test`) instead of
# being mutated in place, so re-running this cell no longer raises a KeyError
# for columns that were already removed.
DROP_COLS = ['PassengerId', 'Ticket', 'Name', 'Cabin']
train = df.drop(columns=DROP_COLS)
# Keep the test passenger ids aside: they are needed for the submission file.
dfps = df_test['PassengerId'].copy()
test = df_test.drop(columns=DROP_COLS)

In [None]:
# Taking care of missing numeric data (the original note singles out 'Age').
# Columns are selected by name rather than by position so the step does not
# silently hit the wrong columns if the column order ever changes.
NUM_COLS = ['Age', 'SibSp', 'Parch', 'Fare']
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
train.loc[:, NUM_COLS] = imputer.fit_transform(train[NUM_COLS])

# Taking care of missing data ('Embarked'), most frequent value.
# Only 'Embarked' is passed to this imputer: the previous positional slice
# (6:) also swept the numeric 'Fare' column into the most-frequent fill.
imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
train.loc[:, ['Embarked']] = imputer.fit_transform(train[['Embarked']])

In [None]:
# Taking care of missing data ('Age') & ('Fare') in the test set.
# The imputer is fitted on the training columns and only *applied* to the test
# columns, so both splits are filled with the same statistics and nothing is
# learned from the test data. `train` has already been mean-imputed at this
# point, which leaves its column means unchanged, so these are the same means
# that were used to fill the training set.
NUM_COLS = ['Age', 'SibSp', 'Parch', 'Fare']
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(train[NUM_COLS])
test.loc[:, NUM_COLS] = imputer.transform(test[NUM_COLS])



In [None]:
# Re-inspect dtypes and non-null counts of the training frame after imputation.
train.info()

In [None]:
# Re-inspect dtypes and non-null counts of the test frame after imputation.
test.info()

In [None]:
# Encode 'Sex' as an integer in both splits: male -> 0, female -> 1.
sex = {
    'male': 0,
    'female': 1,
}
train, test = (frame.replace({'Sex': sex}) for frame in (train, test))

In [None]:
# Encode the port of embarkation as an integer in both splits: C -> 1, Q -> 2, S -> 3.
embark = {
    'C': 1,
    'Q': 2,
    'S': 3,
}
train, test = (frame.replace({'Embarked': embark}) for frame in (train, test))

# Training Model

In [None]:
# Column 0 of `train` is used as the label; columns 1 through 7 are the features.
labels_train = train.iloc[:, 0]
features_train = train.iloc[:, 1:8]

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable.
x_tr, x_te, y_tr, y_te = train_test_split(
    features_train,
    labels_train,
    test_size=0.2,
    random_state=30,
)


In [None]:
# Instantiate the candidate classifiers.
# The random forest and XGBoost estimators are built from the un-aliased class
# names: this cell binds `rfc` and `xgb` to model *instances*, which overwrites
# the class aliases of the same name from the import cell. Calling those
# aliases here made a second run of the cell fail with "object is not callable".
svc = SVC(kernel='linear', random_state=1)
rfc = RandomForestClassifier(
    n_estimators=200,
    criterion='gini',
    max_depth=None,
    min_samples_split=4,
    min_samples_leaf=1,
    random_state=1,  # seeded so the forest and its reported accuracy are reproducible
)
knc = KNeighborsClassifier()
xgb = XGBClassifier()
adb = adbc(n_estimators=100, random_state=1)

In [None]:
# Fit every candidate model on the training portion of the split.
for model in (svc, rfc, knc, xgb, adb):
    model.fit(x_tr, y_tr)

# Predict the held-out validation rows with each fitted model.
y_pr_svc = svc.predict(x_te)
y_pr_rfc = rfc.predict(x_te)
y_pr_knc = knc.predict(x_te)
y_pr_xgb = xgb.predict(x_te)
y_pr_adb = adb.predict(x_te)

# Validation accuracy (in percent) per model, in the same order as `pred_list`.
# accuracy_score is documented as (y_true, y_pred); the arguments are passed in
# that order here (accuracy itself is symmetric, so the values do not change).
pred_list = ['Support Vector Machine','Random Forest', 'KNN','XGBoost','Ada Boost']
pred_all = [
    acc(y_te, y_pr) * 100
    for y_pr in (y_pr_svc, y_pr_rfc, y_pr_knc, y_pr_xgb, y_pr_adb)
]

# Pair names and scores directly instead of indexing with a hard-coded count.
for model_name, accuracy in zip(pred_list, pred_all):
    print('{} has {} percent accuracy'.format(model_name, accuracy))

In [None]:
# Inspect the test frame once more before it is passed to the fitted models.
test.info()

In [None]:
# Predict the test set with each fitted model, in the order
# SVC, random forest, KNN, XGBoost, AdaBoost.
y_prd_svc, y_prd_rfc, y_prd_knc, y_prd_xgb, y_prd_adb = (
    fitted.predict(test) for fitted in (svc, rfc, knc, xgb, adb)
)

In [None]:
# Build the submission frame from the saved passenger ids and the XGBoost
# predictions, then write it to disk without the index column.
output = pd.DataFrame(data={'PassengerId': dfps, 'Survived': y_prd_xgb})
output.to_csv(path_or_buf='submission.csv', index=False)
print("Your submission was successful!")

In [None]:
# Preview the first rows of the submission instead of printing the whole frame.
print(output.head())
# classification_report requires the true and predicted labels; it was called
# with no arguments, which raises a TypeError. True labels exist only for the
# held-out validation split, so the report is computed there for the model
# whose predictions are submitted (XGBoost).
print(classification_report(y_te, y_pr_xgb))