In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import os
# Print the full path of every file found beneath '/kaggle/input'.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

In [None]:
# Read the training and test CSV files into two DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/titanic/train.csv',
        '/kaggle/input/titanic/test.csv',
    )
)

In [None]:
# Display the first five rows of the training frame.
train.head(n=5)

In [None]:
# Remove the 'Name' column from the test frame.
# errors='ignore' keeps this cell re-runnable: `test` is rebound here, so a
# second execution would otherwise raise KeyError because 'Name' is already gone.
test = test.drop(columns=['Name'], errors='ignore')
test.head()

In [None]:
# Columns fed to the models: passenger class plus the one-hot indicators.
feature_cols = ['Pclass','Sex_male','Embarked_C','Embarked_Q','Embarked_S']

# One-hot encode the categorical columns. drop_first=False keeps every level,
# so each indicator listed in feature_cols exists as a column.
training_encoded = pd.get_dummies(train, columns=['Sex','Embarked'], drop_first=False)

# astype(int) returns a new frame, so X is an independent copy rather than a
# slice of training_encoded (no SettingWithCopyWarning), and the indicator
# columns become 0/1 integers in a single step.
X = training_encoded[feature_cols].astype(int)
y = training_encoded['Survived']

# Encode the test frame with the same settings as the training frame. With
# drop_first=True the first level of each encoded column is dropped, which
# would remove an indicator (Embarked_C) that the models are trained on.
testing_encoded = pd.get_dummies(test, columns=['Sex','Embarked'], drop_first=False)
X.head()

In [None]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegressionCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Baseline histogram-based gradient boosting classifier: fit on the training
# split, predict the held-out split, and print per-class metrics.
gbc = HistGradientBoostingClassifier(max_iter=1000, learning_rate=0.1)
gbc.fit(X_train, y_train)

gbc_preds = gbc.predict(X_test)
print(classification_report(y_true=y_test, y_pred=gbc_preds))


In [None]:
# Baseline XGBoost classifier with library defaults.
# `use_label_encoder` is intentionally not passed: it is deprecated in recent
# XGBoost releases and, where it is no longer a recognised parameter, only
# triggers an "unused parameter" warning (repeated for every clone of this
# estimator that a grid search fits).
xgb_model = xgb.XGBClassifier()
xgb_model.fit(X_train, y_train)

# Evaluate on the held-out split.
xgb_preds = xgb_model.predict(X_test)
print(classification_report(y_test, xgb_preds))

In [None]:
# Exhaustive search over tree depth, number of trees and learning rate for the
# XGBoost model, scored by accuracy with 5-fold cross-validation.
param_grid_xgb = dict(
    max_depth=[3, 4, 5],
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
)
grid_search_xgb = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid_xgb,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,   # use every available core
    verbose=1,
)
grid_search_xgb.fit(X_train, y_train)
best_params_xgb = grid_search_xgb.best_params_


In [None]:
# Score the estimator selected by the XGBoost grid search on the held-out split.
best_xgb_model = grid_search_xgb.best_estimator_
best_xgb_preds = best_xgb_model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=best_xgb_preds))

In [None]:
# Exhaustive search over six HistGradientBoosting hyper-parameters
# (3 values each), scored by accuracy with 5-fold cross-validation.
param_grid_gbc = dict(
    learning_rate=[0.01, 0.1, 0.2],
    max_iter=[100, 200, 300],
    max_leaf_nodes=[31, 50, 100],
    max_depth=[None, 3, 5],
    min_samples_leaf=[20, 50, 100],
    l2_regularization=[0.0, 0.1, 1.0],
)
grid_search_gbc = GridSearchCV(
    estimator=gbc,
    param_grid=param_grid_gbc,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
    error_score='raise',   # surface a failing fit instead of scoring it as NaN
)
grid_search_gbc.fit(X_train, y_train)
best_params_gbc = grid_search_gbc.best_params_


In [None]:
# Score the estimator selected by the HistGradientBoosting grid search on the held-out split.
best_gbc_model = grid_search_gbc.best_estimator_
best_gbc_preds = best_gbc_model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=best_gbc_preds))

In [None]:
# Baseline random forest with a fixed seed: fit on the training split,
# predict the held-out split, and print per-class metrics.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

rf_preds = rf.predict(X_test)
print(classification_report(y_true=y_test, y_pred=rf_preds))

In [None]:
# Exhaustive search over random-forest hyper-parameters, scored by accuracy
# with 5-fold cross-validation.
param_grid_rf = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
)
grid_search_rf = GridSearchCV(
    estimator=rf,
    param_grid=param_grid_rf,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
# Left as the final expression so the fitted search object is displayed.
grid_search_rf.fit(X_train, y_train)

In [None]:
# Keep the winning random-forest configuration and score it on the held-out split.
best_rf_model = grid_search_rf.best_estimator_
best_params_rf = grid_search_rf.best_params_

best_rf_preds = best_rf_model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=best_rf_preds))

In [None]:
# Build the submission from the TEST passengers.
# The features must come from `test`, not from the training frame: predicting
# on training rows yields one prediction per training passenger, which does not
# line up with test['PassengerId'] (different length, NaN padding, wrong labels).
feature_cols = ['Pclass','Sex_male','Embarked_C','Embarked_Q','Embarked_S']

# Encode only the two categorical columns the model uses, as done for training.
test_encoded = pd.get_dummies(test, columns=['Sex','Embarked'])

# Align to the exact columns (and order) the model was trained on; an indicator
# whose level never occurs in the test data is filled with 0.
guess_X = test_encoded.reindex(columns=feature_cols, fill_value=0).astype(int)

# Share the test index so concat pairs each prediction with its passenger.
guess_y_rf = pd.DataFrame(best_rf_model.predict(guess_X), index=test.index)
submission = pd.concat([test['PassengerId'], guess_y_rf], axis=1)
submission.columns = ['PassengerId','Survived']
submission = submission.reset_index(drop=True)

assert len(submission) == len(test), "submission must have one row per test passenger"