### Import Libraries

In [21]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from imblearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score

In [9]:
Data = pd.read_csv('Train_set.csv',index_col='PassengerId')
Data.head()
X = Data.iloc[:,1:]
y = Data.iloc[:,0]
y.head()

PassengerId
1    0
2    1
3    1
4    1
5    0
Name: Survived, dtype: int64

### Split the data

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state=13)

### Scale the data

In [30]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Algorithms

In [15]:
Classifiers = ['Log_Regression', 'Nearest Neighbors', 'Neural Networks', 
               'Decision Trees', 'Random Forest', 'Adaboost']
Metrics = ['Accuracy', 'AUC']
Model_Evaluation = pd.DataFrame(index = Classifiers, columns = Metrics)

#### Logistic Regression

In [31]:
reg_param_grid = {'reg__max_iter': np.arange(100, 251, 50)}

pipe_reg = Pipeline([('scaler', StandardScaler()),
                    ('reg',LogisticRegression())])

gs_reg = GridSearchCV(pipe_reg, reg_param_grid, cv=5, scoring = "accuracy", refit = True)
gs_reg.fit(X_train_scaled, y_train)
y_pred = gs_reg.predict(X_test_scaled)
tn, fp, fn, tp = confusion_matrix(y_test,y_pred).ravel()

Model_Evaluation.loc['Log_Regression', 'Accuracy'] = round(100*accuracy_score(y_test, y_pred),2)
Model_Evaluation.loc['Log_Regression', 'AUC'] = round(100*roc_auc_score(y_test, y_pred),2)

### Test results

print("Test Results")
print("--------------------")
print("Accuracy:" + " {0:.2f}%".format(100*accuracy_score(y_test, y_pred)))
print("AUC:" + " {0:.2f}%".format(100*roc_auc_score(y_test, y_pred)))

Test Results
--------------------
Accuracy: 0.8284
AUC: 0.7983


Training results

In [41]:
y_results = gs_reg.best_estimator_.predict(X_train_scaled)
tn, fp, fn, tp = confusion_matrix(y_train,y_results).ravel()
print("Training Results")
print("--------------------")
print("Accuracy:" + " {0:.2f}%".format(100*accuracy_score(y_train, y_results)))
print("AUC:" + " {0:.2f}%".format(100*roc_auc_score(y_train, y_results)))

Training Results
--------------------
Accuracy: 81.11%
AUC: 79.02%


Confusion Matrix

In [33]:
c_m = np.array([[tp, fp],[fn, tn]])
Confusion_Matrix_Logistic_Regression = pd.DataFrame(data = c_m,
                                 index = ['Survived', 'Died'], 
                                columns = ['Survived', 'Died'])

Confusion_Matrix_Logistic_Regression

Unnamed: 0,Survived,Died
Survived,204,55
Died,88,410


In [34]:
print(gs_reg.best_estimator_)
print(gs_reg.best_params_)

Pipeline(steps=[('scaler', StandardScaler()), ('reg', LogisticRegression())])
{'reg__max_iter': 100}


In [42]:
Model_Evaluation

Unnamed: 0,Accuracy,AUC
Log_Regression,0.8284,0.7983
Nearest Neighbors,,
Neural Networks,,
Decision Trees,,
Random Forest,,
Adaboost,,


In [45]:
Model_Evaluation.loc['Log_Regression', 'Accuracy'] = "{0:.2f}%".format(100*accuracy_score(y_test, y_pred))
Model_Evaluation.loc['Log_Regression', 'AUC'] = " {0:.2f}%".format(100*roc_auc_score(y_test, y_pred))

In [46]:
Model_Evaluation

Unnamed: 0,Accuracy,AUC
Log_Regression,82.84%,79.83%
Nearest Neighbors,,
Neural Networks,,
Decision Trees,,
Random Forest,,
Adaboost,,
