# Training a Logistic Regression Binary Classifier

In [1]:
# Import Libraries

import pandas as pd

In [2]:
# Read the pre-processed Titanic CSV into a DataFrame and preview its first rows

dataset_path = 'datasets/titanic_processed.csv'
titanic_df = pd.read_csv(dataset_path)

titanic_df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked_C,Embarked_Q,Embarked_S
0,1,1,1,35.0,0,0,26.2875,0,0,1
1,0,3,0,40.0,1,0,9.475,0,0,1
2,0,2,1,24.0,0,0,10.5,0,0,1
3,0,2,1,60.0,1,1,39.0,0,0,1
4,1,2,0,24.0,2,3,18.75,0,0,1


In [3]:
# Dataset shape as (number of rows, number of columns)

titanic_df.shape

(712, 10)

In [4]:
# Split the dataset into a training set (80%) and a testing set (20%)

from sklearn.model_selection import train_test_split

# Features are every column except the target; the target is the 'Survived' column
X = titanic_df.drop(columns='Survived')
Y = titanic_df['Survived']

# Fix the seed so the split -- and every metric computed from it further down --
# is the same on each Restart & Run All instead of changing run to run.
RANDOM_STATE = 42

x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=RANDOM_STATE
)

In [5]:
# Shape of the training data: (feature matrix shape, label vector shape)

x_train.shape, y_train.shape

((569, 9), (569,))

In [6]:
# Shape of the testing data: (feature matrix shape, label vector shape)

x_test.shape, y_test.shape

((143, 9), (143,))

In [7]:
# Import the estimator, configure its hyper-parameters, then fit it on the training set

from sklearn.linear_model import LogisticRegression

# L2-regularised logistic regression (C=1.0) solved with liblinear
classifier = LogisticRegression(penalty='l2', C=1.0, solver='liblinear')

# fit() returns the fitted estimator itself, which is what logistic_model refers to
logistic_model = classifier.fit(x_train, y_train)

In [8]:
# Run predictions on the test data; y_pred holds the predicted label for each row of x_test

y_pred = logistic_model.predict(x_test)

In [9]:
# Put the actual labels (y_test) and the predicted labels (y_pred) side by side
# in a single dataframe, pred_results

pred_results = pd.DataFrame(dict(y_test=y_test, y_pred=y_pred))

## Calculating Accuracy, Precision and Recall for the Classification Model

In [10]:
# Preview the first rows of pred_results (actual vs. predicted labels)

pred_results.head()

Unnamed: 0,y_test,y_pred
589,1,0
157,1,1
99,1,0
313,1,1
49,0,1


In [11]:
# Confusion matrix via crosstab: rows are predicted labels, columns are actual labels

predicted_labels = pred_results['y_pred']
actual_labels = pred_results['y_test']
titanic_crosstab = pd.crosstab(index=predicted_labels, columns=actual_labels)

titanic_crosstab

y_test,0,1
y_pred,Unnamed: 1_level_1,Unnamed: 2_level_1
0,73,22
1,14,34


In [12]:
# Import the metrics used to measure how good the model is

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [13]:
# Score the predictions against the true test labels with scikit-learn's metrics
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# Report each metric on its own line, labelled with the scorer that produced it
for metric_label, metric_value in (
    ('accuracy_score: ', acc),
    ('precision_score: ', prec),
    ('recall_score: ', recall),
):
    print(metric_label, metric_value)


accuracy_score:  0.7482517482517482
precision_score:  0.7083333333333334
recall_score:  0.6071428571428571


In [14]:
# Re-display the confusion matrix (rows: y_pred, columns: y_test) for reference
titanic_crosstab

y_test,0,1
y_pred,Unnamed: 1_level_1,Unnamed: 2_level_1
0,73,22
1,14,34


In [15]:
# The crosstab is indexed by predicted label (rows) and actual label (columns),
# so every cell is addressed as .loc[predicted, actual].
TP = titanic_crosstab.loc[1, 1]  # predicted 1, actual 1
TN = titanic_crosstab.loc[0, 0]  # predicted 0, actual 0
FP = titanic_crosstab.loc[1, 0]  # predicted 1, actual 0
FN = titanic_crosstab.loc[0, 1]  # predicted 0, actual 1

In [16]:
# Manual calculation of accuracy from the confusion-matrix counts
    
# Accuracy = correctly classified samples / all samples
correct_predictions = TP + TN
all_predictions = TP + FP + TN + FN
accuracy_score_verified = correct_predictions / all_predictions

accuracy_score_verified

0.7482517482517482

In [17]:
# Precision = true positives / everything the model predicted as positive
predicted_positives = TP + FP
precision_score_verified = TP / predicted_positives

precision_score_verified

0.7083333333333334

In [18]:
# Recall = true positives / all actual positives (TP + FN).
# Named *_verified to match accuracy_score_verified and precision_score_verified.
recall_score_verified = TP / (TP + FN)

# Backward-compatible alias: the value was originally stored under this name
recall_score_survived = recall_score_verified

recall_score_verified

0.6071428571428571