# Logistic Regression

## Data Preprocessing

In [None]:
#Importing all required libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

titanic_data = pd.read_csv("../../data/titanic_data.csv")

In [None]:
print(len(titanic_data))
titanic_data.head()

In [None]:
titanic_data.drop(columns=["PassengerId", "Name", "Ticket", "Cabin"], inplace=True)

In [None]:
titanic_data.shape
titanic_data.isnull().sum()

In [None]:
titanic_data[titanic_data.isnull().any(axis=1)].count()

In [None]:
titanic_data.dropna(inplace= True)

In [None]:
print(pd.crosstab(titanic_data["Sex"], titanic_data["Survived"]),"\n\n")
print(pd.crosstab(titanic_data["Pclass"], titanic_data["Survived"]))

In [None]:
titanic_data_corr = titanic_data.select_dtypes(["number"]).corr()
fig, axes = plt.subplots(figsize = (10,6))
sns.heatmap(titanic_data_corr, annot=True)
plt.show()

In [None]:
from sklearn.preprocessing import LabelEncoder

labelEncoder = LabelEncoder()

titanic_data.Sex = labelEncoder.fit_transform(titanic_data.Sex)
titanic_data = pd.get_dummies(titanic_data, columns= ["Embarked"], dtype=int)
titanic_data.shape

In [None]:
titanic_data = titanic_data.sample(frac=1).reset_index(drop=True)
titanic_data.shape

In [None]:
titanic_data.to_csv("../../data/titanic_data_processed.csv", index = False)

## Split and training

In [102]:
from sklearn.model_selection import train_test_split

titanic_data_processed = pd.read_csv("../../data/titanic_data_processed.csv")

X = titanic_data_processed.drop(columns=["Survived"], axis=1)
Y = titanic_data_processed["Survived"]

x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=42, test_size=0.2)

print("x_train shape : ", x_train.shape)
print("x_test shape : ", x_test.shape)
print("y_train shape : ", y_train.shape)
print("y_test shape : ", y_test.shape)

x_train shape :  (834, 9)
x_test shape :  (209, 9)
y_train shape :  (834,)
y_test shape :  (209,)


In [103]:
from sklearn.linear_model import LogisticRegression

# Model training
logistic_model = LogisticRegression(penalty='l2', C=1.0, solver='liblinear')
logistic_model.fit(x_train, y_train)

# Enhanced evaluation
train_score = logistic_model.score(x_train, y_train)
test_score = logistic_model.score(x_test, y_test)

print(f"Training Accuracy: {train_score:.4f}")
print(f"Test Accuracy: {test_score:.4f}")

Training Accuracy: 0.8465
Test Accuracy: 0.8708


In [105]:
from sklearn.metrics import classification_report

y_pred = logistic_model.predict(x_test)

# Comprehensive results analysis
final_result = pd.DataFrame({
    "actual": y_test,
    "predicted": y_pred,
    "correct": y_test == y_pred
})

conf_matrix = pd.crosstab(y_pred, y_test, 
                         rownames=['Predicted'], 
                         colnames=['Actual'],
                         margins=True)
print("\nConfusion Matrix:")
print(conf_matrix)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Feature importance (if you want to understand what drives predictions)
if hasattr(logistic_model, 'coef_'):
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': logistic_model.coef_[0]
    }).sort_values('importance', ascending=False)
    
    print("\nFeature Importance:")
    print(feature_importance)


Confusion Matrix:
Actual       0   1  All
Predicted              
0          113  14  127
1           13  69   82
All        126  83  209

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.90      0.89       126
           1       0.84      0.83      0.84        83

    accuracy                           0.87       209
   macro avg       0.87      0.86      0.86       209
weighted avg       0.87      0.87      0.87       209


Feature Importance:
      feature  importance
6  Embarked_C    1.175492
7  Embarked_Q    0.846316
8  Embarked_S    0.844095
5        Fare    0.002551
2         Age   -0.020512
4       Parch   -0.152503
3       SibSp   -0.269842
0      Pclass   -0.680436
1         Sex   -3.355209
