In [3]:
# Logistic regression on the Titanic data: predict `Survived` from passenger features.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ---------------------------------------------------------------------------
# End-to-end script: load the Titanic CSV, clean and encode it, fit a logistic
# regression on five features, report hold-out metrics, and classify one
# hand-written example passenger.
# ---------------------------------------------------------------------------

FEATURE_COLUMNS = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked']
SEX_CODES = {'male': 0, 'female': 1}
EMBARKED_CODES = {'S': 0, 'C': 1, 'Q': 2}

# Load the data and discard the columns this script does not model.
d = pd.read_csv("titanic.csv").drop(columns=['Name', 'Ticket', 'Cabin'])

# Impute missing values: mean for Age, most frequent value for Embarked.
# NOTE(review): both fill values are computed over the whole dataset, i.e.
# before the train/test split below, so test rows contribute to them.
# Consider deriving them from the training rows only — confirm whether the
# recorded metrics need to stay reproducible before changing this.
age_fill = d['Age'].mean()
embarked_fill = d['Embarked'].mode()[0]
d = d.assign(
    Age=d['Age'].fillna(age_fill),
    Embarked=d['Embarked'].fillna(embarked_fill),
)

# Encode the two categorical columns as integer codes.
# Values absent from a lookup table become NaN (pandas `Series.map` semantics).
d = d.assign(
    Sex=d['Sex'].map(SEX_CODES),
    Embarked=d['Embarked'].map(EMBARKED_CODES),
)

# Model inputs (X) and the label to predict (y).
X = d[FEATURE_COLUMNS]
y = d['Survived']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# `fit` returns the estimator itself, so construction and training chain.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)

print(" Accuracy:", accuracy_score(y_test, y_pred))
print("\n Classification Report:\n", classification_report(y_test, y_pred))
print("\n Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# One example passenger, in FEATURE_COLUMNS order: Pclass 2, Sex code 1
# ('female'), Age 30, Fare 50, Embarked code 1 ('C').
test_passenger = pd.DataFrame(
    {
        'Pclass': [2],
        'Sex': [1],
        'Age': [30],
        'Fare': [50],
        'Embarked': [1],
    }
)
prediction = model.predict(test_passenger)

if prediction[0] == 1:
    outcome = "Survived "
else:
    outcome = "Did Not Survive ❌"
print("Prediction:", outcome)


 Accuracy: 0.7932960893854749

 Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.84      0.83       105
           1       0.76      0.73      0.74        74

    accuracy                           0.79       179
   macro avg       0.79      0.78      0.79       179
weighted avg       0.79      0.79      0.79       179


 Confusion Matrix:
 [[88 17]
 [20 54]]
Prediction: Survived 
