# Intermediate Task: Titanic Classification (Logistic Regression)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Download the Titanic passenger table (requires network access).
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)

# Single source of truth for the modelling columns, so the frame selection
# and the X / y split below cannot drift apart.
FEATURES = ['Pclass', 'Sex', 'Age', 'Fare']
TARGET = 'Survived'

# Keep only the columns we model on and drop every row that has a missing
# value in any of them; .copy() detaches the result from the original frame
# so the in-place column assignment below does not hit a chained-assignment
# warning.
df = df[[TARGET] + FEATURES].dropna().copy()

# Encode sex numerically. Series.map() turns any label that is not a key of
# the dict into NaN, so check right away and fail with a clear message
# instead of letting the estimator raise a generic NaN error later.
sex_labels = df['Sex']
df['Sex'] = sex_labels.map({'male': 0, 'female': 1})
if df['Sex'].isna().any():
    unexpected = sorted(sex_labels[df['Sex'].isna()].astype(str).unique())
    raise ValueError(f"Unexpected values in 'Sex' column: {unexpected}")

X = df[FEATURES]
y = df[TARGET]

# Hold out 20% of the rows for evaluation. stratify=y keeps the class ratio
# the same in both splits; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# max_iter is set above scikit-learn's default of 100 to give the solver
# more iterations on these unscaled features.
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

pred = model.predict(X_test)
# predict_proba returns one column per entry of model.classes_; column 1 is
# the second class, which the plot below treats as "survived".
proba = model.predict_proba(X_test)[:, 1]

print("Accuracy:", round(accuracy_score(y_test, pred), 4))
# sep="" stops print() from inserting a space after the "\n", which would
# shift the first line of each multi-line table one column to the right.
print("\nClassification report:\n", classification_report(y_test, pred), sep="")
print("\nConfusion Matrix:\n", confusion_matrix(y_test, pred), sep="")

# One point per held-out sample; the dashed line marks probability 0.5.
plt.figure()
plt.scatter(range(len(proba)), proba, s=10)
plt.axhline(0.5, linestyle="--")
plt.title("Predicted survival probability (holdout)")
plt.xlabel("Sample index")
plt.ylabel("P(survival)")
plt.show()
