# Titanic Survival Prediction
Using Machine Learning (Logistic Regression)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Read the passenger data from the working directory
# (swap in 'train.csv' when working from the Kaggle download)
df = pd.read_csv(filepath_or_buffer='titanic.csv')
df.head(n=5)  # preview the first five rows

In [None]:
# Basic information about the loaded frame
df.info()  # prints its own column / dtype / non-null summary

# Only the last expression of a cell is displayed automatically, so the
# describe() table must be printed explicitly or it is silently discarded.
print(df.describe())

# Missing-value count per column (shown as the cell's output)
df.isnull().sum()

In [None]:
# Fill missing values by assigning the filled column back to the frame.
# Calling fillna(..., inplace=True) on df['col'] acts on a column selection:
# pandas 2.x raises a FutureWarning for it, and with Copy-on-Write enabled the
# original frame is left unchanged.
df['Age'] = df['Age'].fillna(df['Age'].median())  # median of the known ages
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])  # most frequent value

# Drop column with too many missing values
df.drop('Cabin', axis=1, inplace=True)

In [None]:
# Drop irrelevant features
df.drop(['Name', 'Ticket', 'PassengerId'], axis=1, inplace=True)

# Encode categorical variables with one encoder per column. Refitting a single
# LabelEncoder overwrites its learned classes, so the Sex mapping would no
# longer be available for inverse_transform or for encoding new samples.
# LabelEncoder numbers the sorted class labels, giving the codes noted below
# (assumes the usual 'female'/'male' and 'C'/'Q'/'S' values -- confirm with
# sex_encoder.classes_ / embarked_encoder.classes_).
sex_encoder = LabelEncoder()
df['Sex'] = sex_encoder.fit_transform(df['Sex'])  # Male=1, Female=0

embarked_encoder = LabelEncoder()
df['Embarked'] = embarked_encoder.fit_transform(df['Embarked'])  # C=0, Q=1, S=2

# Backward-compatible alias: `le` previously ended up fitted on Embarked.
le = embarked_encoder

In [None]:
# Separate the target column from the predictor columns
y = df['Survived']
X = df.drop(columns=['Survived'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a logistic regression classifier on the training split,
# allowing the solver up to 200 iterations
model = LogisticRegression(max_iter=200)
model.fit(X=X_train, y=y_train)

In [None]:
# Score the held-out split
y_pred = model.predict(X_test)

# Each metric takes (true labels, predicted labels); print them under their headings
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))

In [None]:
# Sample prediction for one hypothetical passenger.
# The model was fitted on a DataFrame, so the sample is passed as a DataFrame
# with the same column names. A bare nested list makes scikit-learn warn that
# X has no valid feature names and silently depends on the values being typed
# in exactly the training column order.
# Sex and Embarked use the integer codes produced by the encoding step
# (Sex: 0=female, Embarked: 2=S -- per the encoding comments above).
passenger = {
    'Pclass': 3,
    'Sex': 0,
    'Age': 25,
    'SibSp': 0,
    'Parch': 0,
    'Fare': 7.25,
    'Embarked': 2,
}
# Reorder to the training column order; a missing column raises KeyError
# instead of producing a misaligned prediction.
sample = pd.DataFrame([passenger])[list(X_train.columns)]
result = model.predict(sample)[0]
print("Survived" if result == 1 else "Did not survive")

In [None]:
# Bar chart of how many passengers fall into each Survived class
count_ax = sns.countplot(data=df, x='Survived')
count_ax.set_title("Survival Count")
plt.show()

# Annotated heatmap of the pairwise correlations between the remaining columns
correlations = df.corr()
heat_ax = sns.heatmap(correlations, annot=True, cmap='coolwarm')
heat_ax.set_title("Correlation Matrix")
plt.show()