1. Import necessary libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

2. Load the dataset

In [None]:
# Read the passenger table from a relative CSV path (resolved against the
# current working directory) and preview the first five rows.
df = pd.read_csv('Titanic-Dataset.csv')
df.head()

3. Data Pre-processing

In [None]:
# Drop columns that are not used as model features.
df = df.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Handle missing values.
# The filled column is assigned back rather than calling
# df[col].fillna(..., inplace=True): that form is chained assignment on a
# temporary Series, which pandas deprecates and which leaves df unchanged
# when Copy-on-Write is enabled.
df['Age'] = df['Age'].fillna(df['Age'].median())
df['Fare'] = df['Fare'].fillna(df['Fare'].median())
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])

# Convert 'Sex' to numeric (male -> 0, female -> 1).
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

# One-hot encode 'Embarked'. drop_first=True omits the first category's
# indicator column; the prediction helper later in this notebook relies on
# the remaining columns being Embarked_Q and Embarked_S.
df = pd.get_dummies(df, columns=['Embarked'], drop_first=True)

df.head(5)

4. Exploratory Data Analysis with Graphs

In [None]:
# Two side-by-side panels: overall survival counts, and the age histogram
# (with KDE overlay) split by survival outcome.
fig_overview, (ax_counts, ax_age) = plt.subplots(1, 2, figsize=(12, 5))

sns.countplot(data=df, x='Survived', ax=ax_counts)
ax_counts.set_title('Survival Counts')

sns.histplot(data=df, x='Age', hue='Survived', bins=30, alpha=0.5, kde=True, ax=ax_age)
ax_age.set_title('Age Distribution by Survival')

fig_overview.tight_layout()
plt.show()

# Separate figure: survival outcome counts within each passenger class.
fig_class, ax_class = plt.subplots(figsize=(10, 4))
sns.countplot(data=df, x='Pclass', hue='Survived', ax=ax_class)
ax_class.set_title('Survival by Passenger Class')
plt.show()

5. Feature Selection and Train/Test Split

In [None]:
# Target is the 'Survived' column; every remaining column is a feature.
y = df['Survived']
X = df.drop(columns='Survived')

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

6. Model Training with Cross-validation

In [None]:
# Score a seeded random forest with 5-fold cross-validation on the
# training split only, leaving the held-out test rows untouched.
clf = RandomForestClassifier(random_state=42)
cv_scores = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=5)
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean CV accuracy: {cv_scores.mean():.3f}")

# Fit the same estimator on the full training split; this fitted `clf` is
# what the later cells use for prediction.
clf.fit(X=X_train, y=y_train)

7. Prediction and Evaluation

In [None]:
# Predict on the held-out rows, then compute each metric once before printing.
y_pred = clf.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("\nTest set accuracy:", test_accuracy)
print("Confusion Matrix:\n", test_confusion)
print("Classification Report:\n", test_report)

8. Predicting for User Input

In [None]:
def predict_user_survival(model):
    """Prompt for one passenger's details and print the model's prediction.

    Parameters
    ----------
    model : object
        A fitted estimator whose ``predict`` accepts a one-row DataFrame with
        the columns Pclass, Sex, Age, SibSp, Parch, Fare, Embarked_Q and
        Embarked_S (in that order).

    Returns
    -------
    None
        The result is printed, not returned. If any answer cannot be parsed
        or is not one of the offered choices, "Invalid input!" is printed and
        no prediction is made.
    """
    print("Enter passenger details for prediction:")
    try:
        Pclass = int(input("Ticket class (1, 2, or 3): "))
        if Pclass not in (1, 2, 3):
            raise ValueError("ticket class must be 1, 2, or 3")

        Sex = input("Sex (male or female): ").strip().lower()
        # Reject typos explicitly; otherwise anything other than 'male'
        # would silently be encoded as female.
        if Sex not in ('male', 'female'):
            raise ValueError("sex must be 'male' or 'female'")

        Age = float(input("Age: "))
        # float() accepts 'nan' and 'inf', so check finiteness as well as sign.
        if not (np.isfinite(Age) and Age >= 0):
            raise ValueError("age must be a non-negative number")

        SibSp = int(input("Number of siblings/spouses aboard: "))
        Parch = int(input("Number of parents/children aboard: "))
        if SibSp < 0 or Parch < 0:
            raise ValueError("relative counts must be non-negative")

        Fare = float(input("Fare: "))
        if not (np.isfinite(Fare) and Fare >= 0):
            raise ValueError("fare must be a non-negative number")

        Embarked = input("Port of Embarkation (C, Q, or S): ").strip().upper()
        # Reject unknown ports; otherwise they would silently be encoded as
        # 'C' (both indicator columns zero).
        if Embarked not in ('C', 'Q', 'S'):
            raise ValueError("port must be C, Q, or S")
    except ValueError:
        print("Invalid input!")
        return

    # Arrange the input as a one-row frame. Building it from a dict keeps the
    # integer columns as integers instead of coercing the whole row to float.
    user_df = pd.DataFrame([{
        'Pclass': Pclass,
        'Sex': 0 if Sex == 'male' else 1,
        'Age': Age,
        'SibSp': SibSp,
        'Parch': Parch,
        'Fare': Fare,
        'Embarked_Q': 1 if Embarked == 'Q' else 0,
        'Embarked_S': 1 if Embarked == 'S' else 0,
    }])

    pred = model.predict(user_df)[0]
    print("Prediction: Survived" if pred == 1 else "Prediction: Did not survive")
# Run the interactive prompt with `clf`, the classifier fitted in the training
# cell. The call waits on input(), so this cell blocks until answers are typed.
predict_user_survival(clf)