In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

# 1. Load the dataset
df = pd.read_csv('train.csv')

# 2. Data Preprocessing
features = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked']
target = 'Survived'

# Keep only the model inputs and the label; .copy() gives an independent frame
# so the column assignments below do not act on a slice of the loaded data.
df = df[features + [target]].copy()

# a. Handling Missing Values
# Assign the filled column back rather than calling
# `df[col].fillna(..., inplace=True)`: that chained in-place form operates on a
# temporary Series, triggers a FutureWarning on pandas 2.x, and leaves `df`
# unchanged once Copy-on-Write is active (pandas 3.0).
# NOTE(review): the fill values are computed over every row, including rows
# that later land in the test split - consider deriving them from the training
# split only to avoid leakage.
df['Age'] = df['Age'].fillna(df['Age'].median())
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
df['Fare'] = df['Fare'].fillna(df['Fare'].median())

# b. Encoding Categorical Variables (Manual mapping to ensure consistency in App)
# Sex: male=0, female=1
sex_codes = {'male': 0, 'female': 1}
# Embarked: S=0, C=1, Q=2
embarked_codes = {'S': 0, 'C': 1, 'Q': 2}
df['Sex'] = df['Sex'].map(sex_codes)
df['Embarked'] = df['Embarked'].map(embarked_codes)

# Series.map yields NaN for any value that is not a key of the mapping (and for
# values that were already missing); fail loudly instead of passing NaN on.
unmapped_columns = [col for col in ('Sex', 'Embarked') if df[col].isna().any()]
if unmapped_columns:
    raise ValueError(
        f"Missing or unexpected category values after encoding in: {unmapped_columns}"
    )

# Feature matrix and label vector used by the modelling steps.
X = df[features]
y = df[target]

# 3. Train-Test Split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. Implement Algorithm: Random Forest Classifier
# 100 trees, seeded so repeated runs build the same forest.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 5. Evaluate the model
# Predict once on the held-out rows and reuse the predictions for both metrics.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print("Model Accuracy:", test_accuracy)
print("\nClassification Report:\n")
print(test_report)

# 6. Save the model
# Persist the fitted estimator to the current working directory.
model_filename = 'titanic_survival_model.pkl'
joblib.dump(model, model_filename)
print(f"Model saved successfully as '{model_filename}'")

# 7. Demonstrate reloading
# joblib.load unpickles the file, which can execute arbitrary code - only load
# model files from a source you trust (here: the file written just above).
loaded_model = joblib.load(model_filename)

# Verify the round trip: the reloaded estimator must reproduce the predictions
# the in-memory model made on the held-out rows.
assert np.array_equal(loaded_model.predict(X_test), y_pred), (
    "Reloaded model predictions differ from the original model"
)
print("Model reloaded successfully.")

Model Accuracy: 0.7932960893854749

Classification Report:

              precision    recall  f1-score   support

           0       0.81      0.84      0.83       105
           1       0.76      0.73      0.74        74

    accuracy                           0.79       179
   macro avg       0.79      0.78      0.79       179
weighted avg       0.79      0.79      0.79       179

Model saved successfully as 'titanic_survival_model.pkl'
Model reloaded successfully.
