In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

# 1. Read the raw passenger records
# 'train.csv' is a relative path, so it is resolved against the working
# directory (expected to be the 'model' folder).
df = pd.read_csv('train.csv')

# 2. Name the label column and the five predictor columns
target = 'Survived'
selected_features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Fare']

# The predictors are copied so that later preprocessing never writes back into df.
y = df.loc[:, target]
X = df.loc[:, selected_features].copy()

# 3. Data Preprocessing
# a. Encoding categorical variables (Sex: male=0, female=1)
# The mapping is fixed and learns nothing from the data, so applying it to the
# whole frame before the split cannot leak information.
X['Sex'] = X['Sex'].map({'male': 0, 'female': 1})

# 4. Split data into Training (80%) and Testing (20%)
# The split happens BEFORE imputation so that statistics of the test rows never
# influence the values the model is trained on (avoids data leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# b. Handling missing values
# Medians are learned from the training rows only and then reused for every
# frame; fillna with a Series fills each column named in its index (Age, Fare).
train_medians = X_train[['Age', 'Fare']].median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)
# Keep the full frame free of missing Age/Fare values, as it was before this
# change, using the same training-derived medians.
X = X.fillna(train_medians)

# 5 & 6. Configure the Random Forest and train it on the training split
# fit() hands back the fitted estimator, so construction and training are chained.
forest_settings = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**forest_settings).fit(X_train, y_train)

# 7. Predict on the held-out split and print the per-class metrics
predictions = model.predict(X_test)
report = classification_report(y_test, predictions)
print("--- Classification Report ---", report, sep="\n")

# 8. Persist the trained model to disk
joblib.dump(model, 'titanic_survival_model.pkl')
print("\nModel saved successfully as 'titanic_survival_model.pkl'")

# 9. Reloading test: load the file back and confirm the model can still predict
loaded_model = joblib.load('titanic_survival_model.pkl')
# The model was fitted on a DataFrame, so the sample is passed as a one-row
# DataFrame with the same column names in the same order. A bare nested list
# carries no feature names and relies on the caller remembering the column order.
# Sample values follow selected_features: Pclass=3, Sex=0 (male), Age=22, SibSp=1, Fare=7.25
sample_passenger = pd.DataFrame([[3, 0, 22, 1, 7.25]], columns=selected_features)
sample_prediction = loaded_model.predict(sample_passenger)  # Test with a sample
print(f"Reloaded model test prediction: {'Survived' if sample_prediction[0] == 1 else 'Did Not Survive'}")

--- Classification Report ---
              precision    recall  f1-score   support

           0       0.83      0.86      0.84       105
           1       0.79      0.74      0.76        74

    accuracy                           0.81       179
   macro avg       0.81      0.80      0.80       179
weighted avg       0.81      0.81      0.81       179


Model saved successfully as 'titanic_survival_model.pkl'
Reloaded model test prediction: Did Not Survive


