In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import pickle

# -----------------------------
# Step 1: Load & Preprocess Data
# -----------------------------
# Read the training set from the current working directory. Later statements
# in this cell read the Name, Age, Fare, Embarked, Sex, Cabin, Ticket,
# Survived and PassengerId columns, so the CSV must provide all of them.
df = pd.read_csv("train.csv")

def get_title(name):
    """Extract the honorific from a name written as ``"Last, Title. First"``.

    The title is the text between the first comma and the next period inside
    the segment that follows that comma, with surrounding whitespace removed
    (``"Braund, Mr. Owen Harris"`` -> ``"Mr"``).

    Parameters
    ----------
    name : str
        Passenger name. Non-string values (for example a missing/NaN cell)
        are accepted and treated as having no title.

    Returns
    -------
    str
        The extracted title, or ``'Unknown'`` when ``name`` is not a string,
        contains no period, or contains no comma.
    """
    # A missing name arrives as a float NaN; `'.' in name` would raise
    # TypeError on it, so reject non-strings up front.
    if not isinstance(name, str) or '.' not in name:
        return 'Unknown'

    segments = name.split(',')
    # Without a comma there is no "Last, ..." prefix to skip; indexing
    # segments[1] would raise IndexError, so fall back to the sentinel.
    if len(segments) < 2:
        return 'Unknown'

    return segments[1].split('.')[0].strip()

# Build the raw 'Title' column by running get_title over every passenger name
df = df.assign(Title=df['Name'].map(get_title))

# Rare honorifics and the coarser group each one is folded into. Every title
# appears exactly once, so there is no ambiguity about which group wins
# ('Lady' is grouped with Royalty).
_TITLE_GROUPS = {
    'Capt': 'Officer',
    'Col': 'Officer',
    'Major': 'Officer',
    'Jonkheer': 'Royalty',
    'Don': 'Royalty',
    'the Countess': 'Royalty',
    'Dona': 'Royalty',
    'Lady': 'Royalty',
    'Sir': 'Royalty',
    'Mme': 'Mrs',
    'Mlle': 'Miss',
    'Ms': 'Miss',
}


def replace_titles(x):
    """Collapse a row's rare title into one of the broader title groups.

    Parameters
    ----------
    x : mapping
        Anything indexable by ``'Title'`` (a DataFrame row or a plain dict).

    Returns
    -------
    The group name ('Officer', 'Royalty', 'Mrs' or 'Miss') when the title is
    one of the rare honorifics listed in ``_TITLE_GROUPS``; otherwise the
    title itself, unchanged.
    """
    title = x['Title']
    return _TITLE_GROUPS.get(title, title)

# Fold rare titles into their broader groups (row-wise, via replace_titles)
df['Title'] = df.apply(replace_titles, axis=1)

# Fill missing values: medians for the numeric columns, "S" for Embarked
df['Age'] = df['Age'].fillna(df['Age'].median())
df['Fare'] = df['Fare'].fillna(df['Fare'].median())
df['Embarked'] = df['Embarked'].fillna("S")

# Drop columns that are not used as model features
df = df.drop(columns=["Cabin", "Ticket", "Name"])


def _encode_column(frame, column, codes):
    """Return ``frame[column]`` with each category replaced by its integer code.

    Raises
    ------
    ValueError
        If the column holds a non-missing value that has no entry in
        ``codes``. Failing here names the offending values instead of letting
        a stray string reach the classifier.
    """
    values = frame[column]
    unmapped = values[values.notna() & ~values.isin(list(codes))]
    if not unmapped.empty:
        raise ValueError(
            f"Column {column!r} contains values with no code: "
            f"{sorted(unmapped.astype(str).unique())}"
        )
    return values.map(codes)


# Encode categorical features.
# Series.map is used instead of Series.replace: the recorded cell output shows
# warnings pointing at the replace-based encodings (presumably pandas'
# FutureWarning about downcasting in `replace`), and `map` with a dict yields
# the integer codes directly.
df['Sex'] = _encode_column(df, 'Sex', {'male': 0, 'female': 1})
df['Embarked'] = _encode_column(df, 'Embarked', {'S': 0, 'C': 1, 'Q': 2})
df['Title'] = _encode_column(df, 'Title', {
    'Mr': 0, 'Miss': 1, 'Mrs': 2, 'Master': 3,
    'Dr': 4, 'Rev': 5, 'Officer': 6, 'Royalty': 7
})

# -----------------------------
# Step 2: Train/Test Split
# -----------------------------
# Every column except the label and the row identifier is used as a feature.
predictors = df.drop(columns=['Survived', 'PassengerId'])
target = df['Survived']

# Hold out 22% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
x_train, x_val, y_train, y_val = train_test_split(
    predictors, target, test_size=0.22, random_state=0
)

# -----------------------------
# Step 3: Train RandomForest
# -----------------------------
randomforest = RandomForestClassifier(
    n_estimators=200,
    max_depth=7,
    random_state=42
)
randomforest.fit(x_train, y_train)

# Score on the held-out rows so the split is actually used and the quality of
# the saved model is visible in the cell output.
val_accuracy = randomforest.score(x_val, y_val)
print(f"Validation accuracy: {val_accuracy:.3f}")

# -----------------------------
# Step 4: Save Model + Features
# -----------------------------
# The feature list is stored next to the model so that inference code can
# rebuild its input frame with the same column order used for training.
model_data = {
    "model": randomforest,
    "features": list(predictors.columns)
}
# The context manager closes (and therefore flushes) the file before anything
# else tries to read or download it.
with open("titanic_model.sav", "wb") as model_file:
    pickle.dump(model_data, model_file)

print("✅ Model trained and saved as titanic_model.sav")


# -----------------------------
# Step 5: Runner Function
# -----------------------------
def prediction_model(pclass, sex, age, sibsp, parch, fare, embarked, title,
                     model_path="titanic_model.sav"):
    """Predict survival for one passenger using the saved model bundle.

    The bundle at ``model_path`` is expected to be a pickled dict with a
    ``"model"`` entry (an object exposing ``predict``) and a ``"features"``
    entry (the column order the model was trained on).

    Parameters
    ----------
    pclass, age, sibsp, parch, fare
        Values for the ``Pclass``, ``Age``, ``SibSp``, ``Parch`` and ``Fare``
        columns.
    sex
        Encoded value for ``Sex`` (training uses male=0, female=1).
    embarked
        Encoded value for ``Embarked`` (training uses S=0, C=1, Q=2).
    title
        Encoded value for ``Title`` (training uses Mr=0, Miss=1, Mrs=2,
        Master=3, Dr=4, Rev=5, Officer=6, Royalty=7).
    model_path : str, optional
        Location of the pickled bundle. Defaults to ``"titanic_model.sav"``
        in the current working directory.

    Returns
    -------
    The first element of ``model.predict(...)`` for the single-row input.

    Raises
    ------
    FileNotFoundError
        If ``model_path`` does not exist.
    KeyError
        If the bundle lists a feature that is not one of the eight columns
        built here.
    """
    # Local imports keep the function self-contained (presumably so it can be
    # copied into a separate serving script -- confirm before hoisting them).
    import pickle
    import pandas as pd

    # SECURITY: unpickling executes arbitrary code embedded in the file; only
    # load bundles produced by a trusted training run.
    with open(model_path, "rb") as model_file:
        model_data = pickle.load(model_file)
    model = model_data["model"]
    features = model_data["features"]

    # Single-row frame, re-indexed to the exact column order used in training.
    row = {
        "Pclass": pclass,
        "Sex": sex,
        "Age": age,
        "SibSp": sibsp,
        "Parch": parch,
        "Fare": fare,
        "Embarked": embarked,
        "Title": title,
    }
    input_df = pd.DataFrame([row])[features]

    prediction = model.predict(input_df)[0]
    return prediction



  df['Sex'] = df['Sex'].replace({'male': 0, 'female': 1})
  df['Embarked'] = df['Embarked'].replace({'S': 0, 'C': 1, 'Q': 2})
  df['Title'] = df['Title'].replace({


✅ Model trained and saved as titanic_model.sav


In [None]:
# Offer the saved model as a browser download when running inside Google
# Colab. The google.colab package is not importable elsewhere, so the import
# is guarded to let the notebook run end to end in other environments.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; titanic_model.sav was left in the working directory")
else:
    files.download("titanic_model.sav")



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>