In [1]:
import joblib
import pandas as pd

# Score every passenger in the Titanic CSV with the saved model bundle and show the
# predictions next to the original columns.

# Load Titanic dataset
df = pd.read_csv("Titanic-Dataset.csv")

# Load the saved bundle. The keys read below are 'feature_columns', 'encoders',
# 'scaler', 'model', 'task_type' and 'target_column'.
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only load bundles you created yourself or obtained from a trusted source.
# NOTE(review): pickled estimators are tied to the library version that saved them;
# confirm the installed scikit-learn matches the one used for training.
bundle = joblib.load("Random Forest Model.joblib")

# Keep only the feature columns that were used during training
X_new = df[bundle['feature_columns']].copy()

# Apply encoders (handle unseen categories safely).
# Assumes each encoder is a fitted scikit-learn LabelEncoder, whose integer code for a
# label is that label's position in `classes_` -- TODO confirm against the training code.
for col, le in bundle['encoders'].items():
    if col in X_new.columns:
        # Build the label -> code table once per column instead of calling
        # le.transform() (and scanning le.classes_) for every single row.
        codes = {label: code for code, label in enumerate(le.classes_)}
        # Values are stringified before lookup; labels absent from the encoder map to -1.
        X_new[col] = X_new[col].astype(str).map(lambda s: codes.get(s, -1))

# Scale features with the scaler stored in the bundle
scaler = bundle['scaler']
X_new_scaled = scaler.transform(X_new)

# Predict
model = bundle['model']
predictions = model.predict(X_new_scaled)

# Decode predictions back to original labels, but only for classification bundles
# whose target column has its own encoder.
if bundle['task_type'] == 'classification' and bundle['target_column'] in bundle['encoders']:
    predictions = bundle['encoders'][bundle['target_column']].inverse_transform(predictions)

# Combine predictions with a copy of the dataset so `df` itself stays unmodified
results_df = df.copy()
results_df['Prediction'] = predictions

# Show a few rows
results_df[['PassengerId', 'Name', 'Sex', 'Age', 'Pclass', 'Survived', 'Prediction']].head(15)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Unnamed: 0,PassengerId,Name,Sex,Age,Pclass,Survived,Prediction
0,1,"Braund, Mr. Owen Harris",male,22.0,3,0,0
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,1,1
2,3,"Heikkinen, Miss. Laina",female,26.0,3,1,1
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,1,1
4,5,"Allen, Mr. William Henry",male,35.0,3,0,0
5,6,"Moran, Mr. James",male,,3,0,0
6,7,"McCarthy, Mr. Timothy J",male,54.0,1,0,0
7,8,"Palsson, Master. Gosta Leonard",male,2.0,3,0,0
8,9,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,3,1,1
9,10,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,2,1,1


In [2]:
import pandas as pd

# Hand-written example passenger, laid out with the same columns as the Titanic CSV.
example_passenger = {
    "PassengerId": 1001,
    "Pclass": 1,               # 1st class
    "Name": "Doe, Mr. John",   # arbitrary
    "Sex": "male",
    "Age": 28,
    "SibSp": 0,                # no siblings/spouse
    "Parch": 0,                # no parents/children
    "Ticket": "A12345",
    "Fare": 72.50,
    "Cabin": "C85",
    "Embarked": "S",
    "Survived": None,          # target left empty; it is what we want to predict
}

# Wrap the single record in a list so pandas builds a one-row frame.
new_df = pd.DataFrame([example_passenger])

new_df


Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,1001,1,"Doe, Mr. John",male,28,0,0,A12345,72.5,C85,S,


In [3]:
# Prepare new_df for prediction using the same pipeline.
# Uses `bundle` (the loaded model bundle) and `new_df` (the example passenger)
# created in earlier cells.

# Keep only the feature columns
X_input = new_df[bundle['feature_columns']].copy()

# Apply encoders (handle unseen categories safely).
# Assumes each encoder is a fitted scikit-learn LabelEncoder, whose integer code for a
# label is that label's position in `classes_` -- TODO confirm against the training code.
for col, le in bundle['encoders'].items():
    if col in X_input.columns:
        # Build the label -> code table once per column instead of calling
        # le.transform() (and scanning le.classes_) for every single value.
        codes = {label: code for code, label in enumerate(le.classes_)}
        # Values are stringified before lookup; labels absent from the encoder map to -1.
        X_input[col] = X_input[col].astype(str).map(lambda s: codes.get(s, -1))

# Scale features with the scaler stored in the bundle
X_input_scaled = bundle['scaler'].transform(X_input)

# Predict
prediction = bundle['model'].predict(X_input_scaled)

# Decode the prediction back to its original label, but only for classification
# bundles whose target column has its own encoder.
if bundle['task_type'] == 'classification' and bundle['target_column'] in bundle['encoders']:
    prediction = bundle['encoders'][bundle['target_column']].inverse_transform(prediction)

# Show prediction
print(f"Predicted Survived for PassengerId {new_df['PassengerId'].iloc[0]}: {prediction[0]}")

Predicted Survived for PassengerId 1001: 1
