In [1]:
# Import model objects and mapping variables
# Import necessary packages

# import data and test the function
import pandas as pd
import numpy as np


df = pd.read_excel('Titanic-Dataset.xlsx')

In [2]:
# read from pickle file model.pkl

import pickle

l = pickle.load(open('model.pkl', 'rb'))
model, sex_dict, embarked_dict = l

In [4]:
# Model deployment

# Gather all variable treatment in one function
def data_preprocessing(df, sex_dict=sex_dict, embarked_dict=embarked_dict):
    
    df['Age'].fillna(df['Age'].mean(), inplace=True)
    df['Sex'] = df['Sex'].map(sex_dict)
    df['Embarked'] = df['Embarked'].map(embarked_dict)
    df['Embarked'].fillna(0, inplace=True)
    df.drop(['Cabin', 'Name', 'Ticket'], axis=1, inplace=True)
    
    return df   

# Create a function to predict the survival of a passenger based on the model
def predict_survival(df):

    df = data_preprocessing(df)
    X = df.drop(['PassengerId', 'Survived'], axis=1)
    prediction = model.predict(X)
    return prediction
    

In [5]:
prediction = predict_survival(df)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Embarked'].fillna(0, inplace=True)


In [10]:
embarked_dict

{'S': 0, 'C': 1, 'Q': 2}

In [None]:
df.iloc[0] # Prediction should be 0

PassengerId     1.00
Survived        0.00
Pclass          3.00
Sex             0.00
Age            22.00
SibSp           1.00
Parch           0.00
Fare            7.25
Embarked        0.00
Name: 0, dtype: float64

In [9]:
df.iloc[1] # Prediction should be 1

PassengerId     2.0000
Survived        1.0000
Pclass          1.0000
Sex             1.0000
Age            38.0000
SibSp           1.0000
Parch           0.0000
Fare           71.2833
Embarked        1.0000
Name: 1, dtype: float64

In [6]:
prediction

array([0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,