## Titanic: Model Deployment

### Importing libraries

In [1]:
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### Loading the dataset

In [2]:
# Read the raw table, then separate the 'Survived' target from the
# remaining columns, which form the feature matrix.
df = pd.read_csv('./data/raw_data.csv')

y = df['Survived']
X = df.drop(columns='Survived')

### Creating the pipeline

In [3]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

class FeatureDropper(BaseEstimator, TransformerMixin):
    """Stateless transformer that removes a fixed set of columns.

    Parameters
    ----------
    features_to_drop : list-like of column labels
        Labels to remove in ``transform``. Labels that are absent from the
        input are skipped rather than raising.
    """

    def __init__(self, features_to_drop):
        # Stored unmodified under the same name as the constructor argument.
        self.features_to_drop = features_to_drop

    def fit(self, X, y=None):
        """Learn nothing; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` without the configured columns.

        ``X`` is expected to provide a pandas-style ``drop(columns=...)``.
        """
        # errors='ignore' keeps this tolerant of inputs that lack some labels.
        remaining = X.drop(columns=self.features_to_drop, errors='ignore')
        return remaining

#### Feature Selection

In [4]:
# Column labels handed to FeatureDropper for removal.
FEATS_TO_DROP = [
    'PassengerId',
    'Name',
    'Ticket',
    'Cabin',
]

In [5]:
# First pipeline stage: drop the columns listed in FEATS_TO_DROP.
feature_selection = FeatureDropper(features_to_drop=FEATS_TO_DROP)

#### Numeric Transformation

In [6]:
# Columns routed through the numeric transformer.
NUMERIC_FEATS = [
    'Age',
    'Fare',
]

In [7]:
from sklearn.preprocessing import Normalizer
from sklearn.impute import SimpleImputer

# Numeric branch: fill missing values with the column mean, then apply
# Normalizer.
# NOTE(review): Normalizer rescales each *row* to unit norm; it does not
# scale each column. Confirm this is intended rather than a per-feature
# scaler such as StandardScaler. Changing it would require re-tuning the SVC.
numeric_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('normalizer', Normalizer()),
    ]
)

#### Categorical Transformation

In [8]:
# Columns routed through the categorical transformer.
CATEGORICAL_FEATS = [
    'Sex',
    'Embarked',
]

In [9]:
from sklearn.preprocessing import OneHotEncoder

# Categorical branch: replace missing values with the literal 'missing',
# then one-hot encode. handle_unknown='ignore' means categories not seen
# during fit do not raise at transform time.
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

#### Completing the full pipeline

In [10]:
from sklearn.compose import ColumnTransformer

# Send each column group through its own transformer and concatenate the
# results.
# NOTE(review): remainder='drop' is the ColumnTransformer default, spelled
# out here because it means every column outside NUMERIC_FEATS and
# CATEGORICAL_FEATS is discarded before the model sees it (presumably
# Pclass/SibSp/Parch in the usual Titanic schema; confirm that is intended).
preprocessing = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, NUMERIC_FEATS),
        ('cat', categorical_transformer, CATEGORICAL_FEATS),
    ],
    remainder='drop',
)

In [11]:
from sklearn.svm import SVC

# Polynomial-kernel SVC with fixed hyperparameters.
# NOTE(review): values presumably come from an earlier tuning run; confirm
# they were tuned against this same preprocessing.
svc = SVC(
    kernel='poly',
    degree=2,
    C=100,
    gamma='scale',
    class_weight=None,
)

# End-to-end estimator: column dropping -> preprocessing -> classifier.
svc_pipeline = Pipeline(
    steps=[
        ('feature_selection', feature_selection),
        ('preprocessing', preprocessing),
        ('model', svc),
    ]
)

In [12]:
# Fit every pipeline stage on the full X / y loaded above (no hold-out split
# is made in this notebook).
svc_pipeline.fit(X, y)

In [13]:
import os

import joblib

MODEL_PATH = './model/tuned_svc.pkl'

# joblib.dump does not create missing parent directories, so on a fresh
# checkout without ./model/ the dump would fail with FileNotFoundError.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# NOTE(review): the pipeline contains FeatureDropper, which is defined in this
# notebook. Pickle stores classes by reference, so whatever process loads this
# file must be able to resolve that class under the same module path.
joblib.dump(svc_pipeline, MODEL_PATH)

['./model/tuned_svc.pkl']