In [1]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the Titanic training and test splits.
# Both paths are relative to the notebook's working directory.
df_train = pd.read_csv(filepath_or_buffer='../../Projects/Titanic/train.csv')
df_test = pd.read_csv(filepath_or_buffer='../../Projects/Titanic/test.csv')

In [3]:
# Candidate numeric inputs, selected by dtype.
# include='number' keeps only numeric columns; exclude='object' would also
# let bool, datetime or category columns through to the numeric branch.
numeric_features = df_train.select_dtypes(include='number').columns.tolist()

In [4]:
# Display the candidate numeric columns picked up from df_train.
numeric_features

['PassengerId', 'Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare']

In [5]:
# Exclude the row identifier and the target from the model inputs.
# Rebinding to a filtered list (rather than calling list.remove) keeps this
# cell idempotent: running it a second time does not raise ValueError.
numeric_features = [
    col for col in numeric_features if col not in ('PassengerId', 'Survived')
]

In [7]:
# Candidate categorical inputs: every df_train column stored with the object dtype.
cat_features = df_train.select_dtypes(include='object').columns.tolist()

In [8]:
# Display the candidate categorical columns picked up from df_train.
cat_features

['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked']

In [9]:
# Keep only the categorical columns that will be one-hot encoded by dropping
# 'Name', 'Ticket' and 'Cabin'.
# Rebinding to a filtered list (rather than calling list.remove) keeps this
# cell idempotent: running it a second time does not raise ValueError.
cat_features = [
    col for col in cat_features if col not in ('Name', 'Ticket', 'Cabin')
]

In [10]:
# Numeric branch: fill missing values with the column median, then rescale
# with MinMaxScaler.
num_transformer = Pipeline([
    ('imputer_n', SimpleImputer(strategy='median')),
    ('scaler', MinMaxScaler()),
])

# Categorical branch: fill missing values with the most frequent value, then
# one-hot encode. handle_unknown='ignore' encodes a category that was not seen
# during fit as all zeros instead of raising, so the saved pipeline can still
# score records containing new category values.
cat_transformer = Pipeline([
    ('imputer_c', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])

In [11]:
# Route each column group through its own preprocessing branch:
# numeric_features -> num_transformer, cat_features -> cat_transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num_pipe', num_transformer, numeric_features),
        ('cat_pipe', cat_transformer, cat_features),
    ]
)

In [12]:
# End-to-end estimator: preprocessing followed by a logistic-regression
# classifier with default settings.
final_pipe = Pipeline(
    steps=[
        ('preprocess_pipe', preprocessor),
        ('model', LogisticRegression()),
    ]
)

In [14]:
# Fit preprocessing and classifier in one go: every column except 'Survived'
# is passed as input, and 'Survived' is the target.
final_pipe.fit(X=df_train.drop(columns='Survived'), y=df_train['Survived'])

Pipeline(steps=[('preprocess_pipe',
                 ColumnTransformer(transformers=[('num_pipe',
                                                  Pipeline(steps=[('imputer_n',
                                                                   SimpleImputer(strategy='median')),
                                                                  ('scaler',
                                                                   MinMaxScaler())]),
                                                  ['Pclass', 'Age', 'SibSp',
                                                   'Parch', 'Fare']),
                                                 ('cat_pipe',
                                                  Pipeline(steps=[('imputer_c',
                                                                   SimpleImputer(strategy='most_frequent')),
                                                                  ('encoder',
                                                                   OneHotEncoder(

In [15]:
# Predict the class label for every row of the test split.
final_pipe.predict(X=df_test)

array([0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,

In [18]:
# Take the row at position 100 of the test split as a one-row DataFrame
# (slicing, rather than scalar indexing, keeps the 2-D shape predict expects).
q0 = df_test.iloc[100:101]
q0

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
100,992,1,"Stengel, Mrs. Charles Emil Henry (Annie May Mo...",female,43.0,1,0,11778,55.4417,C116,C


In [19]:
# Predict the class label for the single sample row.
final_pipe.predict(X=q0)

array([1], dtype=int64)

In [21]:
# Translate the 0/1 prediction for the sample row into a readable label.
print('Survived' if final_pipe.predict(q0)[0] == 1 else 'Not Survived')

Survived


In [22]:
import pickle

In [23]:
# Persist the fitted pipeline to 'final_pipe.pkl' in the working directory.
# The context manager closes the file handle as soon as the dump finishes,
# so the pickle is fully flushed to disk and no open handle is leaked.
with open('final_pipe.pkl', 'wb') as model_file:
    pickle.dump(final_pipe, model_file)