# Stacking

### Importing Libraries

In [1]:
#importing necessary libraries
import numpy as np
import pandas as pd

### Importing Dataset

In [2]:
# Load the CSV that sits next to the notebook into a DataFrame
data = pd.read_csv(filepath_or_buffer='data_cleaned.csv')

# Preview the top five rows
data.head(n=5)

Unnamed: 0,Survived,Age,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,SibSp_0,SibSp_1,...,Parch_0,Parch_1,Parch_2,Parch_3,Parch_4,Parch_5,Parch_6,Embarked_C,Embarked_Q,Embarked_S
0,0,22.0,7.25,0,0,1,0,1,0,1,...,1,0,0,0,0,0,0,0,0,1
1,1,38.0,71.2833,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,1,0,0
2,1,26.0,7.925,0,0,1,1,0,1,0,...,1,0,0,0,0,0,0,0,0,1
3,1,35.0,53.1,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,1
4,0,35.0,8.05,0,0,1,0,1,1,0,...,1,0,0,0,0,0,0,0,0,1


In [3]:
# Dimensions of the dataset as a (rows, columns) tuple
data.shape

(891, 25)

In [4]:
# Count the missing entries in every column (isna is the canonical spelling of isnull)
data.isna().sum()

Survived      0
Age           0
Fare          0
Pclass_1      0
Pclass_2      0
Pclass_3      0
Sex_female    0
Sex_male      0
SibSp_0       0
SibSp_1       0
SibSp_2       0
SibSp_3       0
SibSp_4       0
SibSp_5       0
SibSp_8       0
Parch_0       0
Parch_1       0
Parch_2       0
Parch_3       0
Parch_4       0
Parch_5       0
Parch_6       0
Embarked_C    0
Embarked_Q    0
Embarked_S    0
dtype: int64

### Separating Dependent and Independent Variables

In [5]:
# Dependent variable (target): the Survived column
y = data['Survived']

# Independent variables (features): every column except the target
x = data.drop(columns=['Survived'])

# Both should report the same number of rows
x.shape, y.shape

((891, 24), (891,))

### Creating the Training and Test Sets

In [6]:
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score

# Hold-out split: stratify on the target so both parts keep the class ratio,
# and fix the seed so the same rows are selected on every run
train_x, test_x, train_y, test_y = tts(
    x,
    y,
    stratify=y,
    random_state=9,
)

# Shapes of the four pieces, in the order they were returned
tuple(part.shape for part in (train_x, test_x, train_y, test_y))

((668, 24), (223, 24), (668,), (223,))

## Base models 

In [7]:
#importing predictive models
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import classification_report,confusion_matrix

<img src="stacking image.png" alt="Drawing" style="width: 400px;"/>

### Model Training and Predictions 

In [8]:
def model_predictions(model, train_x, train_y, test_x, cv=None):
    """Fit ``model`` on the training data and return its predictions.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    train_x, train_y : training features and labels used to fit ``model``.
    test_x : features to predict once the model has been fitted.
    cv : optional, default ``None``.
        ``None`` keeps the original behaviour: the training predictions are
        made on the very rows the model was fitted on (in-sample).
        Any value accepted by ``sklearn.model_selection.cross_val_predict``
        (for example ``5``) returns out-of-fold training predictions instead.
        Those are the appropriate inputs for a stacking meta-model, because
        in-sample predictions of an overfitting base model look far more
        accurate than the model really is.

    Returns
    -------
    (pred_train, pred_test) : predictions for ``train_x`` and ``test_x``.
    """
    if cv is None:
        # Original behaviour: fit once, predict the rows just trained on
        model.fit(train_x, train_y)
        pred_train = model.predict(train_x)
    else:
        # Imported lazily so the default path has no extra dependency
        from sklearn.model_selection import cross_val_predict

        # Each training row is predicted by a clone that never saw that row
        pred_train = cross_val_predict(model, train_x, train_y, cv=cv)
        # cross_val_predict works on clones, so fit the passed model on the
        # full training set before predicting the test set
        model.fit(train_x, train_y)

    pred_test = model.predict(test_x)
    return pred_train, pred_test

In [9]:
# Model 1 - Decision Tree (seeded; fitted before the feature-scaling cell runs)
DT = DecisionTreeClassifier(random_state=101)
M1_train, M1_test = model_predictions(
    model=DT,
    train_x=train_x,
    train_y=train_y,
    test_x=test_x,
)

# Hold-out accuracy, with the ground truth passed first
accuracy_score(y_true=test_y, y_pred=M1_test)


0.726457399103139

In [10]:

# Per-class metrics for the decision tree, followed by its confusion matrix
print(
    classification_report(y_true=test_y, y_pred=M1_test),
    confusion_matrix(y_true=test_y, y_pred=M1_test),
)

              precision    recall  f1-score   support

           0       0.79      0.75      0.77       137
           1       0.63      0.69      0.66        86

    accuracy                           0.73       223
   macro avg       0.71      0.72      0.72       223
weighted avg       0.73      0.73      0.73       223
 [[103  34]
 [ 27  59]]


In [11]:
#Feature Scaling
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, so the test split contributes
# nothing to the scaling statistics
scaler = StandardScaler().fit(train_x)

# Apply the training-derived scaling to both splits; this rebinds
# train_x / test_x to the transformed outputs
train_x, test_x = scaler.transform(train_x), scaler.transform(test_x)

In [12]:
# Model 2 - Logistic Regression, fitted on the scaled features
LR = LogisticRegression(random_state=101)
M2_train, M2_test = model_predictions(
    model=LR,
    train_x=train_x,
    train_y=train_y,
    test_x=test_x,
)

# Hold-out accuracy, then per-class metrics and the confusion matrix
print('Accuracy score: ', accuracy_score(y_true=test_y, y_pred=M2_test))
print(
    classification_report(y_true=test_y, y_pred=M2_test),
    confusion_matrix(y_true=test_y, y_pred=M2_test),
)

Accuracy score:  0.7937219730941704
              precision    recall  f1-score   support

           0       0.83      0.84      0.83       137
           1       0.74      0.72      0.73        86

    accuracy                           0.79       223
   macro avg       0.78      0.78      0.78       223
weighted avg       0.79      0.79      0.79       223
 [[115  22]
 [ 24  62]]


In [13]:
# Model 3 - k Nearest Neighbours with library-default settings
knn = KNeighborsClassifier()
M3_train, M3_test = model_predictions(
    model=knn,
    train_x=train_x,
    train_y=train_y,
    test_x=test_x,
)

# Hold-out accuracy, then per-class metrics and the confusion matrix
print('Accuracy score: ', accuracy_score(y_true=test_y, y_pred=M3_test))
print(
    classification_report(y_true=test_y, y_pred=M3_test),
    confusion_matrix(y_true=test_y, y_pred=M3_test),
)

Accuracy score:  0.7399103139013453
              precision    recall  f1-score   support

           0       0.77      0.82      0.79       137
           1       0.68      0.62      0.65        86

    accuracy                           0.74       223
   macro avg       0.73      0.72      0.72       223
weighted avg       0.74      0.74      0.74       223
 [[112  25]
 [ 33  53]]


In [14]:
# Model 4 - Support Vector Classifier with library-default settings
svc = SVC()
M4_train, M4_test = model_predictions(
    model=svc,
    train_x=train_x,
    train_y=train_y,
    test_x=test_x,
)

# Hold-out accuracy, then per-class metrics and the confusion matrix
print('Accuracy score: ', accuracy_score(y_true=test_y, y_pred=M4_test))
print(
    classification_report(y_true=test_y, y_pred=M4_test),
    confusion_matrix(y_true=test_y, y_pred=M4_test),
)

Accuracy score:  0.8026905829596412
              precision    recall  f1-score   support

           0       0.83      0.86      0.84       137
           1       0.76      0.71      0.73        86

    accuracy                           0.80       223
   macro avg       0.79      0.79      0.79       223
weighted avg       0.80      0.80      0.80       223
 [[118  19]
 [ 25  61]]


In [15]:
# Model 5 - Random Forest Classifier
# Seeded with the same value as the decision tree and logistic regression so
# the forest (and every score derived from it) is reproducible on re-run.
rfc = RandomForestClassifier(random_state=101)
M5_train, M5_test = model_predictions(rfc, train_x, train_y, test_x)

# Hold-out accuracy (ground truth first), then per-class metrics and the confusion matrix
print('Accuracy score: ',accuracy_score(test_y,M5_test))
print(classification_report(test_y,M5_test), confusion_matrix(test_y,M5_test))

Accuracy score:  0.7668161434977578
              precision    recall  f1-score   support

           0       0.81      0.80      0.81       137
           1       0.69      0.71      0.70        86

    accuracy                           0.77       223
   macro avg       0.75      0.76      0.75       223
weighted avg       0.77      0.77      0.77       223
 [[110  27]
 [ 25  61]]


## Stacking Model

<img src="stacking image 2.png" alt="Drawing" style="width: 500px;"/>

In [16]:
# Meta-features for training: one column per base model, holding that
# model's predictions for the training rows
train_prediction = dict(
    DT=M1_train,
    LR=M2_train,
    knn=M3_train,
    svc=M4_train,
    rfc=M5_train,
)

train_predictions = pd.DataFrame(data=train_prediction)
train_predictions.head(n=5)

Unnamed: 0,DT,LR,knn,svc,rfc
0,1,1,0,1,1
1,1,1,1,1,1
2,1,1,0,1,1
3,0,1,1,1,0
4,0,0,0,0,0


In [17]:
# Meta-features for evaluation: one column per base model, holding that
# model's predictions for the test rows
test_prediction = dict(
    DT=M1_test,
    LR=M2_test,
    knn=M3_test,
    svc=M4_test,
    rfc=M5_test,
)

test_predictions = pd.DataFrame(data=test_prediction)
test_predictions.head(n=15)

Unnamed: 0,DT,LR,knn,svc,rfc
0,0,0,0,0,0
1,1,1,1,1,1
2,0,0,0,0,1
3,1,0,0,0,0
4,0,0,0,0,0
5,1,0,0,0,1
6,0,0,0,0,0
7,0,1,0,1,1
8,0,1,1,1,1
9,0,1,1,1,1


In [18]:
# Stacker Model: a logistic regression that learns how to combine the base
# models' predictions into a final prediction.
#
# NOTE(review): train_predictions holds in-sample predictions (each base model
# predicted the same rows it was fitted on). Models that overfit the training
# set therefore look more reliable to the stacker than they are on unseen
# data. Out-of-fold predictions (e.g. sklearn's cross_val_predict) are the
# usual remedy.
model = LogisticRegression()
model.fit(train_predictions, train_y)

# Final predictions of the stacked ensemble on the held-out rows
f_pred = model.predict(test_predictions)

# Hold-out accuracy (ground truth first), then per-class metrics and the confusion matrix
print('Accuracy score: ',accuracy_score(test_y,f_pred))
print(classification_report(test_y,f_pred), confusion_matrix(test_y,f_pred))

Accuracy score:  0.7533632286995515
              precision    recall  f1-score   support

           0       0.81      0.78      0.80       137
           1       0.67      0.71      0.69        86

    accuracy                           0.75       223
   macro avg       0.74      0.75      0.74       223
weighted avg       0.76      0.75      0.75       223
 [[107  30]
 [ 25  61]]
