Load Data

In [1]:
# Import library
import pandas as pd

In [2]:
#Load the dataset
df_train= pd.read_csv('../Datasets/titanic/train.csv')
df_test= pd.read_csv('../Datasets/titanic/test.csv')

In [3]:
df_train.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [4]:
df_test.isna().sum()

PassengerId      0
Pclass           0
Name             0
Sex              0
Age             86
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin          327
Embarked         0
dtype: int64

# Data Cleaning
####.Imptation

In [5]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [6]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  418 non-null    int64  
 1   Pclass       418 non-null    int64  
 2   Name         418 non-null    object 
 3   Sex          418 non-null    object 
 4   Age          332 non-null    float64
 5   SibSp        418 non-null    int64  
 6   Parch        418 non-null    int64  
 7   Ticket       418 non-null    object 
 8   Fare         417 non-null    float64
 9   Cabin        91 non-null     object 
 10  Embarked     418 non-null    object 
dtypes: float64(2), int64(4), object(5)
memory usage: 36.0+ KB


In [7]:
df_train.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [8]:
df_test.columns

Index(['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch',
       'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [9]:
#Independent & dependent variables
X_cols= list(df_train.columns)
y_col = 'Survived'

In [10]:
#Filling the nan values
df_train['Age'].fillna(df_train['Age'].median(),inplace=True)
df_test['Age'].fillna(df_train['Age'].median(),inplace= True)
df_train['Embarked'].fillna(df_train['Embarked'].mode()[0],inplace=True)
df_test['Embarked'].fillna(df_train['Embarked'].mode()[0],inplace=True)
df_train['Fare'].fillna(df_train['Fare']. median (), inplace=True)
df_test['Fare'].fillna(df_train['Fare']. median (), inplace=True)

In [11]:
df_train['Sex']= df_train['Sex'].apply(lambda X:0 if X=='male' else 1)
df_test['Sex']= df_test['Sex'].apply(lambda X:0 if X=='male' else 1)

In [12]:
#one hot encoding for Embarked
temp = pd.get_dummies(df_train['Embarked'])
df_train = pd.concat([df_train, temp],axis=1)
temp = pd.get_dummies(df_test['Embarked'])
df_test = pd.concat([df_test, temp], axis=1)
X_cols.extend(temp.columns)

In [13]:
#Remove unnecessary columns
X_cols.remove('PassengerId')
X_cols.remove('Survived')
X_cols.remove('Name')
X_cols.remove('Ticket')
X_cols.remove('Cabin')
X_cols.remove('Embarked')

Build ML Model

In [14]:
X = df_train[X_cols]
y = df_train[y_col]

In [15]:
X.corrwith(y)

Pclass   -0.338481
Sex       0.543351
Age      -0.064910
SibSp    -0.035322
Parch     0.081629
Fare      0.257307
C         0.168240
Q         0.003650
S        -0.149683
dtype: float64

In [16]:
# splitting dataset into train & test
from sklearn.model_selection import train_test_split
X_train,X_val,y_train,y_val=train_test_split(X,y,test_size = 0.25, random_state=96)

In [17]:
X_train.shape

(668, 9)

In [18]:
 
# without hyperparameter tunning 
# Random_Forest
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier (n_estimators=11, random_state=12)
model.fit(X_train,y_train)
model.score(X_val,y_val)

0.7982062780269058

In [19]:
 
# with hyperparameter tunning
#Random_Forest
from sklearn.model_selection import GridSearchCV
params={'max_depth':[4,5,6],'min_samples_leaf':[1,2,3,4],'criterion':['gini','entropy'],'n_estimators':[15,25,50,100],'min_samples_split':[4,5,6],'random_state':[12]}
grid_cv=GridSearchCV (RandomForestClassifier (),params,cv=5,verbose=3)
grid_cv.fit(X,y)

Fitting 5 folds for each of 288 candidates, totalling 1440 fits
[CV 1/5] END criterion=gini, max_depth=4, min_samples_leaf=1, min_samples_split=4, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=4, min_samples_leaf=1, min_samples_split=4, n_estimators=15, random_state=12; total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=4, min_samples_leaf=1, min_samples_split=4, n_estimators=15, random_state=12; total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=4, min_samples_leaf=1, min_samples_split=4, n_estimators=15, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=4, min_samples_leaf=1, min_samples_split=4, n_estimators=15, random_state=12; total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=4, min_samples_leaf=1, min_samples_split=4, n_estimators=25, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=4, min_samples_leaf=1, min_samples_split=4, n_estimators=25, random_state=12; tot

[CV 5/5] END criterion=gini, max_depth=4, min_samples_leaf=2, min_samples_split=4, n_estimators=15, random_state=12; total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=4, min_samples_leaf=2, min_samples_split=4, n_estimators=25, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=4, min_samples_leaf=2, min_samples_split=4, n_estimators=25, random_state=12; total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=4, min_samples_leaf=2, min_samples_split=4, n_estimators=25, random_state=12; total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=4, min_samples_leaf=2, min_samples_split=4, n_estimators=25, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=4, min_samples_leaf=2, min_samples_split=4, n_estimators=25, random_state=12; total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=4, min_samples_leaf=2, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=4, min_sa

[CV 3/5] END criterion=gini, max_depth=4, min_samples_leaf=3, min_samples_split=4, n_estimators=25, random_state=12; total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=4, min_samples_leaf=3, min_samples_split=4, n_estimators=25, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=4, min_samples_leaf=3, min_samples_split=4, n_estimators=25, random_state=12; total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=4, min_samples_leaf=3, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 2/5] END criterion=gini, max_depth=4, min_samples_leaf=3, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=4, min_samples_leaf=3, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=4, min_samples_leaf=3, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=4, min_sa

[CV 4/5] END criterion=gini, max_depth=4, min_samples_leaf=4, min_samples_split=4, n_estimators=25, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=4, min_samples_leaf=4, min_samples_split=4, n_estimators=25, random_state=12; total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=4, min_samples_leaf=4, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=4, min_samples_leaf=4, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=4, min_samples_leaf=4, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=4, min_samples_leaf=4, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=4, min_samples_leaf=4, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=4, min_sa

[CV 1/5] END criterion=gini, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 1/5] END criterion=gini, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 2/5] END criterion=gini, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 3/5] END criterion=gini, max_depth=5, min_

[CV 3/5] END criterion=gini, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 2/5] END criterion=gini, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 3/5] END criterion=gini, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 4/5] END criterion=gini, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=gini, max_depth=5, mi

[CV 4/5] END criterion=gini, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 2/5] END criterion=gini, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 3/5] END criterion=gini, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 4/5] END criterion=gini, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 5/5] END criterion=gini, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 1/5] END criterion=gini, max_depth=5, m

[CV 1/5] END criterion=gini, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 2/5] END criterion=gini, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 3/5] END criterion=gini, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 4/5] END criterion=gini, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=gini, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 1/5] END criterion=gini, max_depth=5, min_samples_leaf=4, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=5, min_samples_leaf=4, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=5, m

[CV 2/5] END criterion=gini, max_depth=6, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 3/5] END criterion=gini, max_depth=6, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 4/5] END criterion=gini, max_depth=6, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=gini, max_depth=6, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 1/5] END criterion=gini, max_depth=6, min_samples_leaf=1, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=6, min_samples_leaf=1, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=6, min_samples_leaf=1, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=6, mi

[CV 3/5] END criterion=gini, max_depth=6, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 4/5] END criterion=gini, max_depth=6, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 5/5] END criterion=gini, max_depth=6, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 1/5] END criterion=gini, max_depth=6, min_samples_leaf=2, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=6, min_samples_leaf=2, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=6, min_samples_leaf=2, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=6, min_samples_leaf=2, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=6, min

[CV 4/5] END criterion=gini, max_depth=6, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=gini, max_depth=6, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 1/5] END criterion=gini, max_depth=6, min_samples_leaf=3, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=6, min_samples_leaf=3, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=6, min_samples_leaf=3, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=6, min_samples_leaf=3, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=6, min_samples_leaf=3, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=6, min_

[CV 5/5] END criterion=gini, max_depth=6, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 1/5] END criterion=gini, max_depth=6, min_samples_leaf=4, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=6, min_samples_leaf=4, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 3/5] END criterion=gini, max_depth=6, min_samples_leaf=4, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 4/5] END criterion=gini, max_depth=6, min_samples_leaf=4, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 5/5] END criterion=gini, max_depth=6, min_samples_leaf=4, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 1/5] END criterion=gini, max_depth=6, min_samples_leaf=4, min_samples_split=5, n_estimators=25, random_state=12; total time=   0.0s
[CV 2/5] END criterion=gini, max_depth=6, min_s

[CV 5/5] END criterion=entropy, max_depth=4, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 1/5] END criterion=entropy, max_depth=4, min_samples_leaf=1, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion=entropy, max_depth=4, min_samples_leaf=1, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 3/5] END criterion=entropy, max_depth=4, min_samples_leaf=1, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 4/5] END criterion=entropy, max_depth=4, min_samples_leaf=1, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=4, min_samples_leaf=1, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 1/5] END criterion=entropy, max_depth=4, min_samples_leaf=1, min_samples_split=5, n_estimators=25, random_state=12; total time=   0.0s
[CV 2/5] END criterion=ent

[CV 4/5] END criterion=entropy, max_depth=4, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=entropy, max_depth=4, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 1/5] END criterion=entropy, max_depth=4, min_samples_leaf=2, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion=entropy, max_depth=4, min_samples_leaf=2, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 3/5] END criterion=entropy, max_depth=4, min_samples_leaf=2, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 4/5] END criterion=entropy, max_depth=4, min_samples_leaf=2, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=4, min_samples_leaf=2, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 1/5] END criterion=en

[CV 3/5] END criterion=entropy, max_depth=4, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=4, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=entropy, max_depth=4, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 1/5] END criterion=entropy, max_depth=4, min_samples_leaf=3, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion=entropy, max_depth=4, min_samples_leaf=3, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 3/5] END criterion=entropy, max_depth=4, min_samples_leaf=3, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 4/5] END criterion=entropy, max_depth=4, min_samples_leaf=3, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 5/5] END criterion=e

[CV 2/5] END criterion=entropy, max_depth=4, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 3/5] END criterion=entropy, max_depth=4, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=4, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=entropy, max_depth=4, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 1/5] END criterion=entropy, max_depth=4, min_samples_leaf=4, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion=entropy, max_depth=4, min_samples_leaf=4, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 3/5] END criterion=entropy, max_depth=4, min_samples_leaf=4, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 4/5] END criterion=

[CV 1/5] END criterion=entropy, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 2/5] END criterion=entropy, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=entropy, max_depth=5, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 1/5] END criterion=entropy, max_depth=5, min_samples_leaf=1, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion=entropy, max_depth=5, min_samples_leaf=1, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 3/5] END criterion

[CV 5/5] END criterion=entropy, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 1/5] END criterion=entropy, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 2/5] END criterion=entropy, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=entropy, max_depth=5, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 1/5] END criterion=entropy, max_depth=5, min_samples_leaf=2, min_samples_split=5, n_estimators=15, random_state=12; total time=   0.0s
[CV 2/5] END criterion

[CV 4/5] END criterion=entropy, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 1/5] END criterion=entropy, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 2/5] END criterion=entropy, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 4/5] END criterion=entropy, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=entropy, max_depth=5, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 1/5] END criterion

[CV 4/5] END criterion=entropy, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 5/5] END criterion=entropy, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 1/5] END criterion=entropy, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 2/5] END criterion=entropy, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 3/5] END criterion=entropy, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 4/5] END criterion=entropy, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=entropy, max_depth=5, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 1/5] END criterion

[CV 3/5] END criterion=entropy, max_depth=6, min_samples_leaf=1, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=6, min_samples_leaf=1, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 5/5] END criterion=entropy, max_depth=6, min_samples_leaf=1, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 1/5] END criterion=entropy, max_depth=6, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 2/5] END criterion=entropy, max_depth=6, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 3/5] END criterion=entropy, max_depth=6, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 4/5] END criterion=entropy, max_depth=6, min_samples_leaf=1, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=

[CV 3/5] END criterion=entropy, max_depth=6, min_samples_leaf=2, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 4/5] END criterion=entropy, max_depth=6, min_samples_leaf=2, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 5/5] END criterion=entropy, max_depth=6, min_samples_leaf=2, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 1/5] END criterion=entropy, max_depth=6, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 2/5] END criterion=entropy, max_depth=6, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 3/5] END criterion=entropy, max_depth=6, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 4/5] END criterion=entropy, max_depth=6, min_samples_leaf=2, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=

[CV 3/5] END criterion=entropy, max_depth=6, min_samples_leaf=3, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 4/5] END criterion=entropy, max_depth=6, min_samples_leaf=3, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 5/5] END criterion=entropy, max_depth=6, min_samples_leaf=3, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 1/5] END criterion=entropy, max_depth=6, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 2/5] END criterion=entropy, max_depth=6, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 3/5] END criterion=entropy, max_depth=6, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 4/5] END criterion=entropy, max_depth=6, min_samples_leaf=3, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 5/5] END criterion=

[CV 2/5] END criterion=entropy, max_depth=6, min_samples_leaf=4, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 3/5] END criterion=entropy, max_depth=6, min_samples_leaf=4, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 4/5] END criterion=entropy, max_depth=6, min_samples_leaf=4, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.0s
[CV 5/5] END criterion=entropy, max_depth=6, min_samples_leaf=4, min_samples_split=4, n_estimators=50, random_state=12; total time=   0.1s
[CV 1/5] END criterion=entropy, max_depth=6, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 2/5] END criterion=entropy, max_depth=6, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 3/5] END criterion=entropy, max_depth=6, min_samples_leaf=4, min_samples_split=4, n_estimators=100, random_state=12; total time=   0.2s
[CV 4/5] END criterion=e

GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [4, 5, 6],
                         'min_samples_leaf': [1, 2, 3, 4],
                         'min_samples_split': [4, 5, 6],
                         'n_estimators': [15, 25, 50, 100],
                         'random_state': [12]},
             verbose=3)

In [20]:
#To find the best parameters
grid_cv.best_params_

{'criterion': 'entropy',
 'max_depth': 6,
 'min_samples_leaf': 1,
 'min_samples_split': 6,
 'n_estimators': 15,
 'random_state': 12}

In [21]:
model=RandomForestClassifier (criterion= 'entropy',max_depth= 6,min_samples_leaf= 1,min_samples_split=6,n_estimators= 15, random_state=12)
model.fit(X_train,y_train)
model.score(X_val,y_val)

0.8385650224215246

In [39]:
model_final=RandomForestClassifier (criterion= 'entropy',max_depth= 6,min_samples_leaf= 1,min_samples_split=6,n_estimators= 15)
model_final.fit(X,y)

RandomForestClassifier(criterion='entropy', max_depth=6, min_samples_split=6,
                       n_estimators=15)

In [40]:
#make prediction
y_pre=model_final.predict(df_test[X_cols])
y_pre

array([0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
       0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,

In [41]:
sum(y_pre)

133