# import packages

In [None]:
import os
import sys
import pandas as pd
import numpy as np
# Global seed, reused below as `random_state` for the estimators.
seed= 7
# Seed NumPy's global RNG. The previous `np.seed= seed` only attached an
# attribute to the numpy module and left the generator unseeded.
np.random.seed(seed)
import gc
import importlib
import pickle
from pprint import pprint
import time


# visualizations
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns; sns.set(style="whitegrid", font_scale= 1.5)
from IPython.display import display

# modelling
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix

from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb

# Neural Networks
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
import tensorflow as tf
from keras.utils import np_utils

# import data

In [None]:
# Read the census-income CSV from the Kaggle input directory.
path = '/kaggle/input/census_income_data.csv'
data = pd.read_csv(path)

# Sanity check: dimensions plus the first and last rows.
print('data shape: ', data.shape)
for preview in (data.head(), data.tail()):
    display(preview)

**splitting the data into train and test**

In [None]:
# Rows labelled ' <=50K' / ' >50K' (no trailing dot) form the training set.
is_train_label = data['income'].isin([' <=50K', ' >50K'])
train = data.loc[is_train_label]
print('train shape: ', train.shape)

# Every remaining row belongs to the test set.
test = data.loc[~data.index.isin(train.index)]

# The last column is the response: keep it aside as a 0/1 vector
# (the positive label in these rows is ' >50K.' with a trailing dot).
y_test = (test.iloc[:, -1] == ' >50K.').astype(int)

# The test features are everything except the response column.
test = test.iloc[:, :-1]
print('test shape: ', test.shape)

# Renumber the rows of both frames from zero.
train = train.reset_index(drop=True)
test = test.reset_index(drop=True)

# Feature Engineering

In [None]:
#display( train.head(1) )
#display( test.head(1) )

**General Feature Engineering**

1. Combining both train and test so feature engineering is applied to both

2. Label Encoding

In [None]:
# Stack train on top of test so every transformation is applied exactly once.
df = pd.concat([train, test], sort=False).reset_index(drop=True)

# Categorical columns that get integer codes.
cat_features = [
    'workclass', 'education', 'marital_status', 'occupation',
    'relationship', 'race', 'sex', 'native_country',
]

# One encoder object, re-fitted for each categorical column.
enc = LabelEncoder()
for col in cat_features:
    df[col] = enc.fit_transform(df[col])

# Response: 1 for ' >50K', 0 for anything else (including the test rows,
# whose response column was removed before the concat).
df['income'] = (df['income'] == ' >50K').astype(int)

# Cast every column except the last one (the response) to float.
df = df.astype({col: float for col in df.columns[:-1]})

display(df.head())

**Feature Engineering Schemes**

1. Drop variable **fnlwgt**

2. Get the feature importance based on the XGBoost model

3. Based on the feature importance, build cartesian product of categorical features or drop some features and check for performance improvement


**train test split again and saving the results to be run on kaggle engine**

In [None]:
# Columns excluded from the design matrix: the sampling weight and the response.
to_drop= ['fnlwgt', 'income']
n_train= len(train)

# The first `n_train` rows of `df` are the training rows, the rest are test rows.
X_train= df.iloc[ :n_train, :].drop(columns= to_drop).values
y_train= df['income'].iloc[ :n_train].values
X_test= df.iloc[ n_train:, :].drop(columns= to_drop).values

# `y_test` was built when the data was split. np.asarray accepts both a Series
# and an ndarray, so this cell can be re-run safely (the previous
# `y_test.values` raised AttributeError on the second run).
y_test= np.asarray(y_test)

# Names of the columns that remain in X_train / X_test, in column order.
features= [col for col in df.columns if col not in to_drop]

print('X_train shape: ', X_train.shape)
print('y_train shape: ', y_train.shape)
print('X_test shape: ', X_test.shape)
print('y_test shape: ', y_test.shape)
print('features: ', features )

# XGBoost model

In [None]:
def fit_xgboost(X, y, init_params, tuning_params, metric= 'auc', cv= 5, seed= 42):
    """
    Tunes an XGBoost binary classifier stage by stage and returns the result.

    Procedure:
        a. `scale_pos_weight` is set to num(zeros) / num(ones) computed from `y`.
        b. `xgb.cv` with early stopping gives an initial number of trees for
           the initial parameters.
        c. The remaining hyperparameters are grid-searched in stages
           (max_depth + min_child_weight, gamma, colsample_bytree + subsample,
           reg_lambda, reg_alpha); the best values of a stage are frozen before
           the next stage starts.
        d. `xgb.cv` is run again with eta = 0.01 to find the final number of trees.

    Arguments:
        1. X: training features, numpy array
        2. y: training labels containing both 0 and 1, numpy array
        3. init_params: initial parameters as a dictionary to start; it must
            contain the key 'eta'. The dictionary is NOT modified.
            For ex.
            init_params = {
                'eta':0.2,
                'max_depth': 8,
                'min_child_weight': 3,
                'colsample_bytree': 0.8,
                'subsample': 0.8,
                'gamma': 0,
                'reg_lambda': 1,
                'reg_alpha': 0,
                'scale_pos_weight': 3.15,
                'objective':'binary:logistic'}
        4. tuning_params: a dictionary whose keys are the parameters to grid-search
            and whose values are the lists of candidate values. The keys
            'max_depth', 'min_child_weight', 'gamma', 'colsample_bytree',
            'subsample', 'reg_lambda' and 'reg_alpha' are read; others are ignored.
        5. metric: xgboost metric name; 'auc' is translated to the scikit-learn
            scorer name 'roc_auc' for the grid search, any other value is passed
            to GridSearchCV unchanged.
        6. cv: number of folds
        7. seed: seed for xgb.cv, numpy, `random` and the base estimator

    Returns:
        (hyperparameters, cv_results): the tuned parameters in scikit-learn naming
        ('learning_rate' = 0.01 and 'n_estimators' = number of rounds of the final
        cross-validation) and the data frame returned by that final `xgb.cv` run.
    """
    # `random` is not imported by the notebook's import cell, so import it here.
    import random

    # Work on a copy: the caller's dictionary must not be changed.
    start_params= dict(init_params)

    # find pos_weight: num(zeros)/ num(ones) -- from the labels that were passed in,
    # not from a notebook-level global.
    counts= pd.Series(y).value_counts()
    start_params['scale_pos_weight']= np.round(counts.loc[0] / counts.loc[1], 2 )

    # build dataset special for xgboost from the arguments
    dtrain= xgb.DMatrix(X, label= y)

    # find initial number of trees for the set of initial parameters
    print('Finding the initial number of trees with the initial parameters...')
    cv_results= xgb.cv( start_params,
                        dtrain,
                        num_boost_round= 1000,
                        seed= seed,
                        nfold= cv,
                        stratified= True,
                        metrics= {metric},
                        early_stopping_rounds= 50)
    ntrees= len(cv_results)
    print('The initial number of trees is %d' %ntrees)
    print('*'*10)

    # fixing randomness
    np.random.seed(seed)
    random.seed(seed)

    def XGB_gridsearch(X1, y1, estimator, params, cv= cv ):
        """
        Runs one exhaustive grid search and returns
        (best_estimator, best_params, best_score).
        Arguments:
            X1: training features, numpy array
            y1: training labels, numpy array
            estimator: base estimator for the search
            params: dictionary {parameter name: list of candidate values}
            cv: number of cv folds, int
        """
        # The naming of gridsearchcv scoring and of xgboost metrics does not match
        scoring= 'roc_auc' if metric== 'auc' else metric
        grid_search= GridSearchCV(estimator= estimator, param_grid= params,
                                  scoring= scoring, cv= cv, n_jobs= -1, verbose= 2)
        grid_search.fit(X1, y1)
        return grid_search.best_estimator_, grid_search.best_params_, grid_search.best_score_

    # This is a base estimator for grid search
    xgb_clf= xgb.XGBClassifier(random_state= seed )

    # Grid in scikit-learn naming: 'eta' becomes 'learning_rate', 'n_estimators'
    # is added, and every value is wrapped in a list because we are grid searching.
    parameters= {name: [value] for name, value in start_params.items() if name != 'eta'}
    parameters['learning_rate']= [ start_params['eta'] ]
    parameters['n_estimators']= [ ntrees ]

    tuning_order= [ ['max_depth', 'min_child_weight'], ['gamma'],
                   [ 'colsample_bytree', 'subsample'], ['reg_lambda'], ['reg_alpha'] ]
    for stage in tuning_order:
        # open up the candidates of this stage only
        for el in stage:
            parameters[el]= tuning_params[el]

        print('grid-searching for " %s "' %str(stage))
        _, updated_params, updated_score= XGB_gridsearch(X1= X, y1= y, estimator= xgb_clf,
                                                         cv= cv, params= parameters )

        # freeze the winners before the next stage
        for el in stage:
            parameters[el]= [ updated_params[el] ]

        print('updated parameters...')
        print('The current score for metric "%s" is: %0.4f ' %( metric, updated_score) )
        print('*'*10)

    print('Found all the hyperparameters for the initial number of trees: ntrees= %d' %ntrees)
    # delisting: every entry holds exactly one value at this point
    hyperparameters= {name: values[0] for name, values in parameters.items()}

    print('Now we will start the process of learning by the found hyperparameters and by '
          'considering a low learning rate\n')

    # back to the native xgboost naming for xgb.cv, with a low learning rate
    hyperparameters['eta']= 0.01
    del hyperparameters['learning_rate'], hyperparameters['n_estimators']

    cv_results= xgb.cv( hyperparameters,
                        dtrain,
                        num_boost_round= 3000,
                        seed= seed,
                        nfold= cv,
                        stratified= True,
                        metrics= { metric },
                        early_stopping_rounds= 100)
    print('The optimum number of trees is: %d' %len(cv_results))
    # columns 0 and 2 of the last row are printed as the train and test scores
    print('train score for metric "%s" is: %0.04f' % (metric, cv_results.iloc[-1, 0]))
    print('test score for metric "%s" is: %0.04f' % (metric, cv_results.iloc[-1, 2]))

    # prepare for output: scikit-learn naming again
    del hyperparameters['eta']
    hyperparameters['learning_rate']= 0.01
    hyperparameters['n_estimators']= len(cv_results)
    print('The best hyperparameters are: \n')
    print(hyperparameters)
    return hyperparameters, cv_results

In [None]:
import time
start = time.time()

# Initial booster settings used to find the first number of trees.
base_params = dict(
    eta=0.2,
    max_depth=8,
    min_child_weight=3,
    colsample_bytree=0.8,
    subsample=0.8,
    gamma=0,
    reg_lambda=1,
    reg_alpha=0,
    scale_pos_weight=3.15,
    objective='binary:logistic',
)

# Candidate values for the staged grid search.
search_grid = {
    'learning_rate': [0.2],
    'max_depth': list(range(3, 11)),
    'min_child_weight': list(range(1, 6)),
    'colsample_bytree': [i/10.0 for i in range(4,10)],
    'subsample': [i/10.0 for i in range(6,10)],
    'gamma': [i/10.0 for i in range(0,5)],
    'reg_lambda': [1e-5, 1e-2, 0.1, 1, 10, 100],
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 10, 100],
    'scale_pos_weight': [3.15],
    'objective': ['binary:logistic'],
}

hyperparameters, cv_results = fit_xgboost(
    X_train, y_train,
    init_params=base_params,
    tuning_params=search_grid,
    metric='auc', cv=5, seed=42,
)

elapsed = time.time() - start
print('\n\nThe runtime took %.2f seconds.' % elapsed)

In [None]:
# Pretty-print the tuned hyperparameter dictionary returned by fit_xgboost.
pprint(hyperparameters)

In [None]:
# Last rows of the cross-validation table returned by fit_xgboost.
display(cv_results.tail())

**Fit an XGBoost model with the scikit-learn wrapper of XGBoost and the found hyperparameters**

In [None]:
# Refit on the full training set with the tuned hyperparameters and record the
# AUC after every boosting round on the train set and on the held-out test set.
xgb_clf = xgb.XGBClassifier(random_state=seed, **hyperparameters)
monitored_sets = [(X_train, y_train), (X_test, y_test)]
xgb_clf.fit(
    X_train, y_train,
    eval_set=monitored_sets,
    eval_metric='auc',
    # early_stopping_rounds=100 is intentionally left disabled
    verbose=False,
)

# 'validation_0' / 'validation_1' follow the order of `eval_set`.
auc_history = xgb_clf.evals_result()
evals_result = pd.DataFrame({
    'train_auc': auc_history['validation_0']['auc'],
    'test_auc': auc_history['validation_1']['auc'],
})
for part in (evals_result.head(), evals_result.tail()):
    display(part)

# Visualizations

In [None]:
# AUC per number of trees: refit on train, refit on test, and the cross-validated mean.
plt.figure(figsize=(10, 5))
curves = [
    (evals_result['train_auc'], 'train_auc'),
    (evals_result['test_auc'], 'real_test_auc'),
    (cv_results['test-auc-mean'], 'cv_auc'),
]
for values, curve_label in curves:
    plt.plot(values, label=curve_label)
plt.xlabel('number of trees')
plt.ylabel('roc_auc')
plt.title('fnlwgt dropped!')
plt.legend()

**confusion matrix**

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the true
# classes and columns the predicted ones. The arguments used to be swapped, which
# displayed the transposed matrix.
y_pred= xgb_clf.predict(X_train)
print('Confusion matrix for he train data')
display( confusion_matrix(y_train, y_pred) )

y_test_pred= xgb_clf.predict(X_test)
print('Confusion matrix for the test data')
display( confusion_matrix(y_test, y_test_pred) )

# feature importance

In [None]:
# (feature name, importance) pairs, most important first.
features_importance = sorted(
    zip(features, xgb_clf.feature_importances_),
    key=lambda pair: pair[1],
    reverse=True,
)
ranked_names = [name for name, _ in features_importance]
ranked_scores = [score for _, score in features_importance]
bar_positions = range(len(features))

plt.figure(figsize=(10, 5))
plt.title("Feature importances")
plt.bar(bar_positions, ranked_scores, color="r", align="center")
plt.xticks(bar_positions, ranked_names, rotation=90)
plt.xlim([-1, len(features)])

# more feature engineering ideas

In [None]:
def cartesian(df, col1, col2):
    """
    Combine two columns of a dataframe row by row into strings of the form
    '(<col1 value>, <col2 value>)' and return them as a Pandas Series.
    Both columns are converted to strings first.
    """
    opening = '(' + df[col1].astype(str)
    closing = df[col2].astype(str) + ')'
    return opening + ', ' + closing

In [None]:
# Dimensions of the combined (train + test) frame.
df.shape

In [None]:
"""# add features
df['rms']= cartesian(df, 'relationship', 'marital_status')
# label encode it
enc= LabelEncoder()
df['rms']= enc.fit_transform( df['rms'])"""

# y_train and y_test stay as they are; only the feature matrices are rebuilt,
# this time without 'marital_status' as well.
to_drop = ['fnlwgt', 'income', 'marital_status']
n_train = len(train)
kept = df.drop(columns=to_drop)

X_train = kept.iloc[:n_train].values
X_test = kept.iloc[n_train:].values
features = list(kept.columns)

for caption, array in (('X_train shape: ', X_train), ('y_train shape: ', y_train),
                       ('X_test shape: ', X_test), ('y_test shape: ', y_test)):
    print(caption, array.shape)
print('features: ', features)

In [None]:
start = time.time()

# Initial booster settings used to find the first number of trees.
base_params = dict(
    eta=0.2,
    max_depth=8,
    min_child_weight=3,
    colsample_bytree=0.8,
    subsample=0.8,
    gamma=0,
    reg_lambda=1,
    reg_alpha=0,
    scale_pos_weight=3.15,
    objective='binary:logistic',
)

# Candidate values for the staged grid search.
search_grid = {
    'learning_rate': [0.2],
    'max_depth': list(range(3, 11)),
    'min_child_weight': list(range(1, 6)),
    'colsample_bytree': [i/10.0 for i in range(4,10)],
    'subsample': [i/10.0 for i in range(6,10)],
    'gamma': [i/10.0 for i in range(0,5)],
    'reg_lambda': [1e-5, 1e-2, 0.1, 1, 10, 100],
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 10, 100],
    'scale_pos_weight': [3.15],
    'objective': ['binary:logistic'],
}

hyperparameters, cv_results = fit_xgboost(
    X_train, y_train,
    init_params=base_params,
    tuning_params=search_grid,
    metric='auc', cv=5, seed=42,
)

elapsed = time.time() - start
print('\n\nThe runtime took %.2f seconds.' % elapsed)

In [None]:
# Refit with the newly tuned hyperparameters, tracking AUC per boosting round
# on the train set and on the held-out test set.
xgb_clf = xgb.XGBClassifier(random_state=seed, **hyperparameters)
monitored_sets = [(X_train, y_train), (X_test, y_test)]
xgb_clf.fit(
    X_train, y_train,
    eval_set=monitored_sets,
    eval_metric='auc',
    # early_stopping_rounds=100 is intentionally left disabled
    verbose=False,
)

# 'validation_0' / 'validation_1' follow the order of `eval_set`.
auc_history = xgb_clf.evals_result()
evals_result = pd.DataFrame({
    'train_auc': auc_history['validation_0']['auc'],
    'test_auc': auc_history['validation_1']['auc'],
})
for part in (evals_result.head(), evals_result.tail()):
    display(part)

In [None]:
# AUC per number of trees for the model trained without 'fnlwgt' and 'marital_status'.
plt.figure(figsize= (10, 5))
plt.plot( evals_result['train_auc'], label= 'train_auc')
plt.plot( evals_result['test_auc'], label= 'real_test_auc')
plt.plot( cv_results['test-auc-mean'], label= 'cv_auc')
plt.xlabel('number of trees')
plt.ylabel('roc_auc')
# The title was copied from the first experiment; this run also drops 'marital_status'.
plt.title('fnlwgt and marital_status dropped!')
plt.legend()

In [None]:
# confusion_matrix expects (y_true, y_pred); the arguments used to be swapped,
# which displayed the transposed matrix.
y_test_pred= xgb_clf.predict(X_test)
confusion_matrix(y_test, y_test_pred)

In [None]:
# (feature name, importance) pairs, most important first.
features_importance = sorted(
    zip(features, xgb_clf.feature_importances_),
    key=lambda pair: pair[1],
    reverse=True,
)
ranked_names = [name for name, _ in features_importance]
ranked_scores = [score for _, score in features_importance]
bar_positions = range(len(features))

plt.figure(figsize=(10, 5))
plt.title("Feature importances")
plt.bar(bar_positions, ranked_scores, color="r", align="center")
plt.xticks(bar_positions, ranked_names, rotation=90)
plt.xlim([-1, len(features)])

# XGBoost with dummy encoding of the categorical features

In [None]:
# Stack train on top of test so the encoding is applied exactly once.
df_dummy = pd.concat([train, test], sort=False).reset_index(drop=True)

# Categorical columns that are replaced by one indicator column per level.
cat_features = [
    'workclass', 'education', 'marital_status', 'occupation',
    'relationship', 'race', 'sex', 'native_country',
]

# Indicator columns are named '<original column>_<level>'.
indicator_frames = [
    pd.get_dummies(df_dummy[col]).add_prefix(col + '_') for col in cat_features
]

# Remove the original categorical columns and append the indicator columns,
# in the order given by `cat_features`.
df_dummy = pd.concat(
    [df_dummy.drop(columns=cat_features)] + indicator_frames, axis=1
)

# Response: 1 for ' >50K', 0 for anything else.
df_dummy['income'] = (df_dummy['income'] == ' >50K').astype(int)

display(df_dummy.head(1))

In [None]:

# y_train and y_test stay as they are; only the feature matrices are rebuilt
# from the dummy-encoded frame.
to_drop = ['fnlwgt', 'income']
n_train = len(train)
kept = df_dummy.drop(columns=to_drop)

X_train = kept.iloc[:n_train].values
X_test = kept.iloc[n_train:].values
features = list(kept.columns)

for caption, array in (('X_train shape: ', X_train), ('y_train shape: ', y_train),
                       ('X_test shape: ', X_test), ('y_test shape: ', y_test)):
    print(caption, array.shape)
print('features: ', features)
###################################################################
start = time.time()

# Initial booster settings used to find the first number of trees.
base_params = dict(
    eta=0.2,
    max_depth=8,
    min_child_weight=3,
    colsample_bytree=0.8,
    subsample=0.8,
    gamma=0,
    reg_lambda=1,
    reg_alpha=0,
    scale_pos_weight=3.15,
    objective='binary:logistic',
)

# Candidate values for the staged grid search.
search_grid = {
    'learning_rate': [0.2],
    'max_depth': list(range(3, 11)),
    'min_child_weight': list(range(1, 6)),
    'colsample_bytree': [i/10.0 for i in range(4,10)],
    'subsample': [i/10.0 for i in range(6,10)],
    'gamma': [i/10.0 for i in range(0,5)],
    'reg_lambda': [1e-5, 1e-2, 0.1, 1, 10, 100],
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 10, 100],
    'scale_pos_weight': [3.15],
    'objective': ['binary:logistic'],
}

hyperparameters, cv_results = fit_xgboost(
    X_train, y_train,
    init_params=base_params,
    tuning_params=search_grid,
    metric='auc', cv=5, seed=42,
)

elapsed = time.time() - start
print('\n\nThe runtime took %.2f seconds.' % elapsed)
#############################################################
# Refit on the dummy-encoded features with the tuned hyperparameters, tracking
# AUC per boosting round on the train set and on the held-out test set.
xgb_clf = xgb.XGBClassifier(random_state=seed, **hyperparameters)
monitored_sets = [(X_train, y_train), (X_test, y_test)]
xgb_clf.fit(
    X_train, y_train,
    eval_set=monitored_sets,
    eval_metric='auc',
    # early_stopping_rounds=100 is intentionally left disabled
    verbose=False,
)

# 'validation_0' / 'validation_1' follow the order of `eval_set`.
auc_history = xgb_clf.evals_result()
evals_result = pd.DataFrame({
    'train_auc': auc_history['validation_0']['auc'],
    'test_auc': auc_history['validation_1']['auc'],
})
for part in (evals_result.head(), evals_result.tail()):
    display(part)
#############################################################
# AUC per number of trees: refit on train, refit on test, and the cross-validated mean.
plt.figure(figsize=(10, 5))
curves = [
    (evals_result['train_auc'], 'train_auc'),
    (evals_result['test_auc'], 'real_test_auc'),
    (cv_results['test-auc-mean'], 'cv_auc'),
]
for values, curve_label in curves:
    plt.plot(values, label=curve_label)
plt.xlabel('number of trees')
plt.ylabel('roc_auc')
plt.title('fnlwgt dropped!')
plt.legend()

In [None]:
# (feature name, importance) pairs, most important first.
features_importance = sorted(
    zip(features, xgb_clf.feature_importances_),
    key=lambda pair: pair[1],
    reverse=True,
)
ranked_names = [name for name, _ in features_importance]
ranked_scores = [score for _, score in features_importance]
bar_positions = range(len(features))

# Extra-wide figure: there is one bar per dummy-encoded column.
plt.figure(figsize=(50, 5))
plt.title("Feature importances")
plt.bar(bar_positions, ranked_scores, color="r", align="center")
plt.xticks(bar_positions, ranked_names, rotation=90)
plt.xlim([-1, len(features)])

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the true
# classes and columns the predicted ones. The arguments used to be swapped, which
# displayed the transposed matrix.
y_pred= xgb_clf.predict(X_train)
print('Confusion matrix for he train data')
display( confusion_matrix(y_train, y_pred) )

y_test_pred= xgb_clf.predict(X_test)
print('Confusion matrix for the test data')
display( confusion_matrix(y_test, y_test_pred) )

# feature selection from dummy-encoded features

In [None]:
# `features_importance` is sorted by importance in descending order, so these are
# the five most important and the five least important features.
display(features_importance[:5])
display(features_importance[-5:])

In [None]:
# y_train and y_test stay as they are; the feature matrices are rebuilt without
# the dummy columns whose importance is exactly zero.
dummy_drop = [name for name, score in features_importance if score == 0]
to_drop = ['fnlwgt', 'income'] + dummy_drop
n_train = len(train)
kept = df_dummy.drop(columns=to_drop)

X_train = kept.iloc[:n_train].values
X_test = kept.iloc[n_train:].values
features = list(kept.columns)

for caption, array in (('X_train shape: ', X_train), ('y_train shape: ', y_train),
                       ('X_test shape: ', X_test), ('y_test shape: ', y_test)):
    print(caption, array.shape)
print('features: ', features)
###################################################################
start = time.time()

# Initial booster settings used to find the first number of trees.
base_params = dict(
    eta=0.2,
    max_depth=8,
    min_child_weight=3,
    colsample_bytree=0.8,
    subsample=0.8,
    gamma=0,
    reg_lambda=1,
    reg_alpha=0,
    scale_pos_weight=3.15,
    objective='binary:logistic',
)

# Candidate values for the staged grid search.
search_grid = {
    'learning_rate': [0.2],
    'max_depth': list(range(3, 11)),
    'min_child_weight': list(range(1, 6)),
    'colsample_bytree': [i/10.0 for i in range(4,10)],
    'subsample': [i/10.0 for i in range(6,10)],
    'gamma': [i/10.0 for i in range(0,5)],
    'reg_lambda': [1e-5, 1e-2, 0.1, 1, 10, 100],
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 10, 100],
    'scale_pos_weight': [3.15],
    'objective': ['binary:logistic'],
}

hyperparameters, cv_results = fit_xgboost(
    X_train, y_train,
    init_params=base_params,
    tuning_params=search_grid,
    metric='auc', cv=5, seed=42,
)

elapsed = time.time() - start
print('\n\nThe runtime took %.2f seconds.' % elapsed)
#############################################################
# Refit on the selected features with the tuned hyperparameters, tracking AUC
# per boosting round on the train set and on the held-out test set.
xgb_clf = xgb.XGBClassifier(random_state=seed, **hyperparameters)
monitored_sets = [(X_train, y_train), (X_test, y_test)]
xgb_clf.fit(
    X_train, y_train,
    eval_set=monitored_sets,
    eval_metric='auc',
    # early_stopping_rounds=100 is intentionally left disabled
    verbose=False,
)

# 'validation_0' / 'validation_1' follow the order of `eval_set`.
auc_history = xgb_clf.evals_result()
evals_result = pd.DataFrame({
    'train_auc': auc_history['validation_0']['auc'],
    'test_auc': auc_history['validation_1']['auc'],
})
for part in (evals_result.head(), evals_result.tail()):
    display(part)
#############################################################
# AUC per number of trees for the model trained on the selected dummy features.
plt.figure(figsize= (10, 5))
plt.plot( evals_result['train_auc'], label= 'train_auc')
plt.plot( evals_result['test_auc'], label= 'real_test_auc')
plt.plot( cv_results['test-auc-mean'], label= 'cv_auc')
plt.xlabel('number of trees')
plt.ylabel('roc_auc')
# The title was copied from the first experiment; this run also drops the
# dummy columns with zero importance.
plt.title('fnlwgt and zero-importance dummy features dropped!')
plt.legend()

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the true
# classes and columns the predicted ones. The arguments used to be swapped, which
# displayed the transposed matrix.
y_pred= xgb_clf.predict(X_train)
print('Confusion matrix for he train data')
display( confusion_matrix(y_train, y_pred) )

y_test_pred= xgb_clf.predict(X_test)
print('Confusion matrix for the test data')
display( confusion_matrix(y_test, y_test_pred) )

# Neural Networks

In [None]:
# Number of rows / input features; create_baseline below hard-codes input_dim= 92.
X_train.shape

In [None]:
# Weight initializer shared by every layer of the baseline network.
kernel= 'normal'

def create_baseline(input_dim= 92):
    """
    Builds and compiles the baseline feed-forward binary classifier.

    Architecture: Dense layers with 92, 92, 92, 46 and 23 relu units followed by
    a single sigmoid output unit; every layer uses the module-level `kernel`
    initializer. The model is compiled with binary cross-entropy loss, the adam
    optimizer and an AUC metric.

    Arguments:
        input_dim: number of input features, i.e. X_train.shape[1]. It defaults
            to 92, the value that was previously hard-coded.
    Returns:
        the compiled Sequential model
    """
    model= Sequential()
    # only the first layer needs to know the input width
    model.add( Dense(92, input_dim= input_dim, kernel_initializer= kernel, activation= 'relu'))
    for units in (92, 92, 46, 23):
        model.add( Dense(units, kernel_initializer= kernel, activation= 'relu'))
    model.add( Dense(1, kernel_initializer= kernel, activation= 'sigmoid' ) )

    model.compile(loss= 'binary_crossentropy', optimizer= 'adam', metrics= [ tf.keras.metrics.AUC() ])
    return model


In [None]:
# Relative frequency of each class in the test labels.
pd.Series(y_test).value_counts(normalize= True)

In [None]:
start = time.time()

# Train the baseline network, evaluating on the test set after every epoch.
model = create_baseline()
fit_options = dict(
    epochs=1200,
    batch_size=512,
    validation_data=(X_test, y_test),
    # class_weight={0: 1, 1: 76.3/23.6} is intentionally left disabled
)
history = model.fit(X_train, y_train, **fit_options)

elapsed = time.time() - start
print("Training the neural network took %.1f seconds." % elapsed)

**plot auc development**

In [None]:
# The AUC metric is created without an explicit name, so its key in
# `history.history` is generated by Keras and presumably depends on how many AUC
# metrics were built in this kernel session (the previous hard-coded 'auc_14'
# only matched one particular session). Look the key up instead.
auc_key= next(key for key in history.history if key.startswith('auc'))

plt.figure(figsize= (20, 4))
plt.ylim(0, 1)
plt.plot(history.history[auc_key], )
# Keras stores the validation value of a metric under 'val_<metric name>'
plt.plot(history.history['val_' + auc_key], marker= 'o', markersize= 4)
plt.title('model auc')
plt.ylabel('auc')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='lower right')
plt.show()

**plotting the loss**

In [None]:
# Training and validation loss per epoch.
plt.figure(figsize=(20, 4))
loss_curves = [
    ('loss', {}),
    ('val_loss', {'marker': 'o', 'markersize': 4}),
]
for history_key, line_style in loss_curves:
    plt.plot(history.history[history_key], **line_style)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='lower right')
plt.show()

In [None]:
# `Sequential.predict_classes` has been removed from recent Keras releases. For a
# single sigmoid output it is equivalent to thresholding the predicted
# probability at 0.5, which works on both old and new versions.
y_test_pred= (model.predict(X_test) > 0.5).astype('int32')
#pd.Series(y_test_pred.ravel()).value_counts()
matrix = confusion_matrix(y_test, y_test_pred)
matrix


In [None]:
# `Sequential.predict_classes` has been removed from recent Keras releases. For a
# single sigmoid output it is equivalent to thresholding the predicted
# probability at 0.5, which works on both old and new versions.
y_pred= (model.predict(X_train) > 0.5).astype('int32')
matrix = confusion_matrix(y_train, y_pred)
matrix

In [None]:
# Same confusion matrix with a lower decision threshold:
# a probability of at least 0.2 counts as the positive class.
positive_probability = model.predict(X_test).ravel()
y_test_pred = pd.Series((positive_probability >= 0.2).astype(int))
matrix = confusion_matrix(y_test, y_test_pred)
matrix
