# LOADING MODULES AND DATA SETS

In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import math

from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler,OrdinalEncoder
from sklearn.compose import ColumnTransformer,make_column_transformer
from sklearn.pipeline import Pipeline,make_pipeline
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV,RandomizedSearchCV
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score,roc_curve,precision_recall_curve,classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from scipy.sparse import csr_matrix
from eli5.sklearn import PermutationImportance

import eli5
import xgboost as xgbs
import seaborn as sns
import matplotlib.pyplot as plt
from yellowbrick import classifier, features, regressor
import pickle
import re

# BIVARIATE ANALYSIS AND FEATURE ENGINEERING

In [2]:
data_train = r'/Users/Humza Ali/Documents/Data/Machine Learning/Project 1/Consumer_Complaints_train.csv'
cd_train = pd.read_csv(data_train)

In [3]:
data_test = r'/Users/Humza Ali/Documents/Data/Machine Learning/Project 1/Consumer_Complaints_test_share.csv'
cd_test = pd.read_csv(data_test)

In [4]:
cd_train.head()

Unnamed: 0,Date received,Product,Sub-product,Issue,Sub-issue,Consumer complaint narrative,Company public response,Company,State,ZIP code,Tags,Consumer consent provided?,Submitted via,Date sent to company,Company response to consumer,Timely response?,Consumer disputed?,Complaint ID
0,2014-05-15,Credit card,,Billing statement,,,,Wells Fargo & Company,MI,48342,Older American,,Web,2014-05-16,Closed with explanation,Yes,No,856103
1,2014-09-18,Bank account or service,(CD) Certificate of deposit,"Making/receiving payments, sending money",,,,Santander Bank US,PA,18042,,,Referral,2014-09-24,Closed,Yes,No,1034666
2,2014-03-13,Credit reporting,,Incorrect information on credit report,Account status,,,Equifax,CA,92427,,,Referral,2014-04-03,Closed with non-monetary relief,Yes,No,756363
3,2015-07-17,Credit card,,Billing statement,,"My credit card statement from US Bank, XXXX. X...",Company chooses not to provide a public response,U.S. Bancorp,GA,305XX,Older American,Consent provided,Web,2015-07-17,Closed with monetary relief,Yes,No,1474177
4,2014-11-20,Credit card,,Transaction issue,,,,Bank of America,MA,02127,,,Web,2014-11-28,Closed with explanation,Yes,No,1132572


In [5]:
cd_test.head()

Unnamed: 0,Date received,Product,Sub-product,Issue,Sub-issue,Consumer complaint narrative,Company public response,Company,State,ZIP code,Tags,Consumer consent provided?,Submitted via,Date sent to company,Company response to consumer,Timely response?,Complaint ID
0,2014-01-18,Bank account or service,Cashing a check without an account,Deposits and withdrawals,,,,Bank of America,CA,95691,,,Web,2014-01-17,Closed with explanation,Yes,675956
1,2016-03-31,Debt collection,Credit card,Cont'd attempts collect debt not owed,Debt was paid,,,"National Credit Adjusters, LLC",FL,32086,,Consent not provided,Web,2016-03-31,Closed with explanation,Yes,1858795
2,2012-03-08,Mortgage,Conventional adjustable mortgage (ARM),"Loan servicing, payments, escrow account",,,,Wells Fargo & Company,CA,94618,,,Web,2012-03-09,Closed without relief,Yes,32637
3,2016-01-07,Credit reporting,,Unable to get credit report/credit score,Problem getting report or credit score,,Company chooses not to provide a public response,"TransUnion Intermediate Holdings, Inc.",FL,33584,Older American,,Postal mail,2016-01-12,Closed with non-monetary relief,Yes,1731374
4,2013-08-23,Mortgage,FHA mortgage,"Loan modification,collection,foreclosure",,,,Bank of America,FL,33543,,,Web,2013-08-23,Closed with explanation,Yes,501487


In [6]:
cd_train.shape,cd_test.shape

((478421, 18), (119606, 17))

In [7]:
cd_train.dtypes

Date received                   object
Product                         object
Sub-product                     object
Issue                           object
Sub-issue                       object
Consumer complaint narrative    object
Company public response         object
Company                         object
State                           object
ZIP code                        object
Tags                            object
Consumer consent provided?      object
Submitted via                   object
Date sent to company            object
Company response to consumer    object
Timely response?                object
Consumer disputed?              object
Complaint ID                     int64
dtype: object

In [8]:
cd_train.nunique()

Date received                     1759
Product                             12
Sub-product                         47
Issue                               95
Sub-issue                           68
Consumer complaint narrative     74019
Company public response             10
Company                           3276
State                               62
ZIP code                         25962
Tags                                 3
Consumer consent provided?           4
Submitted via                        6
Date sent to company              1706
Company response to consumer         7
Timely response?                     2
Consumer disputed?                   2
Complaint ID                    478421
dtype: int64

In [9]:
cd_train.isnull().sum()

Date received                        0
Product                              0
Sub-product                     138473
Issue                                0
Sub-issue                       292625
Consumer complaint narrative    403327
Company public response         388029
Company                              0
State                             3839
ZIP code                          3848
Tags                            411215
Consumer consent provided?      342934
Submitted via                        0
Date sent to company                 0
Company response to consumer         0
Timely response?                     0
Consumer disputed?                   0
Complaint ID                         0
dtype: int64

In [10]:
cd_train["Date received"].value_counts()

2015-08-27    767
2015-08-26    731
2014-06-26    697
2015-07-08    678
2015-08-12    654
             ... 
2016-09-18      6
2016-09-22      4
2016-09-21      4
2014-03-09      3
2016-09-23      1
Name: Date received, Length: 1759, dtype: int64

In [11]:
cd_train["Date received"]=pd.to_datetime(cd_train["Date received"],infer_datetime_format=True)

In [12]:
cd_test["Date received"]=pd.to_datetime(cd_test["Date received"],infer_datetime_format=True)

In [13]:
cd_train["Date sent to company"]=pd.to_datetime(cd_train["Date sent to company"],infer_datetime_format=True)

In [14]:
cd_test["Date sent to company"]=pd.to_datetime(cd_test["Date sent to company"],infer_datetime_format=True)

In [15]:
cd_train['day_diff']=(cd_train['Date sent to company']-cd_train['Date received']).dt.days
cd_test['day_diff']=(cd_test['Date sent to company']-cd_test['Date received']).dt.days

In [16]:
cd_train["day_diff"].value_counts()

0      209750
1       58939
2       38005
3       30711
4       28639
        ...  
571         1
612         1
356         1
573         1
511         1
Name: day_diff, Length: 398, dtype: int64

In [17]:
for col in ['Date received','Date sent to company']:
    cd_train.drop([col],1,inplace=True)
    cd_test.drop([col],1,inplace=True)

In [18]:
cd_train.head()

Unnamed: 0,Product,Sub-product,Issue,Sub-issue,Consumer complaint narrative,Company public response,Company,State,ZIP code,Tags,Consumer consent provided?,Submitted via,Company response to consumer,Timely response?,Consumer disputed?,Complaint ID,day_diff
0,Credit card,,Billing statement,,,,Wells Fargo & Company,MI,48342,Older American,,Web,Closed with explanation,Yes,No,856103,1
1,Bank account or service,(CD) Certificate of deposit,"Making/receiving payments, sending money",,,,Santander Bank US,PA,18042,,,Referral,Closed,Yes,No,1034666,6
2,Credit reporting,,Incorrect information on credit report,Account status,,,Equifax,CA,92427,,,Referral,Closed with non-monetary relief,Yes,No,756363,21
3,Credit card,,Billing statement,,"My credit card statement from US Bank, XXXX. X...",Company chooses not to provide a public response,U.S. Bancorp,GA,305XX,Older American,Consent provided,Web,Closed with monetary relief,Yes,No,1474177,0
4,Credit card,,Transaction issue,,,,Bank of America,MA,02127,,,Web,Closed with explanation,Yes,No,1132572,8


In [19]:
cd_train["Consumer disputed?"].value_counts()

No     376990
Yes    101431
Name: Consumer disputed?, dtype: int64

In [20]:
cd_train["Consumer disputed?"]=np.where(cd_train["Consumer disputed?"]=="Yes",1,0)

In [21]:
cd_train['Consumer disputed?'].value_counts()

0    376990
1    101431
Name: Consumer disputed?, dtype: int64

In [22]:
cd_train['Product'].value_counts()

Mortgage                   156175
Debt collection             86544
Credit reporting            81115
Credit card                 57358
Bank account or service     54403
Consumer Loan               18599
Student loan                14918
Money transfers              3349
Payday loan                  3219
Prepaid card                 2226
Other financial service       507
Virtual currency                8
Name: Product, dtype: int64

In [23]:
probs=round(cd_train.groupby("Product")["Consumer disputed?"].mean(),2).to_dict()

In [24]:
probs.items()

dict_items([('Bank account or service', 0.2), ('Consumer Loan', 0.24), ('Credit card', 0.22), ('Credit reporting', 0.18), ('Debt collection', 0.2), ('Money transfers', 0.15), ('Mortgage', 0.24), ('Other financial service', 0.23), ('Payday loan', 0.17), ('Prepaid card', 0.15), ('Student loan', 0.21), ('Virtual currency', 0.38)])

In [25]:
def mapping_func(df,x,y,prefix="pro_"):
    probs=round(cd_train.groupby(x)[y].mean(),2).to_dict()
    mapping_dict=dict()
    for k,v in probs.items():
        mapping_dict[k]=prefix+str(v).replace(".","")
    return mapping_dict

In [26]:
cd_train=cd_train.assign(
    Product=cd_train["Product"].map(mapping_func(cd_train,"Product","Consumer disputed?","prod_")),
    Sub_product=cd_train["Sub-product"].
    map(mapping_func(cd_train,"Sub-product","Consumer disputed?","subprod_")),
    Issue=cd_train["Issue"].map(mapping_func(cd_train,"Issue","Consumer disputed?","Issue_")),
    Company_public_response=cd_train["Company public response"].
    map(mapping_func(cd_train,"Company public response","Consumer disputed?","Cps_")),
    Sub_issue=cd_train["Sub-issue"].map(mapping_func(cd_train,"Sub-issue","Consumer disputed?","Sub_iss_")),
    State=cd_train["State"].map(mapping_func(cd_train,"State","Consumer disputed?","State_")))

In [27]:
cd_train["Product"].value_counts()

prod_024    174774
prod_02     140947
prod_018     81115
prod_022     57358
prod_021     14918
prod_015      5575
prod_017      3219
prod_023       507
prod_038         8
Name: Product, dtype: int64

In [28]:
cd_train.head()

Unnamed: 0,Product,Sub-product,Issue,Sub-issue,Consumer complaint narrative,Company public response,Company,State,ZIP code,Tags,Consumer consent provided?,Submitted via,Company response to consumer,Timely response?,Consumer disputed?,Complaint ID,day_diff,Sub_product,Company_public_response,Sub_issue
0,prod_022,,Issue_019,,,,Wells Fargo & Company,State_021,48342,Older American,,Web,Closed with explanation,Yes,0,856103,1,,,
1,prod_02,(CD) Certificate of deposit,Issue_021,,,,Santander Bank US,State_021,18042,,,Referral,Closed,Yes,0,1034666,6,subprod_027,,
2,prod_018,,Issue_016,Account status,,,Equifax,State_022,92427,,,Referral,Closed with non-monetary relief,Yes,0,756363,21,,,Sub_iss_014
3,prod_022,,Issue_019,,"My credit card statement from US Bank, XXXX. X...",Company chooses not to provide a public response,U.S. Bancorp,State_021,305XX,Older American,Consent provided,Web,Closed with monetary relief,Yes,0,1474177,0,,Cps_019,
4,prod_022,,Issue_021,,,,Bank of America,State_021,02127,,,Web,Closed with explanation,Yes,0,1132572,8,,,


In [29]:
cd_train.nunique(axis=0,dropna=True)

Product                              9
Sub-product                         47
Issue                               24
Sub-issue                           68
Consumer complaint narrative     74019
Company public response             10
Company                           3276
State                               15
ZIP code                         25962
Tags                                 3
Consumer consent provided?           4
Submitted via                        6
Company response to consumer         7
Timely response?                     2
Consumer disputed?                   2
Complaint ID                    478421
day_diff                           398
Sub_product                         23
Company_public_response              8
Sub_issue                           23
dtype: int64

In [30]:
cd_train["Tags"].value_counts()

Older American                   39064
Servicemember                    22592
Older American, Servicemember     5550
Name: Tags, dtype: int64

In [31]:
cd_train["Submitted via"].value_counts()

Web            313916
Referral        91352
Phone           34417
Postal mail     31448
Fax              7032
Email             256
Name: Submitted via, dtype: int64

In [32]:
cd_train["Consumer consent provided?"].value_counts()

Consent provided        75095
Consent not provided    56147
Other                    4242
Consent withdrawn           3
Name: Consumer consent provided?, dtype: int64

In [33]:
cd_train["Consumer consent provided?"]=cd_train["Consumer consent provided?"].str.replace(" ","_")

In [34]:
cd_train["Consumer consent provided?"].value_counts()

Consent_provided        75095
Consent_not_provided    56147
Other                    4242
Consent_withdrawn           3
Name: Consumer consent provided?, dtype: int64

In [35]:
cd_train["Company response to consumer"]=cd_train["Company response to consumer"].str.replace(" ","_")
cd_train["Company response to consumer"].value_counts() #ohe

Closed_with_explanation            354310
Closed_with_non-monetary_relief     61491
Closed_with_monetary_relief         32925
Closed_without_relief               14145
Closed                              11365
Closed_with_relief                   4184
Untimely_response                       1
Name: Company response to consumer, dtype: int64

In [36]:
cd_train["Timely response?"].value_counts()

Yes    470277
No       8144
Name: Timely response?, dtype: int64

In [37]:
cd_train.drop(["Consumer complaint narrative","Company","ZIP code","Complaint ID"],1,inplace=True)

In [38]:
cd_train.head()

Unnamed: 0,Product,Sub-product,Issue,Sub-issue,Company public response,State,Tags,Consumer consent provided?,Submitted via,Company response to consumer,Timely response?,Consumer disputed?,day_diff,Sub_product,Company_public_response,Sub_issue
0,prod_022,,Issue_019,,,State_021,Older American,,Web,Closed_with_explanation,Yes,0,1,,,
1,prod_02,(CD) Certificate of deposit,Issue_021,,,State_021,,,Referral,Closed,Yes,0,6,subprod_027,,
2,prod_018,,Issue_016,Account status,,State_022,,,Referral,Closed_with_non-monetary_relief,Yes,0,21,,,Sub_iss_014
3,prod_022,,Issue_019,,Company chooses not to provide a public response,State_021,Older American,Consent_provided,Web,Closed_with_monetary_relief,Yes,0,0,,Cps_019,
4,prod_022,,Issue_021,,,State_021,,,Web,Closed_with_explanation,Yes,0,8,,,


In [39]:
cd_train.isnull().sum()

Product                              0
Sub-product                     138473
Issue                                0
Sub-issue                       292625
Company public response         388029
State                             3839
Tags                            411215
Consumer consent provided?      342934
Submitted via                        0
Company response to consumer         0
Timely response?                     0
Consumer disputed?                   0
day_diff                             0
Sub_product                     138473
Company_public_response         388029
Sub_issue                       292625
dtype: int64

In [40]:
cd_train.isnull().sum()*100/len(cd_train)

Product                          0.000000
Sub-product                     28.943755
Issue                            0.000000
Sub-issue                       61.164748
Company public response         81.106181
State                            0.802431
Tags                            85.952540
Consumer consent provided?      71.680382
Submitted via                    0.000000
Company response to consumer     0.000000
Timely response?                 0.000000
Consumer disputed?               0.000000
day_diff                         0.000000
Sub_product                     28.943755
Company_public_response         81.106181
Sub_issue                       61.164748
dtype: float64

In [41]:
cd_test.isnull().sum()*100/len(cd_test)

Product                          0.000000
Sub-product                     29.055399
Issue                            0.000000
Sub-issue                       61.083892
Consumer complaint narrative    84.484892
Company public response         80.957477
Company                          0.000000
State                            0.773373
ZIP code                         0.774209
Tags                            85.894520
Consumer consent provided?      71.687039
Submitted via                    0.000836
Company response to consumer     0.000000
Timely response?                 0.000000
Complaint ID                     0.000000
day_diff                         0.000000
dtype: float64

# TRAIN TEST SPLIT

In [42]:
x_train,x_test= train_test_split(cd_train,test_size=.2,random_state=1)

In [43]:
x_train1=x_train.drop(["Consumer disputed?"],1)
y_train1=x_train["Consumer disputed?"]

In [44]:
x_test1=x_test.drop(["Consumer disputed?"],1)
y_test1=x_test["Consumer disputed?"]

In [45]:
num_cols=x_train1.select_dtypes(np.number).columns

In [46]:
char_cols=x_train1.select_dtypes(object).columns

In [47]:
num_cols,char_cols

(Index(['day_diff'], dtype='object'),
 Index(['Product', 'Sub-product', 'Issue', 'Sub-issue',
        'Company public response', 'State', 'Tags',
        'Consumer consent provided?', 'Submitted via',
        'Company response to consumer', 'Timely response?', 'Sub_product',
        'Company_public_response', 'Sub_issue'],
       dtype='object'))

In [48]:
pipe_num= make_pipeline(SimpleImputer(strategy="median"),StandardScaler())
pipe_char=make_pipeline(SimpleImputer(strategy="constant",fill_value="Missing"),
                       OneHotEncoder(handle_unknown="ignore"))

In [49]:
ctrans=make_column_transformer((pipe_num,num_cols),(pipe_char,char_cols))

In [50]:
ctrans.fit_transform(x_train1)

<382736x259 sparse matrix of type '<class 'numpy.float64'>'
	with 5741040 stored elements in Compressed Sparse Row format>

In [51]:
ctrans.transform(cd_test)

<119606x259 sparse matrix of type '<class 'numpy.float64'>'
	with 1286702 stored elements in Compressed Sparse Row format>

# LOGISTIC REGRESSION

In [52]:
logreg=LogisticRegression(
   solver="liblinear",
    penalty="l1",
    class_weight="balanced",
    random_state=1,
    max_iter=800,)

In [53]:
logreg

LogisticRegression(C=1.0, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=800, multi_class='warn', n_jobs=None, penalty='l1',
                   random_state=1, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [54]:
pipe=make_pipeline(ctrans,logreg)

In [55]:
pipe

Pipeline(memory=None,
         steps=[('columntransformer',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('pipeline-1',
                                                  Pipeline(memory=None,
                                                           steps=[('simpleimputer',
                                                                   SimpleImputer(add_indicator=False,
                                                                                 copy=True,
                                                                                 fill_value=None,
                                                                                 missing_values=nan,
                                                                                 strategy='median',
                                           

In [56]:
pipe.fit(x_train1,y_train1)

Pipeline(memory=None,
         steps=[('columntransformer',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('pipeline-1',
                                                  Pipeline(memory=None,
                                                           steps=[('simpleimputer',
                                                                   SimpleImputer(add_indicator=False,
                                                                                 copy=True,
                                                                                 fill_value=None,
                                                                                 missing_values=nan,
                                                                                 strategy='median',
                                           

In [57]:
pipe.predict(x_train1)

array([1, 1, 1, ..., 0, 0, 1])

In [58]:
pipe.predict(x_test1)

array([0, 0, 1, ..., 1, 0, 0])

# AUC ROC Score

In [59]:
roc_auc_score(y_train1,pipe.predict_proba(x_train1)[:,1])

0.6219741103877195

In [60]:
roc_auc_score(y_test1,pipe.predict_proba(x_test1)[:,1])

0.621030162738635

In [61]:
# Now AUC ROC on Whole Data

In [62]:
roc_auc_score(y_train1,pipe.predict_proba(x_train)[:,1])

0.6219741103877195

In [63]:
roc_auc_score(y_test1,pipe.predict_proba(x_test)[:,1])

0.621030162738635

In [64]:
pipe.predict(cd_test)

array([1, 1, 1, ..., 0, 0, 1])

In [None]:
submission = pipe.predict(cd_test)

In [None]:
submission=pd.DataFrame(data=submission)

In [None]:
type(submission)

In [None]:
submission

In [None]:
submission.columns=["Consumer disputed?"]

In [None]:
submission["Consumer disputed?"]=np.where(submission["Consumer disputed?"]==1,"Yes","No")

In [None]:
submission["Complaint ID"]=cd_test["Complaint ID"]

In [None]:
submission=submission[["Complaint ID","Consumer disputed?"]]

In [None]:
submission

In [None]:
submission.to_csv("Abdeali_Project1.csv",index=False)

# RANDOM FOREST

In [65]:
strings='randomforestclassifier__'

param_dict={'n_estimators':[int(x) for x in np.linspace(200,2000,num=10)],
           'max_features':['auto','sqrt'],
            'max_depth':[int(x) for x in np.linspace(10,110,num=11)],
            'min_samples_split':[2,5,10],
            'min_samples_leaf':[1,2,4],
            'bootstrap':[True,False]
            }

In [66]:
param_dict.items()

dict_items([('n_estimators', [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]), ('max_features', ['auto', 'sqrt']), ('max_depth', [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110]), ('min_samples_split', [2, 5, 10]), ('min_samples_leaf', [1, 2, 4]), ('bootstrap', [True, False])])

In [67]:
param_dict = {strings+k:v for k,v in param_dict.items()}

In [68]:
param_dict

{'randomforestclassifier__n_estimators': [200,
  400,
  600,
  800,
  1000,
  1200,
  1400,
  1600,
  1800,
  2000],
 'randomforestclassifier__max_features': ['auto', 'sqrt'],
 'randomforestclassifier__max_depth': [10,
  20,
  30,
  40,
  50,
  60,
  70,
  80,
  90,
  100,
  110],
 'randomforestclassifier__min_samples_split': [2, 5, 10],
 'randomforestclassifier__min_samples_leaf': [1, 2, 4],
 'randomforestclassifier__bootstrap': [True, False]}

In [69]:
ctrans.fit_transform(x_train1)

<382736x259 sparse matrix of type '<class 'numpy.float64'>'
	with 5741040 stored elements in Compressed Sparse Row format>

In [70]:
rf = RandomForestClassifier(random_state=1)

In [71]:
rf

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators='warn',
                       n_jobs=None, oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [72]:
model_pipeline = make_pipeline(ctrans,rf)

In [73]:
model_pipeline

Pipeline(memory=None,
         steps=[('columntransformer',
                 ColumnTransformer(n_jobs=None, remainder='drop',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('pipeline-1',
                                                  Pipeline(memory=None,
                                                           steps=[('simpleimputer',
                                                                   SimpleImputer(add_indicator=False,
                                                                                 copy=True,
                                                                                 fill_value=None,
                                                                                 missing_values=nan,
                                                                                 strategy='median',
                                           

In [74]:
grid=RandomizedSearchCV(model_pipeline,param_dict,cv=5,scoring = "accuracy")

In [75]:
grid

RandomizedSearchCV(cv=5, error_score='raise-deprecating',
                   estimator=Pipeline(memory=None,
                                      steps=[('columntransformer',
                                              ColumnTransformer(n_jobs=None,
                                                                remainder='drop',
                                                                sparse_threshold=0.3,
                                                                transformer_weights=None,
                                                                transformers=[('pipeline-1',
                                                                               Pipeline(memory=None,
                                                                                        steps=[('simpleimputer',
                                                                                                SimpleImputer(add_indicator=False,
                                                   

In [None]:
grid.fit(x_train1,y_train1)

In [None]:
grid.predict(x_train1)

In [None]:
grid.predict(x_test1)

In [None]:
roc_auc_score(y_train1,grid.predict_proba(x_train1)[:,1])

In [None]:
roc_auc_score(y_test1,grid.predict_proba(x_test1)[:,1])