In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [2]:
df = pd.read_csv('train_Df64byy.csv')

In [3]:
df.head()

Unnamed: 0,ID,City_Code,Region_Code,Accomodation_Type,Reco_Insurance_Type,Upper_Age,Lower_Age,Is_Spouse,Health_Indicator,Holding_Policy_Duration,Holding_Policy_Type,Reco_Policy_Cat,Reco_Policy_Premium,Response
0,1,C3,3213,Rented,Individual,36,36,No,X1,14+,3.0,22,11628.0,0
1,2,C5,1117,Owned,Joint,75,22,No,X2,,,22,30510.0,0
2,3,C5,3732,Owned,Individual,32,32,No,,1,1.0,19,7450.0,1
3,4,C24,4378,Owned,Joint,52,48,No,X1,14+,3.0,19,17780.0,0
4,5,C8,2190,Rented,Individual,44,44,No,X2,3,1.0,16,10404.0,0


In [4]:
df.columns

Index(['ID', 'City_Code', 'Region_Code', 'Accomodation_Type',
       'Reco_Insurance_Type', 'Upper_Age', 'Lower_Age', 'Is_Spouse',
       'Health_Indicator', 'Holding_Policy_Duration', 'Holding_Policy_Type',
       'Reco_Policy_Cat', 'Reco_Policy_Premium', 'Response'],
      dtype='object')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50882 entries, 0 to 50881
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   ID                       50882 non-null  int64  
 1   City_Code                50882 non-null  object 
 2   Region_Code              50882 non-null  int64  
 3   Accomodation_Type        50882 non-null  object 
 4   Reco_Insurance_Type      50882 non-null  object 
 5   Upper_Age                50882 non-null  int64  
 6   Lower_Age                50882 non-null  int64  
 7   Is_Spouse                50882 non-null  object 
 8   Health_Indicator         39191 non-null  object 
 9   Holding_Policy_Duration  30631 non-null  object 
 10  Holding_Policy_Type      30631 non-null  float64
 11  Reco_Policy_Cat          50882 non-null  int64  
 12  Reco_Policy_Premium      50882 non-null  float64
 13  Response                 50882 non-null  int64  
dtypes: float64(2), int64(6

# Data Cleaning

In [6]:
df.isnull().sum()

ID                             0
City_Code                      0
Region_Code                    0
Accomodation_Type              0
Reco_Insurance_Type            0
Upper_Age                      0
Lower_Age                      0
Is_Spouse                      0
Health_Indicator           11691
Holding_Policy_Duration    20251
Holding_Policy_Type        20251
Reco_Policy_Cat                0
Reco_Policy_Premium            0
Response                       0
dtype: int64

In [7]:
df['Health_Indicator'].unique()

array(['X1', 'X2', nan, 'X4', 'X3', 'X6', 'X5', 'X8', 'X7', 'X9'],
      dtype=object)

In [8]:
#Replacing the null value with mode
df['Health_Indicator']=df['Health_Indicator'].replace(np.nan,0)

In [9]:
df.isnull().sum()

ID                             0
City_Code                      0
Region_Code                    0
Accomodation_Type              0
Reco_Insurance_Type            0
Upper_Age                      0
Lower_Age                      0
Is_Spouse                      0
Health_Indicator               0
Holding_Policy_Duration    20251
Holding_Policy_Type        20251
Reco_Policy_Cat                0
Reco_Policy_Premium            0
Response                       0
dtype: int64

In [10]:
df['Holding_Policy_Duration'].unique()

array(['14+', nan, '1', '3', '5', '9', '14', '7', '2', '11', '10', '8',
       '6', '4', '13', '12'], dtype=object)

In [11]:
#Replacing the null values with mode
df['Holding_Policy_Duration']=df['Holding_Policy_Duration'].replace(np.nan,0)

In [12]:
df['Holding_Policy_Type'].unique()

array([ 3., nan,  1.,  4.,  2.])

In [13]:
#Replacing the null values with mean
df['Holding_Policy_Type']=df['Holding_Policy_Type'].replace(np.nan,0)

In [14]:
df.isnull().sum()

ID                         0
City_Code                  0
Region_Code                0
Accomodation_Type          0
Reco_Insurance_Type        0
Upper_Age                  0
Lower_Age                  0
Is_Spouse                  0
Health_Indicator           0
Holding_Policy_Duration    0
Holding_Policy_Type        0
Reco_Policy_Cat            0
Reco_Policy_Premium        0
Response                   0
dtype: int64

In [15]:
df.columns

Index(['ID', 'City_Code', 'Region_Code', 'Accomodation_Type',
       'Reco_Insurance_Type', 'Upper_Age', 'Lower_Age', 'Is_Spouse',
       'Health_Indicator', 'Holding_Policy_Duration', 'Holding_Policy_Type',
       'Reco_Policy_Cat', 'Reco_Policy_Premium', 'Response'],
      dtype='object')

In [16]:
df.drop(['ID'],axis=1,inplace = True)

In [17]:
df.dtypes

City_Code                   object
Region_Code                  int64
Accomodation_Type           object
Reco_Insurance_Type         object
Upper_Age                    int64
Lower_Age                    int64
Is_Spouse                   object
Health_Indicator            object
Holding_Policy_Duration     object
Holding_Policy_Type        float64
Reco_Policy_Cat              int64
Reco_Policy_Premium        float64
Response                     int64
dtype: object

In [18]:
#fetching those columns which contains strings and assigning to temp
temp = df.loc[:,df.dtypes=='object']

In [19]:
temp.dtypes

City_Code                  object
Accomodation_Type          object
Reco_Insurance_Type        object
Is_Spouse                  object
Health_Indicator           object
Holding_Policy_Duration    object
dtype: object

In [20]:
temp.columns

Index(['City_Code', 'Accomodation_Type', 'Reco_Insurance_Type', 'Is_Spouse',
       'Health_Indicator', 'Holding_Policy_Duration'],
      dtype='object')

In [21]:
dummy=pd.get_dummies(df[['City_Code', 'Accomodation_Type', 'Reco_Insurance_Type', 'Is_Spouse',
       'Health_Indicator', 'Holding_Policy_Duration']],drop_first = True)

In [22]:
dummy

Unnamed: 0,City_Code_C10,City_Code_C11,City_Code_C12,City_Code_C13,City_Code_C14,City_Code_C15,City_Code_C16,City_Code_C17,City_Code_C18,City_Code_C19,...,Holding_Policy_Duration_14,Holding_Policy_Duration_14+,Holding_Policy_Duration_2,Holding_Policy_Duration_3,Holding_Policy_Duration_4,Holding_Policy_Duration_5,Holding_Policy_Duration_6,Holding_Policy_Duration_7,Holding_Policy_Duration_8,Holding_Policy_Duration_9
0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50877,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
50878,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
50879,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
50880,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [23]:
df = pd.concat([df,dummy],axis =1)

In [24]:
df.head()

Unnamed: 0,City_Code,Region_Code,Accomodation_Type,Reco_Insurance_Type,Upper_Age,Lower_Age,Is_Spouse,Health_Indicator,Holding_Policy_Duration,Holding_Policy_Type,...,Holding_Policy_Duration_14,Holding_Policy_Duration_14+,Holding_Policy_Duration_2,Holding_Policy_Duration_3,Holding_Policy_Duration_4,Holding_Policy_Duration_5,Holding_Policy_Duration_6,Holding_Policy_Duration_7,Holding_Policy_Duration_8,Holding_Policy_Duration_9
0,C3,3213,Rented,Individual,36,36,No,X1,14+,3.0,...,0,1,0,0,0,0,0,0,0,0
1,C5,1117,Owned,Joint,75,22,No,X2,0,0.0,...,0,0,0,0,0,0,0,0,0,0
2,C5,3732,Owned,Individual,32,32,No,0,1,1.0,...,0,0,0,0,0,0,0,0,0,0
3,C24,4378,Owned,Joint,52,48,No,X1,14+,3.0,...,0,1,0,0,0,0,0,0,0,0
4,C8,2190,Rented,Individual,44,44,No,X2,3,1.0,...,0,0,0,1,0,0,0,0,0,0


In [25]:
df

Unnamed: 0,City_Code,Region_Code,Accomodation_Type,Reco_Insurance_Type,Upper_Age,Lower_Age,Is_Spouse,Health_Indicator,Holding_Policy_Duration,Holding_Policy_Type,...,Holding_Policy_Duration_14,Holding_Policy_Duration_14+,Holding_Policy_Duration_2,Holding_Policy_Duration_3,Holding_Policy_Duration_4,Holding_Policy_Duration_5,Holding_Policy_Duration_6,Holding_Policy_Duration_7,Holding_Policy_Duration_8,Holding_Policy_Duration_9
0,C3,3213,Rented,Individual,36,36,No,X1,14+,3.0,...,0,1,0,0,0,0,0,0,0,0
1,C5,1117,Owned,Joint,75,22,No,X2,0,0.0,...,0,0,0,0,0,0,0,0,0,0
2,C5,3732,Owned,Individual,32,32,No,0,1,1.0,...,0,0,0,0,0,0,0,0,0,0
3,C24,4378,Owned,Joint,52,48,No,X1,14+,3.0,...,0,1,0,0,0,0,0,0,0,0
4,C8,2190,Rented,Individual,44,44,No,X2,3,1.0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50877,C4,845,Rented,Individual,22,22,No,X3,0,0.0,...,0,0,0,0,0,0,0,0,0,0
50878,C5,4188,Rented,Individual,27,27,No,X3,7,3.0,...,0,0,0,0,0,0,0,1,0,0
50879,C1,442,Rented,Individual,63,63,No,X2,14+,1.0,...,0,1,0,0,0,0,0,0,0,0
50880,C1,4,Owned,Joint,71,49,No,X2,2,2.0,...,0,0,1,0,0,0,0,0,0,0


In [26]:
df = df.drop(['City_Code', 'Accomodation_Type', 'Reco_Insurance_Type', 'Is_Spouse',
       'Health_Indicator', 'Holding_Policy_Duration'], axis = 1)

In [27]:
df.columns

Index(['Region_Code', 'Upper_Age', 'Lower_Age', 'Holding_Policy_Type',
       'Reco_Policy_Cat', 'Reco_Policy_Premium', 'Response', 'City_Code_C10',
       'City_Code_C11', 'City_Code_C12', 'City_Code_C13', 'City_Code_C14',
       'City_Code_C15', 'City_Code_C16', 'City_Code_C17', 'City_Code_C18',
       'City_Code_C19', 'City_Code_C2', 'City_Code_C20', 'City_Code_C21',
       'City_Code_C22', 'City_Code_C23', 'City_Code_C24', 'City_Code_C25',
       'City_Code_C26', 'City_Code_C27', 'City_Code_C28', 'City_Code_C29',
       'City_Code_C3', 'City_Code_C30', 'City_Code_C31', 'City_Code_C32',
       'City_Code_C33', 'City_Code_C34', 'City_Code_C35', 'City_Code_C36',
       'City_Code_C4', 'City_Code_C5', 'City_Code_C6', 'City_Code_C7',
       'City_Code_C8', 'City_Code_C9', 'Accomodation_Type_Rented',
       'Reco_Insurance_Type_Joint', 'Is_Spouse_Yes', 'Health_Indicator_X1',
       'Health_Indicator_X2', 'Health_Indicator_X3', 'Health_Indicator_X4',
       'Health_Indicator_X5', 'Health_

In [28]:
#Checking any object data type is present or not
(df.dtypes=='object').sum()

0

In [29]:
df

Unnamed: 0,Region_Code,Upper_Age,Lower_Age,Holding_Policy_Type,Reco_Policy_Cat,Reco_Policy_Premium,Response,City_Code_C10,City_Code_C11,City_Code_C12,...,Holding_Policy_Duration_14,Holding_Policy_Duration_14+,Holding_Policy_Duration_2,Holding_Policy_Duration_3,Holding_Policy_Duration_4,Holding_Policy_Duration_5,Holding_Policy_Duration_6,Holding_Policy_Duration_7,Holding_Policy_Duration_8,Holding_Policy_Duration_9
0,3213,36,36,3.0,22,11628.0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,1117,75,22,0.0,22,30510.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3732,32,32,1.0,19,7450.0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4378,52,48,3.0,19,17780.0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,2190,44,44,1.0,16,10404.0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50877,845,22,22,0.0,18,7704.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
50878,4188,27,27,3.0,4,5408.0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
50879,442,63,63,1.0,12,11374.0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
50880,4,71,49,2.0,16,28179.2,1,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [30]:
#Extracting features
x=df.drop(['Response'],axis=1)
y=df['Response']

In [31]:
#feature scaling
from sklearn.preprocessing import MinMaxScaler

In [32]:
scaler = MinMaxScaler()

In [33]:
x[['Region_Code','Reco_Policy_Premium']]= scaler.fit_transform(x[['Region_Code','Reco_Policy_Premium']])

In [34]:
x

Unnamed: 0,Region_Code,Upper_Age,Lower_Age,Holding_Policy_Type,Reco_Policy_Cat,Reco_Policy_Premium,City_Code_C10,City_Code_C11,City_Code_C12,City_Code_C13,...,Holding_Policy_Duration_14,Holding_Policy_Duration_14+,Holding_Policy_Duration_2,Holding_Policy_Duration_3,Holding_Policy_Duration_4,Holding_Policy_Duration_5,Holding_Policy_Duration_6,Holding_Policy_Duration_7,Holding_Policy_Duration_8,Holding_Policy_Duration_9
0,0.518650,36,36,3.0,22,0.227609,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,0.180203,75,22,0.0,22,0.687356,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0.602454,32,32,1.0,19,0.125881,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0.706766,52,48,3.0,19,0.377401,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,0.353464,44,44,1.0,16,0.197807,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50877,0.136283,22,22,0.0,18,0.132066,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
50878,0.676086,27,27,3.0,4,0.076162,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
50879,0.071209,63,63,1.0,12,0.221425,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
50880,0.000484,71,49,2.0,16,0.630605,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [35]:
from sklearn.model_selection import train_test_split

In [36]:
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size=0.2,random_state = 100,stratify=y)

In [37]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

clf = LogisticRegression(solver="liblinear", random_state=0).fit(xtrain, ytrain)
roc_auc_score(ytest, clf.predict_proba(xtest)[:, 1])


0.5749098278510043

In [38]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import StackingClassifier

In [39]:
algo = LogisticRegression()

In [40]:
base_learner = [('clf1',KNeighborsClassifier(n_neighbors=5)),('clf2',DecisionTreeClassifier(criterion ='entropy'))]

In [41]:
stacking = StackingClassifier(estimators = base_learner, final_estimator= algo)

In [42]:
stacking.fit(xtrain, ytrain)

StackingClassifier(estimators=[('clf1', KNeighborsClassifier()),
                               ('clf2',
                                DecisionTreeClassifier(criterion='entropy'))],
                   final_estimator=LogisticRegression())

In [43]:
from sklearn.metrics import roc_auc_score
roc_auc_score(ytest, stacking.predict_proba(xtest)[:,1])

0.5871133106427224

In [44]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

rfc = RandomForestClassifier(criterion = 'entropy')
rfc.fit(xtrain,ytrain)
roc_auc_score(ytest, rfc.predict_proba(xtest)[:,1])


0.6252060340295635

In [45]:
#setting final algorithm as random forest classifier because it gives a better score as compared to other algorithms.

# Importing test set

In [46]:

test = pd.read_csv('test_YCcRUnU.csv')

In [47]:
test.head()

Unnamed: 0,ID,City_Code,Region_Code,Accomodation_Type,Reco_Insurance_Type,Upper_Age,Lower_Age,Is_Spouse,Health_Indicator,Holding_Policy_Duration,Holding_Policy_Type,Reco_Policy_Cat,Reco_Policy_Premium
0,50883,C1,156,Owned,Individual,30,30,No,,6.0,3.0,5,11934.0
1,50884,C4,7,Owned,Joint,69,68,Yes,X1,3.0,3.0,18,32204.8
2,50885,C1,564,Rented,Individual,28,28,No,X3,2.0,4.0,17,9240.0
3,50886,C3,1177,Rented,Individual,23,23,No,X3,3.0,3.0,18,9086.0
4,50887,C1,951,Owned,Individual,75,75,No,X3,,,5,22534.0


In [48]:
ID=test['ID']
test.drop(['ID'],axis=1,inplace = True)

In [49]:
test['Health_Indicator']=test['Health_Indicator'].replace(np.nan,0)
test['Holding_Policy_Duration']=test['Holding_Policy_Duration'].replace(np.nan,0)
test['Holding_Policy_Type']=test['Holding_Policy_Type'].replace(np.nan,0)

In [50]:
temp1 = test.loc[:,test.dtypes=='object']

In [51]:
temp1.dtypes

City_Code                  object
Accomodation_Type          object
Reco_Insurance_Type        object
Is_Spouse                  object
Health_Indicator           object
Holding_Policy_Duration    object
dtype: object

In [52]:
temp1.columns

Index(['City_Code', 'Accomodation_Type', 'Reco_Insurance_Type', 'Is_Spouse',
       'Health_Indicator', 'Holding_Policy_Duration'],
      dtype='object')

In [53]:
dummy1=pd.get_dummies(temp1[['City_Code', 'Accomodation_Type', 'Reco_Insurance_Type', 'Is_Spouse',
       'Health_Indicator', 'Holding_Policy_Duration']],drop_first = True)

In [54]:
dummy1

Unnamed: 0,City_Code_C10,City_Code_C11,City_Code_C12,City_Code_C13,City_Code_C14,City_Code_C15,City_Code_C16,City_Code_C17,City_Code_C18,City_Code_C19,...,Holding_Policy_Duration_14,Holding_Policy_Duration_14+,Holding_Policy_Duration_2,Holding_Policy_Duration_3,Holding_Policy_Duration_4,Holding_Policy_Duration_5,Holding_Policy_Duration_6,Holding_Policy_Duration_7,Holding_Policy_Duration_8,Holding_Policy_Duration_9
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21800,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
21801,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
21802,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21803,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0


In [55]:
test = pd.concat([test,dummy1],axis =1)

In [56]:
test = test.drop(['City_Code', 'Accomodation_Type', 'Reco_Insurance_Type', 'Is_Spouse',
       'Health_Indicator', 'Holding_Policy_Duration'], axis = 1)

In [57]:
test

Unnamed: 0,Region_Code,Upper_Age,Lower_Age,Holding_Policy_Type,Reco_Policy_Cat,Reco_Policy_Premium,City_Code_C10,City_Code_C11,City_Code_C12,City_Code_C13,...,Holding_Policy_Duration_14,Holding_Policy_Duration_14+,Holding_Policy_Duration_2,Holding_Policy_Duration_3,Holding_Policy_Duration_4,Holding_Policy_Duration_5,Holding_Policy_Duration_6,Holding_Policy_Duration_7,Holding_Policy_Duration_8,Holding_Policy_Duration_9
0,156,30,30,3.0,5,11934.0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1,7,69,68,3.0,18,32204.8,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,564,28,28,4.0,17,9240.0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,1177,23,23,3.0,18,9086.0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,951,75,75,0.0,5,22534.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21800,1044,45,45,1.0,18,15884.0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
21801,266,59,59,3.0,18,21390.0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
21802,2470,74,74,0.0,1,17836.0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
21803,1676,25,25,1.0,19,11568.0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0


In [58]:
#feature scaling
from sklearn.preprocessing import MinMaxScaler

In [59]:
scaler = MinMaxScaler()

In [60]:
test[['Region_Code','Reco_Policy_Premium']]= scaler.fit_transform(test[['Region_Code','Reco_Policy_Premium']])

In [61]:
test

Unnamed: 0,Region_Code,Upper_Age,Lower_Age,Holding_Policy_Type,Reco_Policy_Cat,Reco_Policy_Premium,City_Code_C10,City_Code_C11,City_Code_C12,City_Code_C13,...,Holding_Policy_Duration_14,Holding_Policy_Duration_14+,Holding_Policy_Duration_2,Holding_Policy_Duration_3,Holding_Policy_Duration_4,Holding_Policy_Duration_5,Holding_Policy_Duration_6,Holding_Policy_Duration_7,Holding_Policy_Duration_8,Holding_Policy_Duration_9
0,0.025065,30,30,3.0,5,0.235009,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1,0.000970,69,68,3.0,18,0.722007,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,0.091041,28,28,4.0,17,0.170286,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,0.190168,23,23,3.0,18,0.166587,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,0.153622,75,75,0.0,5,0.489669,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21800,0.168661,45,45,1.0,18,0.329906,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
21801,0.042853,59,59,3.0,18,0.462185,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
21802,0.399256,74,74,0.0,1,0.376802,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
21803,0.270860,25,25,1.0,19,0.226216,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0


In [62]:
ypred = rfc.predict(test)

In [63]:
type(ypred)

numpy.ndarray

In [64]:
type(ID)

pandas.core.series.Series

In [65]:
ypred= pd.DataFrame(ypred, columns = ['Response'])

In [66]:
ID = pd.DataFrame(ID, columns = ['ID'])

In [67]:
Final = pd.concat([ID,ypred],axis =1)

In [68]:
Final['Response'].unique()

array([0, 1], dtype=int64)

In [69]:
Final.to_csv('file33.csv',index=False)