In [724]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split


# Scaling
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PowerTransformer, MaxAbsScaler
from sklearn.pipeline import Pipeline

import datetime as dt
from time import time

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
from sklearn.naive_bayes import GaussianNB
from lightgbm import LGBMClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import SGDClassifier

from sklearn.metrics import make_scorer, accuracy_score, f1_score
from sklearn.model_selection import cross_val_score

from sklearn.preprocessing import LabelEncoder
import category_encoders as ce

from IPython.display import display


pd.set_option('display.max_columns', None)

In [725]:
df_train = pd.read_csv('Train.csv')
df_test = pd.read_csv('Test.csv')

### **Combine Train & Test dataset for preprocessing**

In [726]:
display([df_train.shape,df_test.shape])
training_data = df_train.shape[0]

[(12079, 14), (1202, 13)]

In [727]:
df_combined = pd.concat([df_train, df_test]).reset_index(drop=True)
display(df_combined.head())

Unnamed: 0,ID,Policy Start Date,Policy End Date,Gender,Age,First Transaction Date,No_Pol,Car_Category,Subject_Car_Colour,Subject_Car_Make,LGA_Name,State,ProductName,target
0,ID_0040R73,2010-05-14,2011-05-13,Male,30,2010-05-14,1,Saloon,Black,TOYOTA,,,Car Classic,0.0
1,ID_0046BNK,2010-11-29,2011-11-28,Female,79,2010-11-29,1,JEEP,Grey,TOYOTA,,,Car Classic,1.0
2,ID_005QMC3,2010-03-21,2011-03-20,Male,43,2010-03-21,1,Saloon,Red,TOYOTA,,,Car Classic,0.0
3,ID_0079OHW,2010-08-21,2011-08-20,Male,2,2010-08-21,1,,,,,,CarSafe,0.0
4,ID_00BRP63,2010-08-29,2010-12-31,Entity,20,2010-08-29,3,,,,Lagos,Lagos,Muuve,1.0


### **Data Preprocessing**

**Convert to appropriate data types**

In [728]:
date_cols = [col for col in df_combined.columns if "Date" in col]
display(date_cols)

for col in date_cols:
    df_combined[col] = pd.to_datetime(df_combined[col])

['Policy Start Date', 'Policy End Date', 'First Transaction Date']

In [729]:
cat_columns = ['Gender', 'Car_Category', 'Subject_Car_Colour', 'LGA_Name', 'State', 'ProductName']

for col in cat_columns:
    df_combined[col] = df_combined[col].astype('category')

**Clean up Gender**

In [730]:
df_combined['company_type'] = df_combined[df_combined['Gender'] == 'Entity']['Gender']
df_combined['Bi_Sexual'] = df_combined[df_combined['Gender'] ==  'Joint Gender']['Gender']
df_combined['Unknown'] = df_combined[df_combined['Gender'] ==  'NOT STATED']['Gender']
df_combined['No_Gender'] = df_combined[df_combined['Gender'] ==  'NO GENDER']['Gender']
df_combined['Sex'] = df_combined[df_combined['Gender'] ==  'SEX']['Gender']
df_combined['Male'] = df_combined[df_combined['Gender'] == 'Male']['Gender']
df_combined['Female'] = df_combined[df_combined['Gender'] == 'Female']['Gender']

In [731]:
display(df_combined['Gender'].value_counts())

mapper = {
    'Entity': np.nan,
    'Joint Gender': 'Other',
    'NOT STATED': 'Other',
    'NO GENDER': np.nan,
    'SEX': 'Other',
    np.nan: 'Other'
}

df_combined['Gender'] = df_combined['Gender'].replace(mapper)
display(df_combined['Gender'].value_counts())

Male            8356
Female          3679
Entity           300
Joint Gender     238
NOT STATED       190
NO GENDER         76
SEX               42
Name: Gender, dtype: int64

Male      8356
Female    3679
Other      870
Name: Gender, dtype: int64

In [732]:
string_cols = df_combined.select_dtypes(['category', 'object']).columns.difference(['ID'])
display(string_cols)

from collections import OrderedDict
import re

for col in string_cols:
    df_combined[col] = df_combined[col].apply(lambda x: re.sub("['!@#$%*><^\]\[()=_+{}:\";?,.\/-]+", ' ', x) if isinstance(x, str) else x)

# remove name repetitions withn category
for col in string_cols:
    df_combined[col] = df_combined[col].str.split().apply(lambda x: OrderedDict.fromkeys(x).keys() if isinstance(x, str) else x).str.join(" ").str.title().str.strip()

Index(['Bi_Sexual', 'Car_Category', 'Female', 'Gender', 'LGA_Name', 'Male',
       'No_Gender', 'ProductName', 'Sex', 'State', 'Subject_Car_Colour',
       'Subject_Car_Make', 'Unknown', 'company_type'],
      dtype='object')

**Unique value for categorical features**

In [733]:
for col in string_cols:
    print(col+ ' unique values')
    display(df_combined[col].value_counts())
    print('*'*50)
    
# replace with "Other" where value is 1 for each cateogory
for col in string_cols:
    counts = df_combined[col].value_counts()
    idx = counts[counts == 1].index
    df_combined.loc[df_combined[col].isin(idx), col] = df_combined[col].value_counts().idxmax()

Bi_Sexual unique values


Joint Gender    238
Name: Bi_Sexual, dtype: int64

**************************************************
Car_Category unique values


Saloon                     6633
Jeep                       2223
Truck                       108
Bus                          56
Mini Bus                     45
Pick Up                      32
Motorcycle                   18
Sedan                        14
Mini Van                     13
Wagon                        10
Station 4 Wheel               6
Shape Of Vehicle Chasis       6
Van                           3
Pick Up 3 Tons                2
Camry Car Hire                1
Tipper Truck                  1
Name: Car_Category, dtype: int64

**************************************************
Female unique values


Female    3679
Name: Female, dtype: int64

**************************************************
Gender unique values


Male      8356
Female    3679
Other      870
Name: Gender, dtype: int64

**************************************************
LGA_Name unique values


Victoria Island    1284
Ikeja               438
Surulere            306
Abuja Municipal     257
Lagos Mainland      245
                   ... 
Owerri North          1
Agbara                1
Asari Toru            1
Okrika                1
Oshimili North        1
Name: LGA_Name, Length: 268, dtype: int64

**************************************************
Male unique values


Male    8356
Name: Male, dtype: int64

**************************************************
No_Gender unique values


No Gender    76
Name: No_Gender, dtype: int64

**************************************************
ProductName unique values


Car Classic         7142
Carsafe             4154
Customized Motor     605
Car Plus             523
Cvtp                 509
Carflex              194
Muuve                100
Motor Cycle           49
Car Vintage            5
Name: ProductName, dtype: int64

**************************************************
Sex unique values


Sex    42
Name: Sex, dtype: int64

**************************************************
State unique values


Lagos              3494
Benue               693
Abuja Municipal     257
Eti Osa             241
Ibeju Lekki         129
                   ... 
Ijebu East            1
Isoko North           1
Kebbi                 1
Essien Udim           1
Oshimili North        1
Name: State, Length: 113, dtype: int64

**************************************************
Subject_Car_Colour unique values


Black             2057
Silver             605
Grey               565
As Attached        555
Blue               398
White              321
Red                274
Green              259
Gold               192
Ash                142
Wine               101
Brown               65
Cream               10
Yellow               8
Dark Gray            7
Orange               5
White & Blue         5
Red & White          4
Purple               4
B Silver             4
Light Green          3
Dark Grey            3
D Red                3
Blue & Red           3
Black & Orange       2
Gray & Gray          2
White & Red          2
Dark Blue            2
Black & White        2
Gray & Silver        2
Red & Black          2
Champagne            1
Red & Yellow         1
Blue Sky             1
Red & Blue           1
Burgundy             1
Red Maroon           1
Dark Green           1
White & Yellow       1
Dark Red             1
Light Gray           1
Yellow & White       1
Beige Mitalic        1
Blue&White&

**************************************************
Subject_Car_Make unique values


Toyota        5466
Honda         1146
Lexus          654
Mercedes       577
Hyundai        512
              ... 
Jincheng         1
Lincoln          1
Ka               1
Brilliance       1
Geely            1
Name: Subject_Car_Make, Length: 74, dtype: int64

**************************************************
Unknown unique values


Not Stated    190
Name: Unknown, dtype: int64

**************************************************
company_type unique values


Entity    300
Name: company_type, dtype: int64

**************************************************


### **Feature Engineering**

**Get date features**

In [734]:
display(date_cols)
date_features = [
    'month', 'day', 'dayofyear', 'dayofweek',
    'days_in_month', 'quarter', 'is_month_start', 'is_month_end', 
    'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end', 
    'is_leap_year'
]

for col in date_cols:
    for feature in date_features:
        df_combined[col+'_'+feature] = getattr(df_combined[col].dt, feature)
        
for col in date_cols:
    for feature in ['week']:
        df_combined[col+'_'+feature] = getattr(df_combined[col].dt.isocalendar(), feature)
        
# weekend?
for col in date_cols:
    df_combined[col+'_isweekend'] = np.where(df_combined[col].dt.day_name().isin(['Saturday', 'Sunday']), True, False)

['Policy Start Date', 'Policy End Date', 'First Transaction Date']

In [735]:
is_cols = [col for col in df_combined.columns if '_is' in col]
display(is_cols)

for col in is_cols:
    print(col+ ': unique features')
    display(df_combined[col].value_counts())
    print('*'*50)

['Policy Start Date_is_month_start',
 'Policy Start Date_is_month_end',
 'Policy Start Date_is_quarter_start',
 'Policy Start Date_is_quarter_end',
 'Policy Start Date_is_year_start',
 'Policy Start Date_is_year_end',
 'Policy Start Date_is_leap_year',
 'Policy End Date_is_month_start',
 'Policy End Date_is_month_end',
 'Policy End Date_is_quarter_start',
 'Policy End Date_is_quarter_end',
 'Policy End Date_is_year_start',
 'Policy End Date_is_year_end',
 'Policy End Date_is_leap_year',
 'First Transaction Date_is_month_start',
 'First Transaction Date_is_month_end',
 'First Transaction Date_is_quarter_start',
 'First Transaction Date_is_quarter_end',
 'First Transaction Date_is_year_start',
 'First Transaction Date_is_year_end',
 'First Transaction Date_is_leap_year',
 'Policy Start Date_isweekend',
 'Policy End Date_isweekend',
 'First Transaction Date_isweekend']

Policy Start Date_is_month_start: unique features


False    12851
True       430
Name: Policy Start Date_is_month_start, dtype: int64

**************************************************
Policy Start Date_is_month_end: unique features


False    12814
True       467
Name: Policy Start Date_is_month_end, dtype: int64

**************************************************
Policy Start Date_is_quarter_start: unique features


False    13110
True       171
Name: Policy Start Date_is_quarter_start, dtype: int64

**************************************************
Policy Start Date_is_quarter_end: unique features


False    13211
True        70
Name: Policy Start Date_is_quarter_end, dtype: int64

**************************************************
Policy Start Date_is_year_start: unique features


False    13217
True        64
Name: Policy Start Date_is_year_start, dtype: int64

**************************************************
Policy Start Date_is_year_end: unique features


False    13260
True        21
Name: Policy Start Date_is_year_end, dtype: int64

**************************************************
Policy Start Date_is_leap_year: unique features


False    13281
Name: Policy Start Date_is_leap_year, dtype: int64

**************************************************
Policy End Date_is_month_start: unique features


False    12830
True       451
Name: Policy End Date_is_month_start, dtype: int64

**************************************************
Policy End Date_is_month_end: unique features


False    12764
True       517
Name: Policy End Date_is_month_end, dtype: int64

**************************************************
Policy End Date_is_quarter_start: unique features


False    13070
True       211
Name: Policy End Date_is_quarter_start, dtype: int64

**************************************************
Policy End Date_is_quarter_end: unique features


False    12998
True       283
Name: Policy End Date_is_quarter_end, dtype: int64

**************************************************
Policy End Date_is_year_start: unique features


False    13211
True        70
Name: Policy End Date_is_year_start, dtype: int64

**************************************************
Policy End Date_is_year_end: unique features


False    13104
True       177
Name: Policy End Date_is_year_end, dtype: int64

**************************************************
Policy End Date_is_leap_year: unique features


False    13277
True         4
Name: Policy End Date_is_leap_year, dtype: int64

**************************************************
First Transaction Date_is_month_start: unique features


False    12851
True       430
Name: First Transaction Date_is_month_start, dtype: int64

**************************************************
First Transaction Date_is_month_end: unique features


False    12814
True       467
Name: First Transaction Date_is_month_end, dtype: int64

**************************************************
First Transaction Date_is_quarter_start: unique features


False    13110
True       171
Name: First Transaction Date_is_quarter_start, dtype: int64

**************************************************
First Transaction Date_is_quarter_end: unique features


False    13211
True        70
Name: First Transaction Date_is_quarter_end, dtype: int64

**************************************************
First Transaction Date_is_year_start: unique features


False    13217
True        64
Name: First Transaction Date_is_year_start, dtype: int64

**************************************************
First Transaction Date_is_year_end: unique features


False    13260
True        21
Name: First Transaction Date_is_year_end, dtype: int64

**************************************************
First Transaction Date_is_leap_year: unique features


False    13281
Name: First Transaction Date_is_leap_year, dtype: int64

**************************************************
Policy Start Date_isweekend: unique features


False    8181
True     5100
Name: Policy Start Date_isweekend, dtype: int64

**************************************************
Policy End Date_isweekend: unique features


False    8334
True     4947
Name: Policy End Date_isweekend, dtype: int64

**************************************************
First Transaction Date_isweekend: unique features


False    8181
True     5100
Name: First Transaction Date_isweekend, dtype: int64

**************************************************


In [736]:
# drop columns that have only one unique value
df_combined.drop(['First Transaction Date_is_leap_year', 
                  'Policy Start Date_is_leap_year'], axis=1, inplace=True)

In [737]:
df_combined

Unnamed: 0,ID,Policy Start Date,Policy End Date,Gender,Age,First Transaction Date,No_Pol,Car_Category,Subject_Car_Colour,Subject_Car_Make,LGA_Name,State,ProductName,target,company_type,Bi_Sexual,Unknown,No_Gender,Sex,Male,Female,Policy Start Date_month,Policy Start Date_day,Policy Start Date_dayofyear,Policy Start Date_dayofweek,Policy Start Date_days_in_month,Policy Start Date_quarter,Policy Start Date_is_month_start,Policy Start Date_is_month_end,Policy Start Date_is_quarter_start,Policy Start Date_is_quarter_end,Policy Start Date_is_year_start,Policy Start Date_is_year_end,Policy End Date_month,Policy End Date_day,Policy End Date_dayofyear,Policy End Date_dayofweek,Policy End Date_days_in_month,Policy End Date_quarter,Policy End Date_is_month_start,Policy End Date_is_month_end,Policy End Date_is_quarter_start,Policy End Date_is_quarter_end,Policy End Date_is_year_start,Policy End Date_is_year_end,Policy End Date_is_leap_year,First Transaction Date_month,First Transaction Date_day,First Transaction Date_dayofyear,First Transaction Date_dayofweek,First Transaction Date_days_in_month,First Transaction Date_quarter,First Transaction Date_is_month_start,First Transaction Date_is_month_end,First Transaction Date_is_quarter_start,First Transaction Date_is_quarter_end,First Transaction Date_is_year_start,First Transaction Date_is_year_end,Policy Start Date_week,Policy End Date_week,First Transaction Date_week,Policy Start Date_isweekend,Policy End Date_isweekend,First Transaction Date_isweekend
0,ID_0040R73,2010-05-14,2011-05-13,Male,30,2010-05-14,1,Saloon,Black,Toyota,,,Car Classic,0.0,,,,,,Male,,5,14,134,4,31,2,False,False,False,False,False,False,5,13,133,4,31,2,False,False,False,False,False,False,False,5,14,134,4,31,2,False,False,False,False,False,False,19,19,19,False,False,False
1,ID_0046BNK,2010-11-29,2011-11-28,Female,79,2010-11-29,1,Jeep,Grey,Toyota,,,Car Classic,1.0,,,,,,,Female,11,29,333,0,30,4,False,False,False,False,False,False,11,28,332,0,30,4,False,False,False,False,False,False,False,11,29,333,0,30,4,False,False,False,False,False,False,48,48,48,False,False,False
2,ID_005QMC3,2010-03-21,2011-03-20,Male,43,2010-03-21,1,Saloon,Red,Toyota,,,Car Classic,0.0,,,,,,Male,,3,21,80,6,31,1,False,False,False,False,False,False,3,20,79,6,31,1,False,False,False,False,False,False,False,3,21,80,6,31,1,False,False,False,False,False,False,11,11,11,True,True,True
3,ID_0079OHW,2010-08-21,2011-08-20,Male,2,2010-08-21,1,,,,,,Carsafe,0.0,,,,,,Male,,8,21,233,5,31,3,False,False,False,False,False,False,8,20,232,5,31,3,False,False,False,False,False,False,False,8,21,233,5,31,3,False,False,False,False,False,False,33,33,33,True,True,True
4,ID_00BRP63,2010-08-29,2010-12-31,,20,2010-08-29,3,,,,Lagos,Lagos,Muuve,1.0,Entity,,,,,,,8,29,241,6,31,3,False,False,False,False,False,False,12,31,365,4,31,4,False,True,False,True,False,True,False,8,29,241,6,31,3,False,False,False,False,False,False,34,52,34,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13276,ID_ZTTHC5X,2010-12-05,2011-12-04,Male,67,2010-12-05,2,Jeep,Black,Toyota,Victoria Island,Lagos,Car Classic,,,,,,,Male,,12,5,339,6,31,4,False,False,False,False,False,False,12,4,338,6,31,4,False,False,False,False,False,False,False,12,5,339,6,31,4,False,False,False,False,False,False,48,48,48,True,True,True
13277,ID_ZUJAFUP,2010-01-14,2011-01-13,Male,43,2010-01-14,1,Saloon,Silver,Hyundai,Surulere,Lagos,Car Classic,,,,,,,Male,,1,14,14,3,31,1,False,False,False,False,False,False,1,13,13,3,31,1,False,False,False,False,False,False,False,1,14,14,3,31,1,False,False,False,False,False,False,2,2,2,False,False,False
13278,ID_ZWHCTUM,2010-07-26,2011-07-25,Male,30,2010-07-26,1,Truck,White,Iveco,Victoria Island,Lagos,Cvtp,,,,,,,Male,,7,26,207,0,31,3,False,False,False,False,False,False,7,25,206,0,31,3,False,False,False,False,False,False,False,7,26,207,0,31,3,False,False,False,False,False,False,30,30,30,False,False,False
13279,ID_ZWQRL8L,2010-02-16,2011-02-15,Male,44,2010-02-16,2,Saloon,,Nissan,Aba North,Aba North,Car Classic,,,,,,,Male,,2,16,47,1,28,1,False,False,False,False,False,False,2,15,46,1,28,1,False,False,False,False,False,False,False,2,16,47,1,28,1,False,False,False,False,False,False,7,7,7,False,False,False


**Replace Age Outliers with Mean**

In [738]:
def outliers(column):
    Q1, Q3 = np.percentile(column, [25, 75])
    IQR = Q3 - Q1
    lower_range = Q1 - (1.5 * IQR)
    upper_range = Q3 + (1.5 * IQR)
    return lower_range, upper_range
 
outliers(df_combined['Age'])

(12.5, 72.5)

In [739]:
groupby_cols = df_combined.select_dtypes(['int64', 'float32', 'int32', 'float64', 'number']).columns.difference(['target'])
cats = df_combined.select_dtypes(['category', 'object']).columns.difference(['ID'])


for col in groupby_cols:

    df_combined[col+'_max'] = df_combined.groupby(['Gender', 'Car_Category', 'LGA_Name',
           'ProductName', 'State', 'Subject_Car_Colour', 'Subject_Car_Make',
           'Unknown', 'company_type'])[col].transform('max')
    
    df_combined[col+'_max'] = df_combined.groupby(['Gender', 'Bi_Sexual', 'Car_Category', 'LGA_Name', 'No_Gender',
           'ProductName', 'State', 'Subject_Car_Colour', 'Subject_Car_Make',
           'Unknown', 'company_type'])[col].transform('min')
    
    df_combined[col+'_max'] = df_combined.groupby(['Gender', 'Bi_Sexual', 'Car_Category', 'Female', 'LGA_Name', 'Male', 'No_Gender',
           'ProductName', 'State', 'Subject_Car_Colour', 'Subject_Car_Make',
           'Unknown', 'company_type'])[col].transform('median')

In [740]:
age_mean = df_combined['Age'].mean()
display(age_mean)

df_combined['Age'] = np.where(df_combined['Age'] <= 12.5, age_mean, df_combined['Age'])
df_combined['Age'] = np.where(df_combined['Age'] >= 72.5, age_mean, df_combined['Age']).astype('int64')

42.37557412845418

### **Missing Values**

In [741]:
df_combined.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13281 entries, 0 to 13280
Data columns (total 87 columns):
 #   Column                                    Non-Null Count  Dtype         
---  ------                                    --------------  -----         
 0   ID                                        13281 non-null  object        
 1   Policy Start Date                         13281 non-null  datetime64[ns]
 2   Policy End Date                           13281 non-null  datetime64[ns]
 3   Gender                                    12905 non-null  object        
 4   Age                                       13281 non-null  int64         
 5   First Transaction Date                    13281 non-null  datetime64[ns]
 6   No_Pol                                    13281 non-null  int64         
 7   Car_Category                              9171 non-null   object        
 8   Subject_Car_Colour                        5622 non-null   object        
 9   Subject_Car_Make            

In [742]:
# fill missing values with out of range number
df_combined = df_combined.replace(np.nan, -9999)

### **Categorical Encoding**

In [743]:
string_cols = df_combined.select_dtypes(['category', 'object']).columns.difference(['ID'])
string_cols

Index(['Bi_Sexual', 'Car_Category', 'Female', 'Gender', 'LGA_Name', 'Male',
       'No_Gender', 'ProductName', 'Sex', 'State', 'Subject_Car_Colour',
       'Subject_Car_Make', 'Unknown', 'company_type'],
      dtype='object')

In [744]:
bd = ce.OneHotEncoder()
dummies = bd.fit_transform(df_combined[string_cols], df_combined['target'])
display(dummies)

df_combined = df_combined.merge(dummies, left_index=True, right_index=True)
df_combined.drop(string_cols, axis=1, inplace=True)

# drop ID and date cols
df_combined.drop(date_cols, axis=1, inplace=True)
df_combined.drop('ID', axis=1, inplace=True)

  elif pd.api.types.is_categorical(cols):


Unnamed: 0,Bi_Sexual_1,Bi_Sexual_2,Car_Category_1,Car_Category_2,Car_Category_3,Car_Category_4,Car_Category_5,Car_Category_6,Car_Category_7,Car_Category_8,Car_Category_9,Car_Category_10,Car_Category_11,Car_Category_12,Car_Category_13,Car_Category_14,Car_Category_15,Female_1,Female_2,Gender_1,Gender_2,Gender_3,Gender_4,LGA_Name_1,LGA_Name_2,LGA_Name_3,LGA_Name_4,LGA_Name_5,LGA_Name_6,LGA_Name_7,LGA_Name_8,LGA_Name_9,LGA_Name_10,LGA_Name_11,LGA_Name_12,LGA_Name_13,LGA_Name_14,LGA_Name_15,LGA_Name_16,LGA_Name_17,LGA_Name_18,LGA_Name_19,LGA_Name_20,LGA_Name_21,LGA_Name_22,LGA_Name_23,LGA_Name_24,LGA_Name_25,LGA_Name_26,LGA_Name_27,LGA_Name_28,LGA_Name_29,LGA_Name_30,LGA_Name_31,LGA_Name_32,LGA_Name_33,LGA_Name_34,LGA_Name_35,LGA_Name_36,LGA_Name_37,LGA_Name_38,LGA_Name_39,LGA_Name_40,LGA_Name_41,LGA_Name_42,LGA_Name_43,LGA_Name_44,LGA_Name_45,LGA_Name_46,LGA_Name_47,LGA_Name_48,LGA_Name_49,LGA_Name_50,LGA_Name_51,LGA_Name_52,LGA_Name_53,LGA_Name_54,LGA_Name_55,LGA_Name_56,LGA_Name_57,LGA_Name_58,LGA_Name_59,LGA_Name_60,LGA_Name_61,LGA_Name_62,LGA_Name_63,LGA_Name_64,LGA_Name_65,LGA_Name_66,LGA_Name_67,LGA_Name_68,LGA_Name_69,LGA_Name_70,LGA_Name_71,LGA_Name_72,LGA_Name_73,LGA_Name_74,LGA_Name_75,LGA_Name_76,LGA_Name_77,LGA_Name_78,LGA_Name_79,LGA_Name_80,LGA_Name_81,LGA_Name_82,LGA_Name_83,LGA_Name_84,LGA_Name_85,LGA_Name_86,LGA_Name_87,LGA_Name_88,LGA_Name_89,LGA_Name_90,LGA_Name_91,LGA_Name_92,LGA_Name_93,LGA_Name_94,LGA_Name_95,LGA_Name_96,LGA_Name_97,LGA_Name_98,LGA_Name_99,LGA_Name_100,LGA_Name_101,LGA_Name_102,LGA_Name_103,LGA_Name_104,LGA_Name_105,LGA_Name_106,LGA_Name_107,LGA_Name_108,LGA_Name_109,LGA_Name_110,LGA_Name_111,LGA_Name_112,LGA_Name_113,LGA_Name_114,LGA_Name_115,LGA_Name_116,LGA_Name_117,LGA_Name_118,LGA_Name_119,LGA_Name_120,LGA_Name_121,LGA_Name_122,LGA_Name_123,LGA_Name_124,LGA_Name_125,LGA_Name_126,LGA_Name_127,LGA_Name_128,LGA_Name_129,LGA_Name_130,LGA_Name_131,LGA_Name_132,LGA_Name_133,LGA_Name_134,LGA_Name_135,LGA_Name_136,LGA_Name_137,LGA_Name_138,LGA_Name_139,LGA_Name_140,LGA_Name_141,LGA_Name_142,LGA_Name_143,LGA_Name_144,LGA_Name_145,LGA_Name_146,LGA_Name_147,LGA_Name_148,LGA_Name_149,LGA_Name_150,LGA_Name_151,LGA_Name_152,LGA_Name_153,LGA_Name_154,LGA_Name_155,LGA_Name_156,LGA_Name_157,LGA_Name_158,LGA_Name_159,LGA_Name_160,LGA_Name_161,LGA_Name_162,LGA_Name_163,LGA_Name_164,LGA_Name_165,LGA_Name_166,LGA_Name_167,LGA_Name_168,LGA_Name_169,LGA_Name_170,LGA_Name_171,LGA_Name_172,LGA_Name_173,LGA_Name_174,LGA_Name_175,LGA_Name_176,LGA_Name_177,LGA_Name_178,LGA_Name_179,LGA_Name_180,LGA_Name_181,LGA_Name_182,LGA_Name_183,LGA_Name_184,LGA_Name_185,LGA_Name_186,LGA_Name_187,LGA_Name_188,LGA_Name_189,LGA_Name_190,LGA_Name_191,LGA_Name_192,LGA_Name_193,Male_1,Male_2,No_Gender_1,No_Gender_2,ProductName_1,ProductName_2,ProductName_3,ProductName_4,ProductName_5,ProductName_6,ProductName_7,ProductName_8,ProductName_9,Sex_1,Sex_2,State_1,State_2,State_3,State_4,State_5,State_6,State_7,State_8,State_9,State_10,State_11,State_12,State_13,State_14,State_15,State_16,State_17,State_18,State_19,State_20,State_21,State_22,State_23,State_24,State_25,State_26,State_27,State_28,State_29,State_30,State_31,State_32,State_33,State_34,State_35,State_36,State_37,State_38,State_39,State_40,State_41,State_42,State_43,State_44,State_45,State_46,State_47,State_48,State_49,State_50,State_51,State_52,State_53,State_54,State_55,State_56,State_57,State_58,State_59,State_60,State_61,State_62,State_63,State_64,State_65,State_66,State_67,State_68,State_69,State_70,State_71,State_72,State_73,State_74,State_75,State_76,State_77,State_78,State_79,State_80,State_81,State_82,State_83,State_84,Subject_Car_Colour_1,Subject_Car_Colour_2,Subject_Car_Colour_3,Subject_Car_Colour_4,Subject_Car_Colour_5,Subject_Car_Colour_6,Subject_Car_Colour_7,Subject_Car_Colour_8,Subject_Car_Colour_9,Subject_Car_Colour_10,Subject_Car_Colour_11,Subject_Car_Colour_12,Subject_Car_Colour_13,Subject_Car_Colour_14,Subject_Car_Colour_15,Subject_Car_Colour_16,Subject_Car_Colour_17,Subject_Car_Colour_18,Subject_Car_Colour_19,Subject_Car_Colour_20,Subject_Car_Colour_21,Subject_Car_Colour_22,Subject_Car_Colour_23,Subject_Car_Colour_24,Subject_Car_Colour_25,Subject_Car_Colour_26,Subject_Car_Colour_27,Subject_Car_Colour_28,Subject_Car_Colour_29,Subject_Car_Colour_30,Subject_Car_Colour_31,Subject_Car_Colour_32,Subject_Car_Make_1,Subject_Car_Make_2,Subject_Car_Make_3,Subject_Car_Make_4,Subject_Car_Make_5,Subject_Car_Make_6,Subject_Car_Make_7,Subject_Car_Make_8,Subject_Car_Make_9,Subject_Car_Make_10,Subject_Car_Make_11,Subject_Car_Make_12,Subject_Car_Make_13,Subject_Car_Make_14,Subject_Car_Make_15,Subject_Car_Make_16,Subject_Car_Make_17,Subject_Car_Make_18,Subject_Car_Make_19,Subject_Car_Make_20,Subject_Car_Make_21,Subject_Car_Make_22,Subject_Car_Make_23,Subject_Car_Make_24,Subject_Car_Make_25,Subject_Car_Make_26,Subject_Car_Make_27,Subject_Car_Make_28,Subject_Car_Make_29,Subject_Car_Make_30,Subject_Car_Make_31,Subject_Car_Make_32,Subject_Car_Make_33,Subject_Car_Make_34,Subject_Car_Make_35,Subject_Car_Make_36,Subject_Car_Make_37,Subject_Car_Make_38,Subject_Car_Make_39,Subject_Car_Make_40,Subject_Car_Make_41,Subject_Car_Make_42,Subject_Car_Make_43,Subject_Car_Make_44,Subject_Car_Make_45,Subject_Car_Make_46,Subject_Car_Make_47,Subject_Car_Make_48,Subject_Car_Make_49,Subject_Car_Make_50,Subject_Car_Make_51,Subject_Car_Make_52,Subject_Car_Make_53,Unknown_1,Unknown_2,company_type_1,company_type_2
0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
4,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13276,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
13277,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
13278,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
13279,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0


In [745]:
# df_combined.drop(['Car_Category_1', 'Gender_1', 'LGA_Name_1', 'ProductName_1', 'State_1', 'Subject_Car_Colour_1', 'Subject_Car_Make_1'], axis=1, inplace=True)

In [746]:
bool_cols = [col for col in df_combined.columns if '_is' in col]
display(bool_cols)

lb = LabelEncoder()
for col in bool_cols:
    df_combined[col] = lb.fit_transform(df_combined[col])

['Policy Start Date_is_month_start',
 'Policy Start Date_is_month_end',
 'Policy Start Date_is_quarter_start',
 'Policy Start Date_is_quarter_end',
 'Policy Start Date_is_year_start',
 'Policy Start Date_is_year_end',
 'Policy End Date_is_month_start',
 'Policy End Date_is_month_end',
 'Policy End Date_is_quarter_start',
 'Policy End Date_is_quarter_end',
 'Policy End Date_is_year_start',
 'Policy End Date_is_year_end',
 'Policy End Date_is_leap_year',
 'First Transaction Date_is_month_start',
 'First Transaction Date_is_month_end',
 'First Transaction Date_is_quarter_start',
 'First Transaction Date_is_quarter_end',
 'First Transaction Date_is_year_start',
 'First Transaction Date_is_year_end',
 'Policy Start Date_isweekend',
 'Policy End Date_isweekend',
 'First Transaction Date_isweekend']

In [747]:
df_combined

Unnamed: 0,Age,No_Pol,target,Policy Start Date_month,Policy Start Date_day,Policy Start Date_dayofyear,Policy Start Date_dayofweek,Policy Start Date_days_in_month,Policy Start Date_quarter,Policy Start Date_is_month_start,Policy Start Date_is_month_end,Policy Start Date_is_quarter_start,Policy Start Date_is_quarter_end,Policy Start Date_is_year_start,Policy Start Date_is_year_end,Policy End Date_month,Policy End Date_day,Policy End Date_dayofyear,Policy End Date_dayofweek,Policy End Date_days_in_month,Policy End Date_quarter,Policy End Date_is_month_start,Policy End Date_is_month_end,Policy End Date_is_quarter_start,Policy End Date_is_quarter_end,Policy End Date_is_year_start,Policy End Date_is_year_end,Policy End Date_is_leap_year,First Transaction Date_month,First Transaction Date_day,First Transaction Date_dayofyear,First Transaction Date_dayofweek,First Transaction Date_days_in_month,First Transaction Date_quarter,First Transaction Date_is_month_start,First Transaction Date_is_month_end,First Transaction Date_is_quarter_start,First Transaction Date_is_quarter_end,First Transaction Date_is_year_start,First Transaction Date_is_year_end,Policy Start Date_week,Policy End Date_week,First Transaction Date_week,Policy Start Date_isweekend,Policy End Date_isweekend,First Transaction Date_isweekend,Age_max,First Transaction Date_day_max,First Transaction Date_dayofweek_max,First Transaction Date_dayofyear_max,First Transaction Date_days_in_month_max,First Transaction Date_month_max,First Transaction Date_quarter_max,First Transaction Date_week_max,No_Pol_max,Policy End Date_day_max,Policy End Date_dayofweek_max,Policy End Date_dayofyear_max,Policy End Date_days_in_month_max,Policy End Date_month_max,Policy End Date_quarter_max,Policy End Date_week_max,Policy Start Date_day_max,Policy Start Date_dayofweek_max,Policy Start Date_dayofyear_max,Policy Start Date_days_in_month_max,Policy Start Date_month_max,Policy Start Date_quarter_max,Policy Start Date_week_max,Bi_Sexual_1,Bi_Sexual_2,Car_Category_1,Car_Category_2,Car_Category_3,Car_Category_4,Car_Category_5,Car_Category_6,Car_Category_7,Car_Category_8,Car_Category_9,Car_Category_10,Car_Category_11,Car_Category_12,Car_Category_13,Car_Category_14,Car_Category_15,Female_1,Female_2,Gender_1,Gender_2,Gender_3,Gender_4,LGA_Name_1,LGA_Name_2,LGA_Name_3,LGA_Name_4,LGA_Name_5,LGA_Name_6,LGA_Name_7,LGA_Name_8,LGA_Name_9,LGA_Name_10,LGA_Name_11,LGA_Name_12,LGA_Name_13,LGA_Name_14,LGA_Name_15,LGA_Name_16,LGA_Name_17,LGA_Name_18,LGA_Name_19,LGA_Name_20,LGA_Name_21,LGA_Name_22,LGA_Name_23,LGA_Name_24,LGA_Name_25,LGA_Name_26,LGA_Name_27,LGA_Name_28,LGA_Name_29,LGA_Name_30,LGA_Name_31,LGA_Name_32,LGA_Name_33,LGA_Name_34,LGA_Name_35,LGA_Name_36,LGA_Name_37,LGA_Name_38,LGA_Name_39,LGA_Name_40,LGA_Name_41,LGA_Name_42,LGA_Name_43,LGA_Name_44,LGA_Name_45,LGA_Name_46,LGA_Name_47,LGA_Name_48,LGA_Name_49,LGA_Name_50,LGA_Name_51,LGA_Name_52,LGA_Name_53,LGA_Name_54,LGA_Name_55,LGA_Name_56,LGA_Name_57,LGA_Name_58,LGA_Name_59,LGA_Name_60,LGA_Name_61,LGA_Name_62,LGA_Name_63,LGA_Name_64,LGA_Name_65,LGA_Name_66,LGA_Name_67,LGA_Name_68,LGA_Name_69,LGA_Name_70,LGA_Name_71,LGA_Name_72,LGA_Name_73,LGA_Name_74,LGA_Name_75,LGA_Name_76,LGA_Name_77,LGA_Name_78,LGA_Name_79,LGA_Name_80,LGA_Name_81,LGA_Name_82,LGA_Name_83,LGA_Name_84,LGA_Name_85,LGA_Name_86,LGA_Name_87,LGA_Name_88,LGA_Name_89,LGA_Name_90,LGA_Name_91,LGA_Name_92,LGA_Name_93,LGA_Name_94,LGA_Name_95,LGA_Name_96,LGA_Name_97,LGA_Name_98,LGA_Name_99,LGA_Name_100,LGA_Name_101,LGA_Name_102,LGA_Name_103,LGA_Name_104,LGA_Name_105,LGA_Name_106,LGA_Name_107,LGA_Name_108,LGA_Name_109,LGA_Name_110,LGA_Name_111,LGA_Name_112,LGA_Name_113,LGA_Name_114,LGA_Name_115,LGA_Name_116,LGA_Name_117,LGA_Name_118,LGA_Name_119,LGA_Name_120,LGA_Name_121,LGA_Name_122,LGA_Name_123,LGA_Name_124,LGA_Name_125,LGA_Name_126,LGA_Name_127,LGA_Name_128,LGA_Name_129,LGA_Name_130,LGA_Name_131,LGA_Name_132,LGA_Name_133,LGA_Name_134,LGA_Name_135,LGA_Name_136,LGA_Name_137,LGA_Name_138,LGA_Name_139,LGA_Name_140,LGA_Name_141,LGA_Name_142,LGA_Name_143,LGA_Name_144,LGA_Name_145,LGA_Name_146,LGA_Name_147,LGA_Name_148,LGA_Name_149,LGA_Name_150,LGA_Name_151,LGA_Name_152,LGA_Name_153,LGA_Name_154,LGA_Name_155,LGA_Name_156,LGA_Name_157,LGA_Name_158,LGA_Name_159,LGA_Name_160,LGA_Name_161,LGA_Name_162,LGA_Name_163,LGA_Name_164,LGA_Name_165,LGA_Name_166,LGA_Name_167,LGA_Name_168,LGA_Name_169,LGA_Name_170,LGA_Name_171,LGA_Name_172,LGA_Name_173,LGA_Name_174,LGA_Name_175,LGA_Name_176,LGA_Name_177,LGA_Name_178,LGA_Name_179,LGA_Name_180,LGA_Name_181,LGA_Name_182,LGA_Name_183,LGA_Name_184,LGA_Name_185,LGA_Name_186,LGA_Name_187,LGA_Name_188,LGA_Name_189,LGA_Name_190,LGA_Name_191,LGA_Name_192,LGA_Name_193,Male_1,Male_2,No_Gender_1,No_Gender_2,ProductName_1,ProductName_2,ProductName_3,ProductName_4,ProductName_5,ProductName_6,ProductName_7,ProductName_8,ProductName_9,Sex_1,Sex_2,State_1,State_2,State_3,State_4,State_5,State_6,State_7,State_8,State_9,State_10,State_11,State_12,State_13,State_14,State_15,State_16,State_17,State_18,State_19,State_20,State_21,State_22,State_23,State_24,State_25,State_26,State_27,State_28,State_29,State_30,State_31,State_32,State_33,State_34,State_35,State_36,State_37,State_38,State_39,State_40,State_41,State_42,State_43,State_44,State_45,State_46,State_47,State_48,State_49,State_50,State_51,State_52,State_53,State_54,State_55,State_56,State_57,State_58,State_59,State_60,State_61,State_62,State_63,State_64,State_65,State_66,State_67,State_68,State_69,State_70,State_71,State_72,State_73,State_74,State_75,State_76,State_77,State_78,State_79,State_80,State_81,State_82,State_83,State_84,Subject_Car_Colour_1,Subject_Car_Colour_2,Subject_Car_Colour_3,Subject_Car_Colour_4,Subject_Car_Colour_5,Subject_Car_Colour_6,Subject_Car_Colour_7,Subject_Car_Colour_8,Subject_Car_Colour_9,Subject_Car_Colour_10,Subject_Car_Colour_11,Subject_Car_Colour_12,Subject_Car_Colour_13,Subject_Car_Colour_14,Subject_Car_Colour_15,Subject_Car_Colour_16,Subject_Car_Colour_17,Subject_Car_Colour_18,Subject_Car_Colour_19,Subject_Car_Colour_20,Subject_Car_Colour_21,Subject_Car_Colour_22,Subject_Car_Colour_23,Subject_Car_Colour_24,Subject_Car_Colour_25,Subject_Car_Colour_26,Subject_Car_Colour_27,Subject_Car_Colour_28,Subject_Car_Colour_29,Subject_Car_Colour_30,Subject_Car_Colour_31,Subject_Car_Colour_32,Subject_Car_Make_1,Subject_Car_Make_2,Subject_Car_Make_3,Subject_Car_Make_4,Subject_Car_Make_5,Subject_Car_Make_6,Subject_Car_Make_7,Subject_Car_Make_8,Subject_Car_Make_9,Subject_Car_Make_10,Subject_Car_Make_11,Subject_Car_Make_12,Subject_Car_Make_13,Subject_Car_Make_14,Subject_Car_Make_15,Subject_Car_Make_16,Subject_Car_Make_17,Subject_Car_Make_18,Subject_Car_Make_19,Subject_Car_Make_20,Subject_Car_Make_21,Subject_Car_Make_22,Subject_Car_Make_23,Subject_Car_Make_24,Subject_Car_Make_25,Subject_Car_Make_26,Subject_Car_Make_27,Subject_Car_Make_28,Subject_Car_Make_29,Subject_Car_Make_30,Subject_Car_Make_31,Subject_Car_Make_32,Subject_Car_Make_33,Subject_Car_Make_34,Subject_Car_Make_35,Subject_Car_Make_36,Subject_Car_Make_37,Subject_Car_Make_38,Subject_Car_Make_39,Subject_Car_Make_40,Subject_Car_Make_41,Subject_Car_Make_42,Subject_Car_Make_43,Subject_Car_Make_44,Subject_Car_Make_45,Subject_Car_Make_46,Subject_Car_Make_47,Subject_Car_Make_48,Subject_Car_Make_49,Subject_Car_Make_50,Subject_Car_Make_51,Subject_Car_Make_52,Subject_Car_Make_53,Unknown_1,Unknown_2,company_type_1,company_type_2
0,30,1,0.0,5,14,134,4,31,2,0,0,0,0,0,0,5,13,133,4,31,2,0,0,0,0,0,0,0,5,14,134,4,31,2,0,0,0,0,0,0,19,19,19,0,0,0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
1,42,1,1.0,11,29,333,0,30,4,0,0,0,0,0,0,11,28,332,0,30,4,0,0,0,0,0,0,0,11,29,333,0,30,4,0,0,0,0,0,0,48,48,48,0,0,0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
2,43,1,0.0,3,21,80,6,31,1,0,0,0,0,0,0,3,20,79,6,31,1,0,0,0,0,0,0,0,3,21,80,6,31,1,0,0,0,0,0,0,11,11,11,1,1,1,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
3,42,1,0.0,8,21,233,5,31,3,0,0,0,0,0,0,8,20,232,5,31,3,0,0,0,0,0,0,0,8,21,233,5,31,3,0,0,0,0,0,0,33,33,33,1,1,1,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
4,20,3,1.0,8,29,241,6,31,3,0,0,0,0,0,0,12,31,365,4,31,4,0,1,0,1,0,1,0,8,29,241,6,31,3,0,0,0,0,0,0,34,52,34,1,0,1,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13276,67,2,-9999.0,12,5,339,6,31,4,0,0,0,0,0,0,12,4,338,6,31,4,0,0,0,0,0,0,0,12,5,339,6,31,4,0,0,0,0,0,0,48,48,48,1,1,1,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
13277,43,1,-9999.0,1,14,14,3,31,1,0,0,0,0,0,0,1,13,13,3,31,1,0,0,0,0,0,0,0,1,14,14,3,31,1,0,0,0,0,0,0,2,2,2,0,0,0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
13278,30,1,-9999.0,7,26,207,0,31,3,0,0,0,0,0,0,7,25,206,0,31,3,0,0,0,0,0,0,0,7,26,207,0,31,3,0,0,0,0,0,0,30,30,30,0,0,0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0
13279,44,2,-9999.0,2,16,47,1,28,1,0,0,0,0,0,0,2,15,46,1,28,1,0,0,0,0,0,0,0,2,16,47,1,28,1,0,0,0,0,0,0,7,7,7,0,0,0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0


In [748]:
df_combined.select_dtypes(['object', 'category']).columns

Index([], dtype='object')

### **Model**

**Separate Training and Testing set**

In [749]:
training_df = df_combined[:training_data]
testing_df = df_combined[training_data:]

main_cols = [col for col in df_combined[training_data:].columns if 'target' not in col] # remove the Target variable from the training set
testing_df = testing_df[main_cols]

# training and testing shapes (Original Format in Rows)
training_df.shape, testing_df.shape

((12079, 473), (1202, 472))

In [750]:
training_df = training_df.drop_duplicates()

In [751]:
X = training_df.iloc[:, training_df.columns != 'target']
y = training_df.iloc[:, training_df.columns == 'target']

**Split data**

In [752]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

In [753]:
from imblearn.over_sampling import RandomOverSampler
os = RandomOverSampler()
X_train, y_train = os.fit_resample(X_train, y_train)

In [754]:
pipeline = Pipeline([
    ('std', StandardScaler())
])
X_train = pipeline.fit_transform(X_train)
X_test = pipeline.transform(X_test)

# scale provided test set
testing_df = pipeline.fit_transform(testing_df)

In [637]:
def train_predict(classifier, X_train, y_train, X_test, y_test):
    ''''''
    results = {}
    
    # Training
    start = time()
    clf = classifier.fit(X_train, y_train)
    end = time()
    
    results['train_time'] = end - start
    
    # Testing
    start = time()
    predictions = clf.predict(X_test)
    end = time()
    
    results['test_time'] = end - start
    
    # Accuracy score
#     score = accuracy_score(y_test, predictions)
#     results['accuracy_score'] = score
    
    # f1_score
    f_score = f1_score(y_test, predictions)
    results['f1_score'] = f_score
    
    # cross val-score
#     cross_scores = cross_val_score(clf, X_train, y_train, scoring='f1', cv=10)
#     results['f1_validation_score'] = cross_scores.mean()
    
    print('{} has been trained and tested \U0001F600'.format(classifier.__class__.__name__))
    return results

In [456]:
from catboost import CatBoostClassifier

clf_A = RandomForestClassifier()
# clf_B = SGDClassifier()
# clf_C = LogisticRegression()
clf_D = GradientBoostingClassifier()
# clf_E = DecisionTreeClassifier()
clf_F = XGBClassifier()
# clf_G = KNeighborsClassifier()
clf_H = AdaBoostClassifier()
# clf_J = BaggingClassifier()
# clf_K = GaussianNB()
clf_L = LGBMClassifier()
clf_M = MLPClassifier()
clf_O = CatBoostClassifier()


results = {}
for i, clf in enumerate([clf_A, clf_D, clf_F, clf_H, clf_O]):
    clf_name = clf.__class__.__name__
    results[clf_name] = {}
    results[clf_name][i] = train_predict(clf, X_train, y_train, X_test, y_test)
    
display(results)

  clf = classifier.fit(X_train, y_train)


RandomForestClassifier has been trained and tested 😀


  return f(*args, **kwargs)


GradientBoostingClassifier has been trained and tested 😀


  return f(*args, **kwargs)




  return f(*args, **kwargs)


XGBClassifier has been trained and tested 😀
AdaBoostClassifier has been trained and tested 😀
Learning rate set to 0.034767
0:	learn: 0.6781829	total: 26.9ms	remaining: 26.9s
1:	learn: 0.6653608	total: 45.4ms	remaining: 22.7s
2:	learn: 0.6548104	total: 63.3ms	remaining: 21s
3:	learn: 0.6435108	total: 81.7ms	remaining: 20.3s
4:	learn: 0.6331097	total: 102ms	remaining: 20.3s
5:	learn: 0.6245494	total: 123ms	remaining: 20.4s
6:	learn: 0.6166782	total: 143ms	remaining: 20.4s
7:	learn: 0.6099687	total: 162ms	remaining: 20.1s
8:	learn: 0.6034020	total: 181ms	remaining: 19.9s
9:	learn: 0.5993581	total: 199ms	remaining: 19.7s
10:	learn: 0.5938177	total: 218ms	remaining: 19.6s
11:	learn: 0.5888474	total: 236ms	remaining: 19.4s
12:	learn: 0.5839905	total: 254ms	remaining: 19.3s
13:	learn: 0.5802208	total: 276ms	remaining: 19.4s
14:	learn: 0.5769579	total: 298ms	remaining: 19.6s
15:	learn: 0.5738278	total: 317ms	remaining: 19.5s
16:	learn: 0.5705480	total: 335ms	remaining: 19.4s
17:	learn: 0.56822

159:	learn: 0.4584624	total: 3.02s	remaining: 15.9s
160:	learn: 0.4580323	total: 3.05s	remaining: 15.9s
161:	learn: 0.4577397	total: 3.06s	remaining: 15.9s
162:	learn: 0.4575424	total: 3.08s	remaining: 15.8s
163:	learn: 0.4572299	total: 3.1s	remaining: 15.8s
164:	learn: 0.4567857	total: 3.12s	remaining: 15.8s
165:	learn: 0.4566106	total: 3.14s	remaining: 15.8s
166:	learn: 0.4560122	total: 3.15s	remaining: 15.7s
167:	learn: 0.4557142	total: 3.17s	remaining: 15.7s
168:	learn: 0.4554961	total: 3.19s	remaining: 15.7s
169:	learn: 0.4552017	total: 3.21s	remaining: 15.7s
170:	learn: 0.4546876	total: 3.22s	remaining: 15.6s
171:	learn: 0.4544966	total: 3.24s	remaining: 15.6s
172:	learn: 0.4541987	total: 3.26s	remaining: 15.6s
173:	learn: 0.4540296	total: 3.28s	remaining: 15.6s
174:	learn: 0.4532736	total: 3.3s	remaining: 15.6s
175:	learn: 0.4529237	total: 3.32s	remaining: 15.5s
176:	learn: 0.4523202	total: 3.34s	remaining: 15.5s
177:	learn: 0.4518302	total: 3.35s	remaining: 15.5s
178:	learn: 0.

319:	learn: 0.4054042	total: 5.97s	remaining: 12.7s
320:	learn: 0.4050323	total: 5.99s	remaining: 12.7s
321:	learn: 0.4046711	total: 6s	remaining: 12.6s
322:	learn: 0.4043051	total: 6.02s	remaining: 12.6s
323:	learn: 0.4039602	total: 6.04s	remaining: 12.6s
324:	learn: 0.4035949	total: 6.06s	remaining: 12.6s
325:	learn: 0.4031948	total: 6.08s	remaining: 12.6s
326:	learn: 0.4028456	total: 6.1s	remaining: 12.5s
327:	learn: 0.4026211	total: 6.11s	remaining: 12.5s
328:	learn: 0.4022359	total: 6.13s	remaining: 12.5s
329:	learn: 0.4019090	total: 6.15s	remaining: 12.5s
330:	learn: 0.4016635	total: 6.17s	remaining: 12.5s
331:	learn: 0.4014791	total: 6.19s	remaining: 12.5s
332:	learn: 0.4012880	total: 6.21s	remaining: 12.4s
333:	learn: 0.4010113	total: 6.23s	remaining: 12.4s
334:	learn: 0.4007494	total: 6.25s	remaining: 12.4s
335:	learn: 0.4003513	total: 6.27s	remaining: 12.4s
336:	learn: 0.4000700	total: 6.29s	remaining: 12.4s
337:	learn: 0.3997209	total: 6.3s	remaining: 12.3s
338:	learn: 0.399

486:	learn: 0.3570839	total: 9.08s	remaining: 9.56s
487:	learn: 0.3568588	total: 9.11s	remaining: 9.55s
488:	learn: 0.3565055	total: 9.13s	remaining: 9.54s
489:	learn: 0.3562449	total: 9.14s	remaining: 9.52s
490:	learn: 0.3560933	total: 9.16s	remaining: 9.5s
491:	learn: 0.3558851	total: 9.18s	remaining: 9.48s
492:	learn: 0.3555382	total: 9.2s	remaining: 9.46s
493:	learn: 0.3553256	total: 9.22s	remaining: 9.44s
494:	learn: 0.3550720	total: 9.23s	remaining: 9.42s
495:	learn: 0.3548810	total: 9.25s	remaining: 9.4s
496:	learn: 0.3546636	total: 9.27s	remaining: 9.38s
497:	learn: 0.3545132	total: 9.29s	remaining: 9.37s
498:	learn: 0.3542141	total: 9.31s	remaining: 9.35s
499:	learn: 0.3538671	total: 9.33s	remaining: 9.33s
500:	learn: 0.3537537	total: 9.35s	remaining: 9.31s
501:	learn: 0.3534925	total: 9.37s	remaining: 9.29s
502:	learn: 0.3532215	total: 9.38s	remaining: 9.27s
503:	learn: 0.3530346	total: 9.4s	remaining: 9.25s
504:	learn: 0.3528863	total: 9.42s	remaining: 9.23s
505:	learn: 0.35

654:	learn: 0.3202010	total: 12.2s	remaining: 6.45s
655:	learn: 0.3199296	total: 12.3s	remaining: 6.43s
656:	learn: 0.3196229	total: 12.3s	remaining: 6.41s
657:	learn: 0.3194742	total: 12.3s	remaining: 6.39s
658:	learn: 0.3191936	total: 12.3s	remaining: 6.37s
659:	learn: 0.3189846	total: 12.3s	remaining: 6.36s
660:	learn: 0.3188029	total: 12.4s	remaining: 6.34s
661:	learn: 0.3186276	total: 12.4s	remaining: 6.32s
662:	learn: 0.3182798	total: 12.4s	remaining: 6.3s
663:	learn: 0.3180517	total: 12.4s	remaining: 6.28s
664:	learn: 0.3178812	total: 12.4s	remaining: 6.26s
665:	learn: 0.3176499	total: 12.5s	remaining: 6.24s
666:	learn: 0.3174127	total: 12.5s	remaining: 6.22s
667:	learn: 0.3172882	total: 12.5s	remaining: 6.21s
668:	learn: 0.3171223	total: 12.5s	remaining: 6.19s
669:	learn: 0.3170582	total: 12.5s	remaining: 6.17s
670:	learn: 0.3169135	total: 12.5s	remaining: 6.15s
671:	learn: 0.3167212	total: 12.6s	remaining: 6.13s
672:	learn: 0.3165718	total: 12.6s	remaining: 6.11s
673:	learn: 0

819:	learn: 0.2933708	total: 15.3s	remaining: 3.37s
820:	learn: 0.2932735	total: 15.4s	remaining: 3.35s
821:	learn: 0.2931277	total: 15.4s	remaining: 3.33s
822:	learn: 0.2927689	total: 15.4s	remaining: 3.31s
823:	learn: 0.2925918	total: 15.4s	remaining: 3.29s
824:	learn: 0.2925546	total: 15.4s	remaining: 3.27s
825:	learn: 0.2924862	total: 15.4s	remaining: 3.25s
826:	learn: 0.2923538	total: 15.5s	remaining: 3.23s
827:	learn: 0.2922162	total: 15.5s	remaining: 3.22s
828:	learn: 0.2919360	total: 15.5s	remaining: 3.2s
829:	learn: 0.2916912	total: 15.5s	remaining: 3.18s
830:	learn: 0.2915517	total: 15.5s	remaining: 3.16s
831:	learn: 0.2914157	total: 15.6s	remaining: 3.14s
832:	learn: 0.2912383	total: 15.6s	remaining: 3.12s
833:	learn: 0.2910731	total: 15.6s	remaining: 3.1s
834:	learn: 0.2909444	total: 15.6s	remaining: 3.09s
835:	learn: 0.2907991	total: 15.6s	remaining: 3.07s
836:	learn: 0.2907051	total: 15.7s	remaining: 3.05s
837:	learn: 0.2905367	total: 15.7s	remaining: 3.03s
838:	learn: 0.

981:	learn: 0.2701425	total: 18.5s	remaining: 338ms
982:	learn: 0.2699887	total: 18.5s	remaining: 319ms
983:	learn: 0.2697810	total: 18.5s	remaining: 301ms
984:	learn: 0.2696607	total: 18.5s	remaining: 282ms
985:	learn: 0.2694803	total: 18.5s	remaining: 263ms
986:	learn: 0.2692547	total: 18.6s	remaining: 244ms
987:	learn: 0.2691138	total: 18.6s	remaining: 226ms
988:	learn: 0.2689900	total: 18.6s	remaining: 207ms
989:	learn: 0.2688427	total: 18.6s	remaining: 188ms
990:	learn: 0.2687136	total: 18.6s	remaining: 169ms
991:	learn: 0.2685378	total: 18.6s	remaining: 150ms
992:	learn: 0.2684171	total: 18.7s	remaining: 132ms
993:	learn: 0.2682311	total: 18.7s	remaining: 113ms
994:	learn: 0.2681352	total: 18.7s	remaining: 94ms
995:	learn: 0.2679622	total: 18.7s	remaining: 75.2ms
996:	learn: 0.2678709	total: 18.7s	remaining: 56.4ms
997:	learn: 0.2677491	total: 18.8s	remaining: 37.6ms
998:	learn: 0.2675202	total: 18.8s	remaining: 18.8ms
999:	learn: 0.2673240	total: 18.8s	remaining: 0us
CatBoostCla

{'RandomForestClassifier': {0: {'train_time': 8.173691749572754,
   'test_time': 0.05798697471618652,
   'f1_score': 0.14705882352941177}},
 'GradientBoostingClassifier': {1: {'train_time': 16.324738025665283,
   'test_time': 0.005998849868774414,
   'f1_score': 0.3607924921793535}},
 'XGBClassifier': {2: {'train_time': 6.992316484451294,
   'test_time': 0.013999223709106445,
   'f1_score': 0.2920892494929006}},
 'AdaBoostClassifier': {3: {'train_time': 5.567032814025879,
   'test_time': 0.1679985523223877,
   'f1_score': 0.3426724137931035}},
 'CatBoostClassifier': {4: {'train_time': 19.612268924713135,
   'test_time': 0.02717304229736328,
   'f1_score': 0.3327556325823224}}}

In [755]:
from catboost import CatBoostClassifier
params = {
#     'loss_function': 'CrossEntropy',# objective function
   'loss_function' :'Logloss',
    'iterations': 10000,
   # 'custom_loss': 'F1',
    'learning_rate': 0.02286072,
    'eval_metric':'F1', # metric,
    #'task_type':'GPU',
    #'border_count':32,
    'early_stopping_rounds':200,
    'verbose': 200, # output to stdout info about training process every 200 iterations
   # 'random_seed': SEED
    
}

cat = CatBoostClassifier(**params)
cat.fit(X_train, y_train)

predictions = cat.predict(X_test)
f1_s = f1_score(y_test, predictions)
f1_s

0:	learn: 0.7678767	total: 12.5ms	remaining: 2m 4s
200:	learn: 0.8090796	total: 3.21s	remaining: 2m 36s
400:	learn: 0.8315730	total: 7.19s	remaining: 2m 52s
600:	learn: 0.8503000	total: 10.5s	remaining: 2m 43s
800:	learn: 0.8713123	total: 13.9s	remaining: 2m 40s
1000:	learn: 0.8822432	total: 17.2s	remaining: 2m 34s
1200:	learn: 0.8912401	total: 20.3s	remaining: 2m 28s
1400:	learn: 0.9000432	total: 23.5s	remaining: 2m 23s
1600:	learn: 0.9082813	total: 26.7s	remaining: 2m 20s
1800:	learn: 0.9159031	total: 30.1s	remaining: 2m 16s
2000:	learn: 0.9203394	total: 33.4s	remaining: 2m 13s
2200:	learn: 0.9243347	total: 36.6s	remaining: 2m 9s
2400:	learn: 0.9287637	total: 39.9s	remaining: 2m 6s
2600:	learn: 0.9313002	total: 43.2s	remaining: 2m 2s
2800:	learn: 0.9355301	total: 46.4s	remaining: 1m 59s
3000:	learn: 0.9368543	total: 49.5s	remaining: 1m 55s
3200:	learn: 0.9396203	total: 52.9s	remaining: 1m 52s
3400:	learn: 0.9419663	total: 56.1s	remaining: 1m 48s
3600:	learn: 0.9447839	total: 59.4s	re

0.24875621890547267

In [756]:
subfile = pd.DataFrame(df_test['ID'])
subfile['target'] = cat.predict(testing_df).astype('int64')

In [757]:
subfile['target'].value_counts()

0    1092
1     110
Name: target, dtype: int64

In [648]:
subfile.to_csv('110.csv', index=False)