## Import Libraries

In [1]:
import pandas as pd
import numpy as np

# Make and Compose Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

# Import SimpleImputer
from sklearn.impute import SimpleImputer

# Import Scaling and transformation Libraries
from sklearn.preprocessing import StandardScaler, OrdinalEncoder

# Import Decision tree
from sklearn.tree import DecisionTreeClassifier

## Import Data

In [2]:
# Read the training set and the test set from the local input/ folder,
# in that order, and bind them to `train` and `test`.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('input/train.csv', 'input/test.csv')
)

In [3]:
# Columns routed through the numeric branch of the preprocessor
# (mean imputation followed by standard scaling).
numerical_features = [
    'Age',
    'Region_Code',
    'Annual_Premium',
    'Policy_Sales_Channel',
    'Vintage',
]

# Columns routed through the categorical branch of the preprocessor
# (most-frequent imputation followed by ordinal encoding).
categorical_features = [
    'Gender',
    'Vehicle_Age',
    'Vehicle_Damage',
]

## Train Validation Split

In [4]:
# Feature matrix: every training column except the row identifier and the target.
X = train.drop(columns=['id', 'Response'])

# Target vector: the 'Response' column.
y = train['Response']

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for validation; the fixed seed keeps the
# partition identical across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

## Build Pipeline

In [7]:
# Numeric branch: fill missing values with the column mean, then standardise.
numeric_pipeline = make_pipeline(
    SimpleImputer(strategy='mean'),
    StandardScaler(),
)

# Categorical branch: fill missing values with the most frequent category,
# then map each category to an integer code.
categorical_pipeline = make_pipeline(
    SimpleImputer(strategy='most_frequent'),
    OrdinalEncoder(categories='auto'),
)

# Route each feature list to its branch.
# NOTE(review): no `remainder` is passed, so any column of X that appears in
# neither list is presumably dropped by the transformer's default - confirm
# that this is intended.
preprocessor = make_column_transformer(
    (numeric_pipeline, numerical_features),
    (categorical_pipeline, categorical_features),
)

In [18]:
# Chain the preprocessing and the classifier into one estimator so both are
# fitted together on whatever data the pipeline is given.
# random_state pins the tree's internal randomness so repeated runs build the
# same model; 42 matches the seed already used for the train/validation split.
dt = make_pipeline(preprocessor, DecisionTreeClassifier(random_state=42))

## Build Model

In [15]:
from sklearn.model_selection import GridSearchCV

In [27]:
# Hyper-parameter grid for the search. Keys use the `<step>__<param>` form so
# they address the 'decisiontreeclassifier' step of the pipeline.
# 2 criteria x 5 depths x 4 split sizes = 40 candidate combinations.
parameters = dict([
    ('decisiontreeclassifier__criterion', ('gini', 'entropy')),
    ('decisiontreeclassifier__max_depth', [5, 10, 15, 20, 30]),
    ('decisiontreeclassifier__min_samples_split', [2, 4, 6, 8]),
])

In [28]:
# Exhaustive search over `parameters` for the `dt` pipeline.
# n_jobs=-1 uses all available cores; verbose=2 logs progress per fit.
model = GridSearchCV(
    estimator=dt,
    param_grid=parameters,
    n_jobs=-1,
    verbose=2,
)

In [29]:
# Run the grid search on the training split only; the validation split
# (X_val / y_val) is kept out so it can be used for the accuracy check.
# Per the captured output this is 40 candidates x 5 folds = 200 fits.
model.fit(X_train, y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits


GridSearchCV(estimator=Pipeline(steps=[('columntransformer',
                                        ColumnTransformer(transformers=[('pipeline-1',
                                                                         Pipeline(steps=[('simpleimputer',
                                                                                          SimpleImputer()),
                                                                                         ('standardscaler',
                                                                                          StandardScaler())]),
                                                                         ['Age',
                                                                          'Region_Code',
                                                                          'Annual_Premium',
                                                                          'Policy_Sales_Channel',
                                                          

## Check Accuracy

In [30]:
# Score the fitted search object on both splits.
# The second figure is measured on the 30% hold-out split (X_val / y_val),
# not on test.csv, so it is reported as validation accuracy.
# NOTE(review): plain accuracy can hide poor minority-class performance if
# 'Response' is imbalanced - confirm the class balance before relying on it.
train_accuracy = model.score(X_train, y_train)
val_accuracy = model.score(X_val, y_val)

print(f"Train Accuracy : {train_accuracy:0.1%}")
print(f"Validation Accuracy : {val_accuracy:0.1%}")

Train Accuracy : 87.8%
Test Accuracy : 87.6%
