In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve, silhouette_samples, silhouette_score
# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn after this point.
plt.style.use('fivethirtyeight')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Read the insurance dataset from the CSV file next to the notebook and
# preview its first five rows.
insurance = pd.read_csv(filepath_or_buffer='insurance_part2_data.csv')
insurance.head(n=5)

Unnamed: 0,Age,Agency_Code,Type,Claimed,Commision,Channel,Duration,Sales,Product Name,Destination
0,48,C2B,Airlines,No,0.7,Online,7,2.51,Customised Plan,ASIA
1,36,EPX,Travel Agency,No,0.0,Online,34,20.0,Customised Plan,ASIA
2,39,CWT,Travel Agency,No,5.94,Online,3,9.9,Customised Plan,Americas
3,36,EPX,Travel Agency,No,0.0,Online,4,26.0,Cancellation Plan,ASIA
4,33,JZI,Airlines,No,6.3,Online,53,18.0,Bronze Plan,ASIA


In [3]:
# Summary statistics for every column (numeric and non-numeric alike),
# transposed so that each row of the result describes one column.
insurance.describe(include='all').transpose()

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
Age,3000,,,,38.091,10.4635,8.0,32.0,36.0,42.0,84.0
Agency_Code,3000,4.0,EPX,1365.0,,,,,,,
Type,3000,2.0,Travel Agency,1837.0,,,,,,,
Claimed,3000,2.0,No,2076.0,,,,,,,
Commision,3000,,,,14.5292,25.4815,0.0,0.0,4.63,17.235,210.21
Channel,3000,2.0,Online,2954.0,,,,,,,
Duration,3000,,,,70.0013,134.053,-1.0,11.0,26.5,63.0,4580.0
Sales,3000,,,,60.2499,70.734,0.0,20.0,33.0,69.0,539.0
Product Name,3000,5.0,Customised Plan,1136.0,,,,,,,
Destination,3000,3.0,ASIA,2465.0,,,,,,,


In [4]:
# Rows where the Sales value or the Commision value is exactly zero.
insurance.loc[insurance['Sales'].eq(0) | insurance['Commision'].eq(0)]

Unnamed: 0,Age,Agency_Code,Type,Claimed,Commision,Channel,Duration,Sales,Product Name,Destination
1,36,EPX,Travel Agency,No,0.0,Online,34,20.0,Customised Plan,ASIA
3,36,EPX,Travel Agency,No,0.0,Online,4,26.0,Cancellation Plan,ASIA
7,36,EPX,Travel Agency,No,0.0,Online,16,80.0,Cancellation Plan,ASIA
8,36,EPX,Travel Agency,No,0.0,Online,19,14.0,Cancellation Plan,ASIA
9,36,EPX,Travel Agency,No,0.0,Online,42,43.0,Cancellation Plan,ASIA
...,...,...,...,...,...,...,...,...,...,...
2988,36,EPX,Travel Agency,Yes,0.0,Online,18,36.0,Customised Plan,ASIA
2989,35,EPX,Travel Agency,No,0.0,Online,26,20.0,Customised Plan,ASIA
2990,51,EPX,Travel Agency,No,0.0,Online,2,20.0,Customised Plan,ASIA
2993,36,EPX,Travel Agency,No,0.0,Online,234,10.0,Cancellation Plan,ASIA


In [5]:
# Rows where the Sales value is exactly zero.
insurance.loc[insurance['Sales'].eq(0)]

Unnamed: 0,Age,Agency_Code,Type,Claimed,Commision,Channel,Duration,Sales,Product Name,Destination
131,53,JZI,Airlines,No,12.95,Online,93,0.0,Bronze Plan,ASIA
162,36,EPX,Travel Agency,No,0.0,Online,2,0.0,Customised Plan,ASIA
323,54,CWT,Travel Agency,No,100.98,Online,18,0.0,Customised Plan,Americas
483,44,CWT,Travel Agency,No,11.88,Online,10,0.0,Customised Plan,ASIA
513,31,CWT,Travel Agency,No,83.16,Online,99,0.0,Customised Plan,EUROPE
537,34,CWT,Travel Agency,No,11.88,Online,45,0.0,Customised Plan,ASIA
612,33,CWT,Travel Agency,No,17.82,Online,4,0.0,Customised Plan,ASIA
646,31,CWT,Travel Agency,No,11.88,Online,144,0.0,Customised Plan,ASIA
703,30,JZI,Airlines,No,7.7,Online,9,0.0,Bronze Plan,ASIA
766,36,EPX,Travel Agency,No,0.0,Online,7,0.0,Cancellation Plan,ASIA


In [6]:
# Number of rows that exactly repeat an earlier row (the first occurrence is not counted).
insurance.duplicated(keep='first').sum()

139

In [8]:
# Percentage of rows that exactly repeat an earlier row.
# Computed from the frame itself instead of hand-copied counts, so the figure
# cannot go stale if the dataset changes. The mean of the boolean mask is the
# duplicate fraction; float() keeps the displayed value a plain Python float.
round(float(insurance.duplicated().mean()) * 100, 2)

4.63

In [9]:
# Show the rows flagged as exact repeats of an earlier row.
insurance.loc[insurance.duplicated(keep='first')]

Unnamed: 0,Age,Agency_Code,Type,Claimed,Commision,Channel,Duration,Sales,Product Name,Destination
63,30,C2B,Airlines,Yes,15.0,Online,27,60.0,Bronze Plan,ASIA
329,36,EPX,Travel Agency,No,0.0,Online,5,20.0,Customised Plan,ASIA
407,36,EPX,Travel Agency,No,0.0,Online,11,19.0,Cancellation Plan,ASIA
411,35,EPX,Travel Agency,No,0.0,Online,2,20.0,Customised Plan,ASIA
422,36,EPX,Travel Agency,No,0.0,Online,5,20.0,Customised Plan,ASIA
...,...,...,...,...,...,...,...,...,...,...
2940,36,EPX,Travel Agency,No,0.0,Online,8,10.0,Cancellation Plan,ASIA
2947,36,EPX,Travel Agency,No,0.0,Online,10,28.0,Customised Plan,ASIA
2952,36,EPX,Travel Agency,No,0.0,Online,2,10.0,Cancellation Plan,ASIA
2962,36,EPX,Travel Agency,No,0.0,Online,4,20.0,Customised Plan,ASIA


In [10]:
# Independent (deep) copy of the loaded frame to work on without touching `insurance`.
insurance1 = insurance.copy(deep=True)

In [11]:
# Remove exact repeat rows from the working copy (the first occurrence is kept),
# then report the resulting (rows, columns) shape.
insurance1 = insurance1.drop_duplicates(keep='first')
insurance1.shape

(2861, 10)

In [14]:
# Replace every object-dtype column of `insurance` with its integer category codes.
# The list of columns is collected first; each one is then overwritten in place.
object_columns = [
    name for name in insurance.columns
    if pd.api.types.is_object_dtype(insurance[name])
]
for name in object_columns:
    insurance[name] = pd.Categorical(insurance[name]).codes

In [15]:
# Build the modelling data from de-duplicated rows. The notebook found exact
# duplicate rows earlier, but the split was made on the full frame, so identical
# rows could land in both the training and the test partition and inflate the
# test scores. De-duplicating here keeps this cell self-contained.
model_data = insurance.drop_duplicates()

# Features are every column except the target; the target is 'Claimed'.
X = model_data.drop(columns=['Claimed'])
y = model_data['Claimed']

# 70/30 hold-out split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 50)

In [16]:
# Standardise the features for the neural network. The scaler is fitted on the
# training split only and then applied unchanged to the test split.
sc = StandardScaler()
# fit_transform/transform return bare arrays; pass the original index back in so
# the scaled frames keep the same row labels as y_train / y_test. Without it the
# frames get a fresh 0..n-1 index and any label-based join, concat or mask
# against the targets would silently misalign.
X_train_ann = pd.DataFrame(sc.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test_ann = pd.DataFrame(sc.transform(X_test), columns=X_test.columns, index=X_test.index)

In [17]:
# Hyper-parameter grid for the MLP search.
# Each entry maps a parameter name to the collection of candidate values to try:
#   hidden_layer_sizes -> 100, 200, ..., 800 (one candidate per tuple element)
#   max_iter           -> 1000, 2000, 3000
params = dict(
    hidden_layer_sizes=tuple(np.arange(start=100, stop=900, step=100)),
    activation=['relu', 'logistic'],
    solver=['sgd', 'adam'],
    tol=[0.000001, 0.01, 0.1],
    max_iter=np.arange(start=1000, stop=4000, step=1000),
)

In [None]:
# Exhaustive 4-fold cross-validated search over the `params` grid.
# random_state pins the MLP's weight initialisation and batch shuffling, so
# repeated runs of the search select the same model (the seed matches the one
# used for the train/test split).
ann_model = MLPClassifier(random_state=50)
# n_jobs=-1 spreads the candidate fits over all CPU cores. With the estimator
# seeded, this changes only wall-clock time, not the results.
grid_cv = GridSearchCV(estimator=ann_model, param_grid=params, cv=4, n_jobs=-1)
grid_cv.fit(X_train_ann, y_train)

