<a href="https://colab.research.google.com/github/JoaquinGonzalezSimon/Data_science_and_ML_from_Medium/blob/main/230412_Insurance_Prediction_with_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### This project was taken from the following link

https://thecleverprogrammer.com/2021/09/03/insurance-prediction-with-machine-learning/

In [1]:
import pandas as pd
import numpy as np

import plotly.express as px

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [2]:
data = pd.read_csv('https://raw.githubusercontent.com/amankharwal/Website-data/master/TravelInsurancePrediction.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,Age,Employment Type,GraduateOrNot,AnnualIncome,FamilyMembers,ChronicDiseases,FrequentFlyer,EverTravelledAbroad,TravelInsurance
0,0,31,Government Sector,Yes,400000,6,1,No,No,0
1,1,31,Private Sector/Self Employed,Yes,1250000,7,0,No,No,0
2,2,34,Private Sector/Self Employed,Yes,500000,4,1,No,No,1
3,3,28,Private Sector/Self Employed,Yes,700000,3,1,No,No,0
4,4,28,Private Sector/Self Employed,Yes,700000,8,1,Yes,No,0


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1987 entries, 0 to 1986
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Unnamed: 0           1987 non-null   int64 
 1   Age                  1987 non-null   int64 
 2   Employment Type      1987 non-null   object
 3   GraduateOrNot        1987 non-null   object
 4   AnnualIncome         1987 non-null   int64 
 5   FamilyMembers        1987 non-null   int64 
 6   ChronicDiseases      1987 non-null   int64 
 7   FrequentFlyer        1987 non-null   object
 8   EverTravelledAbroad  1987 non-null   object
 9   TravelInsurance      1987 non-null   int64 
dtypes: int64(6), object(4)
memory usage: 155.4+ KB


In [4]:
data.isnull().sum()

Unnamed: 0             0
Age                    0
Employment Type        0
GraduateOrNot          0
AnnualIncome           0
FamilyMembers          0
ChronicDiseases        0
FrequentFlyer          0
EverTravelledAbroad    0
TravelInsurance        0
dtype: int64

In [5]:
data.drop('Unnamed: 0', axis=1, inplace=True)

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1987 entries, 0 to 1986
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Age                  1987 non-null   int64 
 1   Employment Type      1987 non-null   object
 2   GraduateOrNot        1987 non-null   object
 3   AnnualIncome         1987 non-null   int64 
 4   FamilyMembers        1987 non-null   int64 
 5   ChronicDiseases      1987 non-null   int64 
 6   FrequentFlyer        1987 non-null   object
 7   EverTravelledAbroad  1987 non-null   object
 8   TravelInsurance      1987 non-null   int64 
dtypes: int64(5), object(4)
memory usage: 139.8+ KB


In [7]:
data.TravelInsurance.value_counts()

0    1277
1     710
Name: TravelInsurance, dtype: int64

In [8]:
data['TravelInsurance'] = data['TravelInsurance'].map({0:'Not Purchased', 1:'Purchased'})
data.TravelInsurance.value_counts()

Not Purchased    1277
Purchased         710
Name: TravelInsurance, dtype: int64

In [9]:
graph_cols = ['Age', 'Employment Type', 'AnnualIncome']

In [10]:
for col in graph_cols:
  figure = px.histogram(data, x=col, color='TravelInsurance', title=f'Factors affecting purchase of travel insurance: {col}')
  figure.show()

In [11]:
le = LabelEncoder()

In [12]:
encode_cols = ['GraduateOrNot', 'FrequentFlyer', 'EverTravelledAbroad']

In [13]:
for col in encode_cols:
  data['le'+col] = le.fit_transform(data[col])

In [14]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1987 entries, 0 to 1986
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Age                    1987 non-null   int64 
 1   Employment Type        1987 non-null   object
 2   GraduateOrNot          1987 non-null   object
 3   AnnualIncome           1987 non-null   int64 
 4   FamilyMembers          1987 non-null   int64 
 5   ChronicDiseases        1987 non-null   int64 
 6   FrequentFlyer          1987 non-null   object
 7   EverTravelledAbroad    1987 non-null   object
 8   TravelInsurance        1987 non-null   object
 9   leGraduateOrNot        1987 non-null   int64 
 10  leFrequentFlyer        1987 non-null   int64 
 11  leEverTravelledAbroad  1987 non-null   int64 
dtypes: int64(7), object(5)
memory usage: 186.4+ KB


In [15]:
data.head()

Unnamed: 0,Age,Employment Type,GraduateOrNot,AnnualIncome,FamilyMembers,ChronicDiseases,FrequentFlyer,EverTravelledAbroad,TravelInsurance,leGraduateOrNot,leFrequentFlyer,leEverTravelledAbroad
0,31,Government Sector,Yes,400000,6,1,No,No,Not Purchased,1,0,0
1,31,Private Sector/Self Employed,Yes,1250000,7,0,No,No,Not Purchased,1,0,0
2,34,Private Sector/Self Employed,Yes,500000,4,1,No,No,Purchased,1,0,0
3,28,Private Sector/Self Employed,Yes,700000,3,1,No,No,Not Purchased,1,0,0
4,28,Private Sector/Self Employed,Yes,700000,8,1,Yes,No,Not Purchased,1,1,0


In [16]:
x = np.array(data[['Age', 'leGraduateOrNot', 'AnnualIncome', 'FamilyMembers', 'ChronicDiseases', 'leFrequentFlyer', 'leEverTravelledAbroad']])
y = np.array(data[['TravelInsurance']])

In [17]:
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2)
xtrain.shape, xtest.shape, ytrain.shape, ytest.shape

((1589, 7), (398, 7), (1589, 1), (398, 1))

In [18]:
model = DecisionTreeClassifier()
model.fit(xtrain, ytrain)
pred = model.predict(xtest)

In [19]:
pred

array(['Not Purchased', 'Purchased', 'Not Purchased', 'Purchased',
       'Purchased', 'Not Purchased', 'Not Purchased', 'Not Purchased',
       'Not Purchased', 'Purchased', 'Not Purchased', 'Not Purchased',
       'Not Purchased', 'Not Purchased', 'Not Purchased', 'Not Purchased',
       'Not Purchased', 'Not Purchased', 'Not Purchased', 'Not Purchased',
       'Purchased', 'Not Purchased', 'Not Purchased', 'Not Purchased',
       'Not Purchased', 'Not Purchased', 'Not Purchased', 'Not Purchased',
       'Not Purchased', 'Purchased', 'Not Purchased', 'Not Purchased',
       'Not Purchased', 'Purchased', 'Purchased', 'Purchased',
       'Not Purchased', 'Purchased', 'Not Purchased', 'Purchased',
       'Not Purchased', 'Not Purchased', 'Not Purchased', 'Not Purchased',
       'Not Purchased', 'Not Purchased', 'Not Purchased', 'Not Purchased',
       'Not Purchased', 'Not Purchased', 'Not Purchased', 'Not Purchased',
       'Not Purchased', 'Not Purchased', 'Not Purchased', 'Not Purcha