In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the insurance dataset from the Excel workbook and preview its first five rows.
df = pd.read_excel(io='insurance_data.xlsx')
df.head(n=5)

Unnamed: 0,age,bought_insurance
0,22,0
1,25,0
2,47,1
3,52,0
4,46,1


### Problem Statement - Based on Age, predict whether the person bought the insurance or not

In [3]:
# Dataset dimensions, returned as a (rows, columns) tuple.
df.shape
# rows = 27, cols = 2

(27, 2)

#### Handling Null values

In [4]:
# Count the missing values in each column (isna is the same check as isnull).
df.isna().sum()

age                 0
bought_insurance    0
dtype: int64

#### Handle the duplicates

In [5]:
# Number of rows that exactly repeat an earlier row (the first occurrence is not counted).
df.duplicated(keep='first').sum()

1

In [6]:
# Remove exact duplicate rows by reassignment rather than `inplace=True`, so the
# cell produces its result from an expression and is safe to re-run.
# NOTE(review): the frame only holds `age` and `bought_insurance`, so two identical
# rows may be two different customers rather than a data-entry duplicate -- confirm
# that dropping them is really intended.
df = df.drop_duplicates()
# Sanity check: no duplicated rows should remain.
df.duplicated().sum()

0

In [7]:
# Re-check the (rows, columns) dimensions after the duplicate row was dropped.
df.shape

(26, 2)

#### Select x and y 

In [8]:
# Feature matrix: kept two-dimensional (a one-column DataFrame) because
# scikit-learn estimators expect X with shape (n_samples, n_features).
x = df.loc[:, ['age']]              # independent feature
# Target vector: a one-dimensional Series of the 0/1 purchase label.
y = df.loc[:, 'bought_insurance']   # dependent variable (target)
print(x.shape, y.shape, sep='\n')
print(type(x), type(y))

(26, 1)
(26,)
<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'>


#### Split the data into train and test data

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
# Expected number of test rows for a 25% split. The row count is taken from the
# data (len(x)) instead of a hard-coded 26 so the figure stays correct if the
# dataset changes.
print(x.shape)
print(0.25 * len(x))

(26, 1)
6.5


In [11]:
# Hold out 25% of the rows for testing; random_state fixes the shuffle so the
# same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=30
)
# Report the shape of each piece in the order: x_train, x_test, y_train, y_test.
for split in (x_train, x_test, y_train, y_test):
    print(split.shape)

(19, 1)
(7, 1)
(19,)
(7,)


### ML Model

In [12]:
from sklearn.linear_model import LogisticRegression

In [13]:
# Logistic regression classifier with scikit-learn's default hyperparameters,
# fitted on the training split only.
m1 = LogisticRegression()
m1.fit(X=x_train, y=y_train)

In [14]:
print('Train Score',m1.score(x_train,y_train))  # Train Accuracy
print('Test Score',m1.score(x_test,y_test))     # Test Accuracy
# Train (~0.89) and test (~0.86) accuracy are close: the gap is smaller than the
# effect of a single test sample (1/7 ~= 0.14), so these scores do NOT indicate
# overfitting. With only 7 test rows the test score is a rough estimate.

Train Score 0.8947368421052632
Test Score 0.8571428571428571


In [15]:
# Fitted parameters of the linear part of the model:
#   m -> coefficient (weight) learned for `age`
#   c -> intercept (bias) term
m, c = m1.coef_, m1.intercept_
print(m, c, sep='\n')

[[0.14890449]]
[-5.38983919]


In [16]:
from sklearn.metrics import confusion_matrix,classification_report

In [17]:
# Predicted class labels (0/1) for every row of the held-out test split.
ypred_m1 = m1.predict(X=x_test)
print(ypred_m1)

[1 0 0 1 0 1 1]


In [18]:
# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=ypred_m1)
print(cm)
# Per-class precision / recall / F1 on the test split.
report = classification_report(y_true=y_test, y_pred=ypred_m1)
print(report)

[[3 1]
 [0 3]]
              precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       0.75      1.00      0.86         3

    accuracy                           0.86         7
   macro avg       0.88      0.88      0.86         7
weighted avg       0.89      0.86      0.86         7



#### Predict whether the person bought the insurance or not when:
a) age = 57<br>
b) age = 23

In [19]:
# The model was fitted on a DataFrame whose single column is named 'age', so each
# query is wrapped in a DataFrame with that same column. Passing a bare nested
# list ([[57]]) makes recent scikit-learn versions emit the
# "X does not have valid feature names" UserWarning.
ypred_57 = m1.predict(pd.DataFrame({'age': [57]}))
print(ypred_57)
ypred_23 = m1.predict(pd.DataFrame({'age': [23]}))
print(ypred_23)

[1]
[0]


