In [None]:
import pandas as pd

In [None]:
# Load the insurance dataset from a CSV file located in the working directory.
df = pd.read_csv('insurance.csv')
# Preview the first rows to confirm the file was parsed as expected.
df.head()

# 1. Preprocessing

In [None]:
# Show the first three rows.
df.head(3)

In [None]:
# Show the last three rows.
df.tail(3)

In [None]:
# Report the frame's shape as a (rows, columns) tuple.
print('The number of rows and columns in the insurance dataframe is: ',df.shape)

In [None]:
# Print column names, dtypes and non-null counts.
df.info()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics restricted to the object-dtype (text) columns.
df.describe(include = 'O')

In [None]:
# Summary statistics using describe()'s default column selection.
df.describe()

In [None]:
# Distinct values present in the `sex` column.
df['sex'].unique()

In [None]:
# Numeric codes for the categorical columns.
# Smoker is coded yes=1 / no=0 so that 1 always means "yes"; this matches the
# "[1/0]" prompts of the GUI form at the end of the notebook, which previously
# disagreed with the training data (yes was 0 here while the form asks 1 for yes).
CATEGORY_CODES = {
    'sex': {'female': 0, 'male': 1},
    'smoker': {'no': 0, 'yes': 1},
    'region': {'southwest': 1, 'southeast': 2, 'northwest': 3, 'northeast': 4},
}

for column, codes in CATEGORY_CODES.items():
    # Re-running this cell must be a no-op: pushing already-encoded integers
    # through the label mapping would turn the whole column into NaN.
    if df[column].isin(list(codes.values())).all():
        continue

    encoded = df[column].map(codes)

    # `.map` silently yields NaN for labels missing from the mapping; fail
    # loudly here instead of letting NaN reach model training.
    if encoded.isnull().any():
        unexpected = sorted(set(df.loc[encoded.isnull(), column].astype(str)))
        raise ValueError(f"Unexpected values in column {column!r}: {unexpected}")

    df[column] = encoded

df.head()

# Predictors and Target Variables

In [None]:
# Target is the `charges` column; every other column is a predictor.
y = df['charges']
X = df.drop(columns=['charges'])

X.shape, y.shape

# Train and Test Split

In [None]:
from sklearn.model_selection import train_test_split

# 70% of the rows go to training; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=29,
)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

# Import the models

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

# Model Training

In [None]:
# Fit the four candidate regressors on the training split.
# The two tree ensembles draw random numbers while fitting, so their seed is
# pinned (same value as the train/test split) to make scores and predictions
# identical on every Restart & Run All.
lr = LinearRegression().fit(X_train, y_train)
svr = SVR().fit(X_train, y_train)
rf = RandomForestRegressor(random_state=29).fit(X_train, y_train)
gr = GradientBoostingRegressor(random_state=29).fit(X_train, y_train)

# Prediction on test data

In [None]:
# Predict the held-out rows with each fitted model, in the order lr, svr, rf, gr.
y_pred1, y_pred2, y_pred3, y_pred4 = (
    fitted.predict(X_test) for fitted in (lr, svr, rf, gr)
)

# Side-by-side table: true charges next to each model's prediction.
df1 = pd.DataFrame(
    {
        'Actual': y_test,
        'lr': y_pred1,
        'svr': y_pred2,
        'rf': y_pred3,
        'gr': y_pred4,
    }
)

# Attach the predictor columns so each row shows inputs and outputs together.
df2 = X_test.join(df1)
df2

# Compare the performance Visually

In [None]:
import matplotlib.pyplot as plt

In [None]:
# List the columns of the comparison frame (actual values plus one column per model).
df1.columns

In [None]:
# Compare actual and predicted charges for the first few test rows, one panel
# per model.
N_SHOWN = 11  # number of leading test rows drawn in every panel

# The test rows keep their original (shuffled) row labels. Plotting the Series
# directly would use those labels as x positions and make the lines zig-zag
# back and forth, so the index is reset to plain positions 0..N_SHOWN-1.
sample = df1.iloc[:N_SHOWN].reset_index(drop=True)

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

for ax, model_name in zip(axes.flat, ['lr', 'svr', 'rf', 'gr']):
    ax.plot(sample['Actual'], label='Actual')
    ax.plot(sample[model_name], label=model_name)
    ax.set_title(f'Actual vs {model_name}')
    ax.set_xlabel('Test row (position)')
    ax.set_ylabel('Charges')
    ax.legend()

fig.tight_layout()
plt.show()

# Evaluate the models
(using R-squared, which measures how well the regression model's predictors (independent variables) explain the observed outcome (dependent variable))

In [None]:
from sklearn.metrics import r2_score, mean_absolute_error

# R-squared of every model on the test split, in the order lr, svr, rf, gr.
score1, score2, score3, score4 = (
    r2_score(y_test, predicted)
    for predicted in (y_pred1, y_pred2, y_pred3, y_pred4)
)

score1, score2, score3, score4

In [None]:
# Mean absolute error of every model on the test split, in the order lr, svr, rf, gr.
s1, s2, s3, s4 = (
    mean_absolute_error(y_test, predicted)
    for predicted in (y_pred1, y_pred2, y_pred3, y_pred4)
)

s1, s2, s3, s4

# Predict charges for New Customer

In [None]:
# One new customer, described with the same column names (and order) as the
# training features; categorical fields are given as numeric codes.
data = dict(
    age=40,
    sex=1,
    bmi=40.30,
    children=4,
    smoker=1,
    region=2,
)

# Single-row frame (index label 0) so it can be passed straight to `predict`.
df3 = pd.DataFrame(data, index=[0])
df3

In [None]:
# Predict the charge for the single-row frame with the gradient-boosting model.
gr.predict(df3)

# Train the model on the entire dataset before deploying it with Joblib

In [None]:
# Refit the gradient-boosting model on every row (train + test) for deployment.
# The seed is pinned so the saved model is the same on every run of the notebook.
# Note: this rebinds `gr`, replacing the model that was evaluated on the test split.
gr = GradientBoostingRegressor(random_state=29)
gr.fit(X,y)

# Save Model Using Joblib

In [None]:
from joblib import load, dump

In [None]:
# Persist the fitted model to the file 'model_joblib_gr' in the working directory.
dump(gr,'model_joblib_gr')

In [None]:
# Reload the persisted model from disk.
# joblib files are pickle-based: only load files that come from a trusted source.
model = load('model_joblib_gr')

In [None]:
# Sanity check: predict with the reloaded model on the same single-row frame.
model.predict(df3)

# Create a GUI that predicts charges for a new customer

In [1]:
from tkinter import *

In [2]:
import joblib

In [6]:
import joblib
import tkinter as tk

def show_entry():
    """Predict the insurance cost from the form fields and display it.

    Reads the six module-level entry widgets ``e1`` .. ``e6``, converts each
    value to ``float``, loads the model stored in ``model_joblib_gr`` and
    shows the prediction inside ``master``. An error message is shown instead
    when a value cannot be used for prediction or the model file is missing.

    Returns:
        None. The function only updates the window.
    """
    # Output lives in rows 8-9. The Predict button is gridded without an
    # explicit row, so it occupies row 7 (first free row below the inputs);
    # writing output to row 7 would draw on top of the button.
    # Widgets left by an earlier click are destroyed first so results and
    # error messages never pile up on top of each other.
    for stale in master.grid_slaves(row=8) + master.grid_slaves(row=9):
        stale.destroy()

    try:
        # Field order must match the feature order the model was trained on.
        features = [float(field.get()) for field in (e1, e2, e3, e4, e5, e6)]

        # Load the machine learning model.
        model = joblib.load('model_joblib_gr')

        # Make a prediction for the single customer.
        result = model.predict([features])
    except ValueError:
        # Raised by float() for non-numeric text, and by the model for values
        # it rejects.
        tk.Label(master, text="Invalid input.", fg="red").grid(row=8, columnspan=2)
        return
    except FileNotFoundError:
        tk.Label(
            master, text="Model file 'model_joblib_gr' not found.", fg="red"
        ).grid(row=8, columnspan=2)
        return

    # Display the result, rounded to two decimals for readability.
    tk.Label(master, text="Insurance Cost").grid(row=8, columnspan=2)
    tk.Label(master, text=f"{result[0]:.2f}", fg="green").grid(row=9, columnspan=2)

# Build the main window. Every widget is created through the `tk` namespace
# imported in this cell, so the cell does not depend on the
# `from tkinter import *` executed in a different cell.
master = tk.Tk()
master.title("Insurance Cost Prediction")

# Create a label for the title.
label = tk.Label(master, text="Insurance Cost Prediction", bg="black", fg="white")
label.grid(row=0, columnspan=2)

# Prompts for the six inputs, in the order the prediction callback reads them.
FIELD_PROMPTS = [
    "Enter Your Age",
    "Male Or Female [1/0]",
    "Enter your BMI Value",
    "Enter Number of Children",
    "Smoker Yes/No[1/0]",
    "Region [1-4]",
]

# One row per input: prompt in column 0, entry field in column 1 (rows 1-6).
entries = []
for row, prompt in enumerate(FIELD_PROMPTS, start=1):
    tk.Label(master, text=prompt).grid(row=row)
    entry = tk.Entry(master)
    entry.grid(row=row, column=1)
    entries.append(entry)

# The prediction callback looks these widgets up by name.
e1, e2, e3, e4, e5, e6 = entries

# Create a button to submit the form. No row is given, so grid() places it in
# the first unused row (row 7, directly below the inputs).
button = tk.Button(master, text="Predict", command=show_entry)
button.grid()

# Start the mainloop (blocks until the window is closed).
master.mainloop()

