In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [None]:
#Read the data
data=pd.read_csv("https://raw.githubusercontent.com/dsrscientist/dataset4/main/medical_cost_insurance.csv")

In [None]:
#see the data
data.head()

In [None]:
data.info()

In [None]:
data['sex'].unique()

In [None]:
#converting categorical data into numerical data-

In [None]:
data['sex']=data['sex'].map({'female':0,'male':1})

In [None]:
data.head()

In [None]:
data['smoker']=data['smoker'].map({'yes':1,'no':0})

In [None]:
data.head()

In [None]:
data['region'].unique()

In [None]:
data['region']=data['region'].map({'southwest':1,'southeast':2,'northwest':3,'northeast':4})

In [None]:
data.head()

In [None]:
data.columns

In [None]:
#storing dependent and independent data-

In [None]:
X=data.drop(['charges'],axis=1)

In [None]:
y=data['charges']

In [None]:
#train/test split-

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=40)

In [None]:
y_train

In [None]:
#import the models-

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [None]:
#Model training-

In [None]:
#Fit the four candidate regressors on the training split.
#random_state pins the randomised tree ensembles (same seed as the
#train/test split) so a Restart & Run All reproduces the same scores.
lr=LinearRegression()
lr.fit(X_train,y_train)
#NOTE(review): SVR is fit on unscaled features here - confirm whether scaling is wanted.
svm=SVR()
svm.fit(X_train,y_train)
rfr=RandomForestRegressor(random_state=40)
rfr.fit(X_train,y_train)
gbr=GradientBoostingRegressor(random_state=40)
gbr.fit(X_train,y_train)

In [None]:
#prediction on test data-

In [None]:
y_pred1=lr.predict(X_test)
y_pred2=svm.predict(X_test)
y_pred3=rfr.predict(X_test)
y_pred4=gbr.predict(X_test)

df1=pd.DataFrame({'Actual':y_test,'lr':y_pred1,'svm':y_pred2,'rfr':y_pred3,'gbr':y_pred4})

In [None]:
df1

In [None]:
#visualization-

In [None]:
import matplotlib.pyplot as plt

In [None]:
#Actual vs. linear-regression charges for the first 11 test rows, drawn in the
#top-left cell of a 2x2 grid. The values are passed as plain arrays so the x
#axis is the sample position: plotting the Series directly would use the
#shuffled test-set index as x and make the lines zig-zag.
plt.subplot(221)
plt.plot(df1['Actual'].iloc[0:11].to_numpy(),label='Actual')
plt.plot(df1['lr'].iloc[0:11].to_numpy(),label="lr")
plt.xlabel('test sample')
plt.ylabel('charges')
plt.legend();

In [None]:
#visual comparison of model performance-

In [None]:
import matplotlib.pyplot as plt

In [None]:
#Compare every model against the actual charges on the first 11 test rows,
#one panel per model. The values are plotted as plain arrays so the x axis is
#the sample position: plotting the Series directly would use the shuffled
#test-set index as x and make the lines zig-zag.
fig, axes = plt.subplots(2, 2)
for ax, model_name in zip(axes.flat, ['lr', 'svm', 'rfr', 'gbr']):
    ax.plot(df1['Actual'].iloc[0:11].to_numpy(), label='Actual')
    ax.plot(df1[model_name].iloc[0:11].to_numpy(), label=model_name)
    ax.set_title(model_name)
    ax.legend()

#Applied once, after every panel is drawn, so titles and ticks do not overlap.
fig.tight_layout()
plt.show()

In [None]:
#evaluating the algorithm-

In [None]:
from sklearn import metrics

In [None]:
score1=metrics.r2_score(y_test,y_pred1)
score2=metrics.r2_score(y_test,y_pred2)
score3=metrics.r2_score(y_test,y_pred3)
score4=metrics.r2_score(y_test,y_pred4)

In [None]:
print(score1,score2,score3,score4)

In [None]:
s1=metrics.mean_absolute_error(y_test,y_pred1)
s2=metrics.mean_absolute_error(y_test,y_pred2)
s3=metrics.mean_absolute_error(y_test,y_pred3)
s4=metrics.mean_absolute_error(y_test,y_pred4)
print(s1,s2,s3,s4)

In [None]:
#getting data from new customers-

In [None]:
data={'age':40,
     'sex':0,
     'bmi':30.45,
     'children':4,
     'smoker':0,
     'region':3}
     
df = pd.DataFrame(data,index=[0])
df

In [None]:
new_pred=gbr.predict(df)
print(new_pred)

In [None]:
#saving the model-

In [None]:
#Refit the chosen model on the full dataset before saving it;
#random_state makes the saved model reproducible across runs.
gbr=GradientBoostingRegressor(random_state=40)
gbr.fit(X,y)

In [None]:
import joblib

In [None]:
joblib.dump(gbr,'model_joblib_gbr')

In [None]:
model=joblib.load('model_joblib_gbr')

In [None]:
model.predict(df)

In [None]:
#GUI

In [None]:
from tkinter import*

In [None]:
import joblib

In [None]:
def show_entry():
    """Read the six form fields, predict the insurance cost and display it.

    Callback for the "Predict" button. The entries e1..e6 are parsed as
    floats and passed to the saved model in that order. The prediction is
    shown in grid rows 7 and 8 of ``master``. If any field is empty or not
    numeric, an error dialog is shown and no prediction is made.
    """
    # messagebox is a tkinter submodule and is not provided by `from tkinter import *`.
    from tkinter import messagebox

    try:
        features = [float(entry.get()) for entry in (e1, e2, e3, e4, e5, e6)]
    except ValueError:
        # Report bad input instead of raising inside the Tk callback.
        messagebox.showerror("Invalid input",
                             "Please enter a numeric value in every field.")
        return

    # The file name must be a string; the bare name was an undefined variable.
    model = joblib.load('model_joblib_gbr')
    result = model.predict([features])

    # Use the tkinter Label class, not a lowercase `label` name.
    Label(master, text="Insurance cost").grid(row=7)
    Label(master, text=result).grid(row=8)
    

#Build the Tkinter form that collects the six model inputs.
master=Tk()
master.title("Insurance cost Prediction")
#grid() returns None, so the header widget is not bound to a name.
Label(master,text="Insurance Cost Prediction",bg = "black",
           fg="white").grid(row=0,columnspan=2)

#Prompts for rows 1-6. The categorical fields state their numeric codes
#because show_entry parses every entry with float().
prompts = ["Enter your age",
           "Sex: male/female [1/0]",
           "Enter your BMI value",
           "Enter number of children",
           "Smoker yes/no [1/0]",
           "Region [1-4]"]
for row, prompt in enumerate(prompts, start=1):
    Label(master,text = prompt).grid(row=row)

#The entry widgets keep the names e1..e6 because show_entry reads them by name.
e1=Entry(master)
e2=Entry(master)
e3=Entry(master)
e4=Entry(master)
e5=Entry(master)
e6=Entry(master)

#Place each entry next to its prompt (column 1, rows 1-6).
for row, entry in enumerate((e1,e2,e3,e4,e5,e6), start=1):
    entry.grid(row=row,column=1)

Button(master,text="Predict",command=show_entry).grid()

mainloop()