# Fine Tuning

In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [5]:
df = pd.read_csv("insurance.csv")
df

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


In [7]:
df  = pd.get_dummies(df,columns=['sex','smoker','region'])
df = df.astype(int)
df.head(3)

Unnamed: 0,age,bmi,children,charges,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
0,19,27,0,16884,1,0,0,1,0,0,0,1
1,18,33,1,1725,0,1,1,0,0,0,1,0
2,28,33,3,4449,0,1,1,0,0,0,1,0


In [15]:
x = df.drop(columns=['charges'])
y = df['charges']
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2, random_state = 42)

In [35]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
model = LinearRegression()
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
accuracy = r2_score(y_test,y_pred)
print(f"Accuracy: {accuracy*100:.2f}%")

Accuracy: 78.38%


In [37]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import Lasso

In [39]:
param_dict ={'alpha':[0.1,1.0,10.0,100.0]}
lasso = Lasso()
random_search = RandomizedSearchCV(lasso,param_dict,n_iter=3,cv=3)
random_search.fit(x_train,y_train)
y_pred_new = random_search.predict(x_test)
accuracy = r2_score(y_test,y_pred_new)
print(f"Accuracy(Random_Search): {accuracy*100:.2f}%")

Accuracy(Random_Search): 77.97%


In [41]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge
param_dict ={'alpha':[0.1,1.0,10.0,100.0]}
ridge = Ridge()
grid_search = GridSearchCV(ridge,param_dict,cv=3)
grid_search.fit(x_train,y_train)
y_pred_new = random_search.predict(x_test)
accuracy = r2_score(y_test,y_pred_new)
print(f"Accuracy(Grid_Search): {accuracy*100:.2f}%")

Accuracy(Grid_Search): 77.97%


# Tips Dataset

In [44]:
df = pd.read_csv("tips.csv")
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [46]:
df = pd.get_dummies(df,columns=['sex','smoker','day','time'])
df = df.astype(int)
df.head(3)

Unnamed: 0,total_bill,tip,size,sex_Female,sex_Male,smoker_No,smoker_Yes,day_Fri,day_Sat,day_Sun,day_Thur,time_Dinner,time_Lunch
0,16,1,2,1,0,1,0,0,0,1,0,1,0
1,10,1,3,0,1,1,0,0,0,1,0,1,0
2,21,3,3,0,1,1,0,0,0,1,0,1,0


In [50]:
x = df.drop(columns=['total_bill'])
y = df['total_bill']

In [60]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2, random_state = 42)

In [62]:
model = LinearRegression()
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
accuracy = r2_score(y_test,y_pred)
print(f"Accuracy: {accuracy*100:.2f}%")

Accuracy: 61.64%


# RandomizedSearchCV

In [119]:
param_dict ={'alpha':[0.1,1.0,10.0,100.0]}
lasso = Lasso()
random_search = RandomizedSearchCV(lasso,param_dict,n_iter=4,cv=2)
random_search.fit(x_train,y_train)
y_pred_new = random_search.predict(x_test)
accuracy = r2_score(y_test,y_pred_new)
print(f"Accuracy(Random_Search): {accuracy*100:.2f}%")

Accuracy(Random_Search): 62.61%


# GridSearchCV

In [121]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge
param_dict ={'alpha':[0.1,1.0,10.0,100.0]}
ridge = Ridge()
grid_search = GridSearchCV(ridge,param_dict,cv=30)
grid_search.fit(x_train,y_train)
y_pred_new = random_search.predict(x_test)
accuracy = r2_score(y_test,y_pred_new)
print(f"Accuracy(Grid_Search): {accuracy*100:.2f}%")

Accuracy(Grid_Search): 62.61%
