In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Load the pre-processed insurance table into a DataFrame. The path is
# absolute, so the CSV has to exist at exactly this location.
dataset = pd.read_csv(filepath_or_buffer='/content/insurance_pre.csv')

In [5]:
# Preview the first five rows to sanity-check the columns that were loaded.
dataset.head(n=5)

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.9,0,yes,16884.924
1,18,male,33.77,1,no,1725.5523
2,28,male,33.0,3,no,4449.462
3,33,male,22.705,0,no,21984.47061
4,32,male,28.88,0,no,3866.8552


In [6]:
# One-hot encode the non-numeric columns as 0/1 integers; drop_first=True
# drops the first level of each encoded column.
# Note: this rebinds `dataset`, so the raw frame is no longer reachable under
# that name once the cell has run.
dataset = pd.get_dummies(
    data=dataset,
    drop_first=True,
    dtype=int,
)

In [7]:
# Preview the encoded frame to confirm the dummy columns were created.
dataset.head(n=5)

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.9,0,16884.924,0,1
1,18,33.77,1,1725.5523,1,0
2,28,33.0,3,4449.462,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.88,0,3866.8552,1,0


In [8]:
# Predictors: the three numeric columns plus the two dummy indicators.
indep = dataset.loc[:, ['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
# Target: the charges column.
dep = dataset.loc[:, 'charges']

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    indep,
    dep,
    test_size=0.20,
    random_state=0,
)

In [10]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to both splits. X_train and X_test are rebound to the
# transformed values.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [12]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

# Hyper-parameter values searched exhaustively for the decision tree.
# 'auto' is intentionally absent from max_features: scikit-learn deprecated it
# for DecisionTreeRegressor in 1.1 and removed it in 1.3, and while it was
# accepted it meant "use all features", i.e. the same setting as None. The
# effective search space is therefore unchanged.
# NOTE(review): the recorded cell output reports 432 candidates, far fewer
# than this grid produces, so the grid appears to have been edited after the
# last run. Re-run the notebook to refresh the outputs, or trim the grid.
param_grid = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 5, 10, 15, 20, 30, 50],
    'min_samples_split': [2, 5, 10, 20, 50],
    'min_samples_leaf': [1, 2, 4, 10, 20],
    'max_features': [None, 'sqrt', 'log2'],
    'max_leaf_nodes': [None, 10, 20, 50, 100],
    'min_impurity_decrease': [0.0, 0.01, 0.1],
    'ccp_alpha': [0.0, 0.01, 0.1]
}

# random_state seeds the tree so candidates using splitter='random' or a
# max_features subset give the same scores on every run. scoring='r2' spells
# out the metric that the later reporting cell labels as r2.
grid = GridSearchCV(
    estimator=DecisionTreeRegressor(random_state=0),
    param_grid=param_grid,
    scoring='r2',
    refit=True,  # refit the best candidate on all of X_train
    verbose=3,
    n_jobs=-1,
    cv=2,
)
# Fit every candidate on the training split.
grid.fit(X_train, y_train)

Fitting 2 folds for each of 432 candidates, totalling 864 fits


In [15]:
# Keep a handle on the estimator that the grid search refit (refit=True) using
# the best-scoring parameter combination.
best_model = grid.best_estimator_

In [16]:
from sklearn.metrics import r2_score

# Predict on the held-out split and score it. The grid-search score reported
# elsewhere in the notebook is a cross-validation mean over the training
# split only, so this is the only estimate on rows the search never saw.
y_pred = best_model.predict(X_test)
r2_score(y_test, y_pred)

In [17]:
from sklearn.model_selection import GridSearchCV

# Report the winning parameter combination from the fitted search.
print("Best parameters:", grid.best_params_)

# Locate the best candidate's row in cv_results_ and read its test score
# averaged over the cross-validation folds.
best_index = grid.best_index_
mean_scores = grid.cv_results_['mean_test_score']
best_r2_score = mean_scores[best_index]

# This is a cross-validation score on the training split, not a held-out one.
print("r2:", best_r2_score)

Best parameters: {'criterion': 'squared_error', 'max_depth': 10, 'max_features': None, 'min_samples_leaf': 20, 'min_samples_split': 10, 'splitter': 'best'}
r2: 0.828996023563211


In [18]:
# Put the per-candidate search results into a DataFrame (one row per
# parameter combination) so they can be sorted and inspected.
table = pd.DataFrame(grid.cv_results_)

In [19]:
# Show the candidates ordered from best rank (1) to worst.
table.sort_values(by='rank_test_score', ascending=True)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_max_features,param_min_samples_leaf,param_min_samples_split,param_splitter,params,split0_test_score,split1_test_score,mean_test_score,std_test_score,rank_test_score
68,0.006144,0.003596,0.001659,0.000103,squared_error,10,,20,10,best,"{'criterion': 'squared_error', 'max_depth': 10...",0.834500,0.823492,0.828996,0.005504,1
390,0.002739,0.000138,0.001515,0.000040,friedman_mse,30,,20,2,best,"{'criterion': 'friedman_mse', 'max_depth': 30,...",0.834500,0.823492,0.828996,0.005504,1
336,0.002588,0.000114,0.001616,0.000241,friedman_mse,20,,20,2,best,"{'criterion': 'friedman_mse', 'max_depth': 20,...",0.834500,0.823492,0.828996,0.005504,1
338,0.002546,0.000018,0.001486,0.000018,friedman_mse,20,,20,10,best,"{'criterion': 'friedman_mse', 'max_depth': 20,...",0.834500,0.823492,0.828996,0.005504,1
120,0.002628,0.000035,0.001465,0.000079,squared_error,20,,20,2,best,"{'criterion': 'squared_error', 'max_depth': 20...",0.834500,0.823492,0.828996,0.005504,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
357,0.003336,0.001112,0.005707,0.002028,friedman_mse,20,sqrt,20,10,random,"{'criterion': 'friedman_mse', 'max_depth': 20,...",0.001570,0.648819,0.325194,0.323624,428
301,0.005775,0.000644,0.003525,0.001943,friedman_mse,10,sqrt,20,2,random,"{'criterion': 'friedman_mse', 'max_depth': 10,...",0.574746,0.060271,0.317508,0.257238,429
315,0.004072,0.000111,0.002375,0.000124,friedman_mse,10,log2,10,10,random,"{'criterion': 'friedman_mse', 'max_depth': 10,...",0.447536,0.172754,0.310145,0.137391,430
103,0.005106,0.001848,0.002887,0.000886,squared_error,10,log2,20,2,random,"{'criterion': 'squared_error', 'max_depth': 10...",0.362989,0.224245,0.293617,0.069372,431


In [20]:
# Re-binds best_model to the refit best estimator. An earlier cell already
# performs this same assignment, so this one is redundant unless `grid` has
# been refit in between.
best_model = grid.best_estimator_

In [21]:
import pickle

# Serialise the tuned tree to disk. The context manager guarantees the file
# handle is flushed and closed even if pickling raises; the original passed
# open(...) inline and never closed it.
# NOTE(review): only the estimator is saved. It was trained on features
# standardised by `sc`, so anything that loads this file needs that same
# fitted scaler to get meaningful predictions; consider persisting `sc` too.
# NOTE(review): the "RF" prefix presumably stands for random forest, but the
# saved object is a decision tree. Confirm the file name is intentional.
filename = "RF_G.sav"
with open(filename, 'wb') as model_file:
    pickle.dump(best_model, model_file)