In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

## Put the parent of the current working directory on sys.path so the
## `utils` package imported below can be resolved (presumably it lives one
## level above this notebook -- confirm against the repository layout).
import sys
import os

PROJECT_ROOT = os.path.dirname(os.getcwd())
# Guard the append so re-running this cell does not stack duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from utils.utils import find_best_hyperparameters

In [2]:
import warnings
#suppress warnings
warnings.filterwarnings('ignore')

In [3]:
# Input CSV, looked up relative to the notebook's working directory.
DATA_FILE = "insurance_pre.csv"
dataset = pd.read_csv(DATA_FILE)

In [4]:
# Peek at the first five rows of the raw frame.
dataset.head(n=5)

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.9,0,yes,16884.924
1,18,male,33.77,1,no,1725.5523
2,28,male,33.0,3,no,4449.462
3,33,male,22.705,0,no,21984.47061
4,32,male,28.88,0,no,3866.8552


In [5]:
# One-hot encode the non-numeric columns. drop_first=True drops the first level
# of each, which is why only sex_male and smoker_yes remain in the output below.
dataset = pd.get_dummies(data=dataset, drop_first=True)

In [6]:
# First five rows after encoding, to check the new indicator columns.
dataset.head(n=5)

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.9,0,16884.924,False,True
1,18,33.77,1,1725.5523,True,False
2,28,33.0,3,4449.462,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.88,0,3866.8552,True,False


In [7]:
# Feature columns fed to the model: every encoded column except the target.
independent_vars = [
    "age",
    "bmi",
    "children",
    "sex_male",
    "smoker_yes",
]

In [8]:
# Feature matrix: all rows, restricted to the columns named in independent_vars.
x = dataset.loc[:, independent_vars]

In [9]:
# First five feature rows.
x.head(n=5)

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.9,0,False,True
1,18,33.77,1,True,False
2,28,33.0,3,True,False
3,33,22.705,0,True,False
4,32,28.88,0,True,False


In [10]:
# (rows, columns) of the feature matrix; the column count should equal
# len(independent_vars).
x.shape

(1338, 5)

In [11]:
# Regression target. Kept as a list, so selecting with it yields a
# one-column DataFrame rather than a Series.
dependent_vars = [
    "charges",
]

In [12]:
# Target frame: all rows, restricted to the column(s) named in dependent_vars.
y = dataset.loc[:, dependent_vars]

In [13]:
# First five target rows.
y.head(n=5)

Unnamed: 0,charges
0,16884.924
1,1725.5523
2,4449.462
3,21984.47061
4,3866.8552


In [14]:
# (rows, columns) of the target frame; two-dimensional because y was
# selected with a list of column names.
y.shape

(1338, 1)

In [15]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.30,
    random_state=0,
)

In [16]:
# Search grid handed to find_best_hyperparameters: SVR regularisation
# strengths (C) and the kernels to try.
param_dict = dict(
    C=[10, 100, 500, 1000, 2000, 3000],
    kernel=["linear", "poly", "rbf", "sigmoid"],
)

In [17]:
def build_regressor(combo):
    """Return a StandardScaler -> SVR pipeline configured with `combo`."""
    return make_pipeline(StandardScaler(), SVR(**combo))


def report_combo(combo):
    """Print one CSV row (C, kernel, r_score) for `combo`."""
    print(f'{combo["C"]},{combo["kernel"]},{combo["r_score"]}')


# Header for the CSV-style rows printed by report_combo.
print("C,kernel,r_score")

# NOTE(review): the test split is passed to the helper, so the winning combo
# is presumably chosen on the same data it is scored on -- confirm against
# utils.utils and consider a separate validation split.
best_combo = find_best_hyperparameters(
    param_dict,
    x_train, y_train, x_test, y_test,
    create_regressor_callback=build_regressor,
    print_combo_callback=report_combo,
)

print("\nBest combination:")
print(f'C={best_combo["C"]}, kernel={best_combo["kernel"]}, r_score={best_combo["r_score"]}')
    

C,kernel,r_score
10,linear,0.462468414233968
10,poly,0.038716222760231456
10,rbf,-0.0322732939067103
10,sigmoid,0.03930714378274347
100,linear,0.6288792857320367
100,poly,0.6179569624059797
100,rbf,0.3200317832050832
100,sigmoid,0.5276103546510407
500,linear,0.763105797597539
500,poly,0.8263683541268981
500,rbf,0.6642984611986598
500,sigmoid,0.4446061033869473
1000,linear,0.7649311738597033
1000,poly,0.856648767594656
1000,rbf,0.8102064874808204
1000,sigmoid,0.2874706948697654
2000,linear,0.7440418308108018
2000,poly,0.8605579258597715
2000,rbf,0.8547766422240716
2000,sigmoid,-0.5939509731283503
3000,linear,0.7414236599249162
3000,poly,0.8598930084494385
3000,rbf,0.8663393963090398
3000,sigmoid,-2.1244194786689863

Best combination:
C=3000, kernel=rbf, r_score=0.8663393963090398
