In [None]:
import pandas as pd
from pathlib import Path
import warnings
#suppress warnings
warnings.filterwarnings('ignore')
print("Imports successful")

In [None]:
# Read the insurance CSV and one-hot encode it in a single chain;
# drop_first=True drops the first level of every encoded column.
dataset = (
    pd.read_csv(Path("../regressionAssignment/insurance_pre.csv"))
    .pipe(pd.get_dummies, drop_first=True)
)

# Feature columns and the target column. The target is given as a
# one-element list so that `y` is a DataFrame rather than a Series.
independent_vars = ["age","bmi","children","sex_male","smoker_yes"]
dependent_var = ["charges"]

x, y = dataset[independent_vars], dataset[dependent_var]

# Quick sanity check of column dtypes and of the two shapes
x.info()
print(f"x.shape={x.shape} y.shape={y.shape}")



In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

# Hyper-parameter grid explored for the decision tree.
# The criterion names 'mse' / 'mae' and max_features='auto' were deprecated
# and later removed from scikit-learn, so candidates using them fail to fit
# on current releases. 'squared_error' / 'absolute_error' are the replacement
# names, and max_features=None (consider every feature) is what 'auto' meant
# for a regressor.
# NOTE: the new criterion names require scikit-learn >= 1.0.
param_grid = {
    'criterion': ['squared_error', 'absolute_error', 'friedman_mse'],
    'max_features': [None, 'sqrt', 'log2'],
    'splitter': ['best', 'random'],
}

#We can try scaling for both input and output
# Separate scalers are kept for inputs (sc) and target (sc_y) because the
# prediction cell needs sc.transform and sc_y.inverse_transform.
sc = StandardScaler()
x_scaled = sc.fit_transform(x)
sc_y = StandardScaler()
y_scaled = sc_y.fit_transform(y)


#refit if true will fit the model for best hyperparameter combination
# random_state is pinned because splitter='random' and the 'sqrt'/'log2'
# feature subsets are stochastic; without it the best combination can differ
# between runs.
grid = GridSearchCV(
    DecisionTreeRegressor(random_state=42),
    param_grid,
    refit=True,
    verbose=3,
    n_jobs=-1,
)

# fitting the model for grid search
grid.fit(x_scaled, y_scaled)


In [None]:
# Print the hyper-parameter combination that GridSearchCV selected as best
print(f"Best parameter={grid.best_params_}")

In [None]:
# Turn the cross-validation results dictionary into a DataFrame
res_dict = grid.cv_results_
res_table = pd.DataFrame(res_dict)

# Show just the first five rows of the results table
print("First 5 rows:")
first_five_rows = res_table.iloc[:5]
first_five_rows



In [None]:
# Show the row(s) of the results table whose rank_test_score is 1,
# i.e. the top-ranked hyper-parameter combination(s) of the grid search.

print("\nRow with rank_test_score = 1:")
res_table.loc[res_table['rank_test_score'].eq(1)]


In [None]:
#Set this to True for Vscode and False for Jupyter notebook
use_widgets = True

if use_widgets:
  try:
    from ipywidgets import widgets
    from IPython.display import display
  except ImportError:
    # ipywidgets is optional: instead of crashing the cell, switch to the
    # input() prompts below. The flag is updated so that the following cell,
    # which also tests `use_widgets`, does not look for widget objects.
    use_widgets = False

if use_widgets:
  # One control per model input; the values are read back in the next cell
  # after the user has edited them.
  age_widget = widgets.IntText(description='Age:', value=0)
  bmi_widget = widgets.FloatText(description='BMI:', value=0.0)
  children_widget = widgets.IntText(description='Children:', value=0)
  sex_widget = widgets.Checkbox(description='Male?', value=False)
  smoker_widget = widgets.Checkbox(description='Smoker?', value=False)

  display(age_widget, bmi_widget, children_widget, sex_widget, smoker_widget)

else:
  print("Widgets not available, using input() instead")
  age = int(input("Enter Age: "))
  bmi = float(input("Enter BMI: "))
  children = int(input("No. of children: "))
  # strip() so that answers with stray whitespace ("yes ", " y") still count
  # as yes instead of silently becoming False.
  sex_male = input("Male? (yes/no): ").strip().lower() in ['yes', 'y', '1']
  smoker = input("Smoker? (yes/no): ").strip().lower() in ['yes', 'y', '1']




In [10]:
# With widgets enabled, copy each control's current value into the plain
# variables that the input() branch would otherwise have assigned.
if use_widgets:
  age, bmi, children, sex_male, smoker = (
    control.value
    for control in (age_widget, bmi_widget, children_widget, sex_widget, smoker_widget)
  )

# Echo the collected inputs so they are visible next to the prediction
print(f"age={age} bmi={bmi} children={children} male={sex_male} smoker={smoker}")

age=25 bmi=22.5 children=2 male=True smoker=False


In [12]:
#Let us do prediction
# Build the single input row as a DataFrame with the training column names.
# `sc` was fitted on a DataFrame, so named columns let scikit-learn check
# that the features arrive in the fitted order (a bare nested list gives no
# such check and warns on scikit-learn >= 1.0). astype(float) turns the
# boolean flags into 0.0 / 1.0.
input_row = pd.DataFrame(
    [[age, bmi, children, sex_male, smoker]],
    columns=independent_vars,
).astype(float)
transformed_input = sc.transform(input_row)
prediction = grid.predict(transformed_input)

print(f"prediction Before applying inverse transorm = {prediction}")
#prediction should be reverse preprocessed
# The model was trained on the standardised target, so map the prediction
# back to the original scale. reshape(-1, 1) yields one row per sample, which
# stays correct for several samples, unlike wrapping the array in a list.
prediction = sc_y.inverse_transform(prediction.reshape(-1, 1))

print(f"prediction={prediction}")

prediction Before applying inverse transorm = [0.82368053]
prediction=[[23241.47453]]
