In [1]:
import numpy as np
import pandas as pd
from numpy import math

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

import matplotlib.pyplot as plt

# Load the insurance data; skipinitialspace strips blanks that follow the
# delimiter so values do not carry leading whitespace.
dataset = pd.read_csv('./dataset/insurance.csv', skipinitialspace=True)

# One-hot encode the categorical columns in a single pass. Non-encoded columns
# keep their position and the indicator columns are appended in the order
# listed here (region, smoker, sex), which matches encoding them one by one.
dataset = pd.get_dummies(dataset, columns=['region', 'smoker', 'sex'])

# Target column; every remaining column is used as a feature.
dependent_variable = 'charges'
independent_variables = dataset.columns.drop(dependent_variable).tolist()

# Feature matrix, explicitly as float. The frame mixes numeric columns with
# get_dummies indicator columns (bool in recent pandas), so plain `.values`
# can come back as an object-dtype array; converting here keeps X numeric and
# surfaces any non-numeric column immediately instead of inside the scaler.
X = dataset[independent_variables].to_numpy(dtype=float)

# Target vector.
y = dataset[dependent_variable].values

# Hold out 20% of the rows for testing; the fixed random_state makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Scale the features. The scaler is fit on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit Ridge regression (library-default regularization) on the training split.
regressor = Ridge()
regressor.fit(X_train, y_train)

# Predict charges for the held-out rows.
y_pred = regressor.predict(X_test)

# Evaluate on the held-out rows. MSE is in squared units of the target.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean squared error: {mse:.2f}")
print(f"R^2 score: {r2:.2f}")


Mean squared error: 31894483.35
R^2 score: 0.80
