In [1]:
from google.colab import files
uploaded = files.upload()

Saving insurance.csv to insurance (1).csv


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [4]:
df = pd.read_csv("insurance (1).csv")
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [5]:
le = LabelEncoder()
df['sex'] = le.fit_transform(df['sex'])
df['smoker'] = le.fit_transform(df['smoker'])
df['region'] = le.fit_transform(df['region'])

df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,3,16884.924
1,18,1,33.77,1,0,2,1725.5523
2,28,1,33.0,3,0,2,4449.462
3,33,1,22.705,0,0,1,21984.47061
4,32,1,28.88,0,0,1,3866.8552


In [6]:
X = df.drop('charges', axis=1)
y = df['charges']

In [7]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [10]:
svr_rbf = SVR(kernel='rbf')
svr_rbf.fit(X_train, y_train)

y_pred_rbf = svr_rbf.predict(X_test)

mae_rbf = mean_absolute_error(y_test, y_pred_rbf)
rmse_rbf = np.sqrt(mean_squared_error(y_test, y_pred_rbf))
r2_rbf = r2_score(y_test, y_pred_rbf)

print("RBF Kernel Results")
print("MAE:", mae_rbf)
print("RMSE:", rmse_rbf)
print("R2 Score:", r2_rbf)

RBF Kernel Results
MAE: 8599.3274179344
RMSE: 12877.846306805892
R2 Score: -0.06821436741015718


In [11]:
svr_poly = SVR(kernel='poly')
svr_poly.fit(X_train, y_train)

y_pred_poly = svr_poly.predict(X_test)

mae_poly = mean_absolute_error(y_test, y_pred_poly)
rmse_poly = np.sqrt(mean_squared_error(y_test, y_pred_poly))
r2_poly = r2_score(y_test, y_pred_poly)

print("Polynomial Kernel Results")
print("MAE:", mae_poly)
print("RMSE:", rmse_poly)
print("R2 Score:", r2_poly)

Polynomial Kernel Results
MAE: 8589.739773096262
RMSE: 12848.191019195776
R2 Score: -0.06330023362857706


In [12]:
results = pd.DataFrame({
    "Kernel": ["RBF", "Polynomial"],
    "MAE": [mae_rbf, mae_poly],
    "RMSE": [rmse_rbf, rmse_poly],
    "R2 Score": [r2_rbf, r2_poly]
})

results

Unnamed: 0,Kernel,MAE,RMSE,R2 Score
0,RBF,8599.327418,12877.846307,-0.068214
1,Polynomial,8589.739773,12848.191019,-0.0633


Conclusion

* In this experiment, Support Vector Regression (SVR) was applied using two different kernels: RBF and Polynomial to predict insurance charges.

* The models were evaluated using the following metrics:

- Mean Absolute Error (MAE)

- Root Mean Squared Error (RMSE)

- R² Score

* From the experimental results:

- The Polynomial kernel produced slightly lower MAE compared to RBF.

- The Polynomial kernel also gave a slightly lower RMSE, indicating better prediction accuracy.

- The R² score of the Polynomial kernel was marginally higher (less negative) than that of the RBF kernel.

Since the best model should have:

✔ Lower MAE

✔ Lower RMSE

✔ Higher R² score

It can be concluded that the Polynomial kernel performs slightly better than the RBF kernel for predicting insurance costs in this dataset.

However, both models produced low R² scores, indicating that further hyperparameter tuning or feature engineering could improve performance.