In [2]:
import pandas as pd

df = pd.read_csv("/content/insurance.csv")
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [3]:
df_encoded = pd.get_dummies(df, columns=["sex", "smoker", "region"], drop_first=True)
df_encoded.head()

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes,region_northwest,region_southeast,region_southwest
0,19,27.9,0,16884.924,False,True,False,False,True
1,18,33.77,1,1725.5523,True,False,False,True,False
2,28,33.0,3,4449.462,True,False,False,True,False
3,33,22.705,0,21984.47061,True,False,True,False,False
4,32,28.88,0,3866.8552,True,False,True,False,False


In [4]:
X = df_encoded.drop("charges", axis=1)
y = df_encoded["charges"]

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [6]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
from sklearn.svm import SVR

svr_rbf = SVR(kernel="rbf")
svr_rbf.fit(X_train_scaled, y_train)

y_pred_rbf = svr_rbf.predict(X_test_scaled)

In [8]:
svr_poly = SVR(kernel="poly", degree=3)
svr_poly.fit(X_train_scaled, y_train)

y_pred_poly = svr_poly.predict(X_test_scaled)

In [9]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

def evaluate_model(name, y_test, y_pred):
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print(f"----- {name} -----")
    print("MAE :", mae)
    print("RMSE:", rmse)
    print("R2  :", r2)
    print()

    return mae, rmse, r2

In [10]:
results = {}

results["SVR RBF"] = evaluate_model("SVR RBF", y_test, y_pred_rbf)
results["SVR Polynomial"] = evaluate_model("SVR Polynomial", y_test, y_pred_poly)

----- SVR RBF -----
MAE : 8612.408423351833
RMSE: 12889.096314656128
R2  : -0.07008155372454805

----- SVR Polynomial -----
MAE : 8607.801381076031
RMSE: 12872.961371328372
R2  : -0.0674041125836411



In [11]:
results_df = pd.DataFrame(results, index=["MAE", "RMSE", "R2 Score"]).T
results_df

Unnamed: 0,MAE,RMSE,R2 Score
SVR RBF,8612.408423,12889.096315,-0.070082
SVR Polynomial,8607.801381,12872.961371,-0.067404


In [12]:
best_kernel = results_df["RMSE"].idxmin()
print("✅ Best Kernel based on Lowest RMSE:", best_kernel)

✅ Best Kernel based on Lowest RMSE: SVR Polynomial


SVR model was trained using RBF and Polynomial kernels.
The models were evaluated using MAE, RMSE, and R² score.
The best kernel is chosen based on lowest RMSE and highest R².
From results, the best kernel is: (write the printed output).