In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score
import joblib  # To save the model

# Step 1: Read the insurance CSV into a DataFrame and display it
dataset = pd.read_csv(filepath_or_buffer="insurance_pre.csv")
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [2]:
# Step 2: One-hot encode the categorical columns.
# drop_first=True drops the first level of each encoded column, and
# dtype=int emits the indicator columns as integers instead of booleans.
dataset = pd.get_dummies(data=dataset, drop_first=True, dtype=int)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [3]:
# Step 3: Separate the target from the predictors.
# Both selections use a list of labels, so each result is a DataFrame.
dependent = dataset.loc[:, ['charges']]
independent = dataset.loc[:, ['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]


In [4]:
# Step 4: Hold out 30% of the rows for testing; random_state pins the shuffle
x_train, x_test, y_train, y_test = train_test_split(
    independent,
    dependent,
    test_size=0.3,
    random_state=0,
)

In [5]:
# Step 5: Standardize the input features (independent variables).
# The scaler is fitted on the training split only and then reused, unchanged,
# on the test split.
#
# This cell rebinds x_train / x_test to the scaled arrays, so running it a
# second time without re-running the split would fit the scaler on data that
# is already scaled. Fail loudly in that case instead of silently producing a
# wrong scaler.
if not isinstance(x_train, pd.DataFrame):
    raise RuntimeError(
        "x_train is no longer the unscaled DataFrame from train_test_split; "
        "re-run the split cell before re-running the scaling cell."
    )
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [6]:
# Step 6: Build an RBF-kernel SVR (C=3000) and fit it on the scaled training data.
# y_train is a one-column DataFrame, so it is flattened to 1-D before fitting.
regressor = SVR(C=3000, kernel='rbf')
regressor.fit(X=x_train, y=y_train.to_numpy().ravel())


In [7]:
# Optional: evaluate the fitted model on the held-out test split with the R^2 score
y_pred = regressor.predict(X=x_test)
r_score = r2_score(y_true=y_test, y_pred=y_pred)
print(f'R-squared Score: {r_score}')

R-squared Score: 0.8663393953081687


In [8]:
# Step 7: Persist the fitted model and scaler so the deployment phase can reload them
joblib.dump(value=regressor, filename='svr_model.pkl')  # fitted SVR model
joblib.dump(value=sc, filename='scaler.pkl')  # fitted StandardScaler


['scaler.pkl']

Deployment Phase

In [9]:
# Import necessary libraries
import pandas as pd
import joblib  # To load the model and scaler


In [10]:
# Step 1: Restore the artifacts written during training
sc = joblib.load(filename='scaler.pkl')  # fitted StandardScaler
regressor = joblib.load(filename='svr_model.pkl')  # fitted SVR model

In [11]:
# Step 2: Build a one-row example input for prediction.
# The columns must be the same features the model was trained on.
new_data = pd.DataFrame(
    data=dict(
        age=[45],
        bmi=[30.5],
        children=[2],
        sex_male=[1],    # one-hot flag: 1 = male, 0 = female
        smoker_yes=[0],  # one-hot flag: 1 = smoker, 0 = non-smoker
    )
)

In [12]:
# Display the example input to confirm its columns and values
new_data

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,45,30.5,2,1,0


In [13]:
# Step 3: Preprocess the new input with the scaler loaded from disk,
# i.e. the same scaling that was fitted during training
new_data_scaled = sc.transform(X=new_data)

In [14]:
# Display the scaled feature values that will be passed to the model
new_data_scaled

array([[ 0.39477427, -0.0404936 ,  0.76341038,  1.02378711, -0.50466988]])

In [15]:
# Step 4: Run the reloaded model on the scaled input
predicted_charge = regressor.predict(X=new_data_scaled)

In [16]:
# Display the raw prediction array (one entry per input row)
predicted_charge

array([8418.49688878])

In [17]:
# Step 5: Output the prediction.
# Only the first (here, the only) element is printed, rounded to two decimals.
print(f'Predicted Insurance Charge: {predicted_charge[0]:.2f}')

Predicted Insurance Charge: 8418.50
