In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

# Load the Dataset
# NOTE(review): absolute Colab path — the CSV has to be present at
# /content/insurance.csv before this cell is run.
df = pd.read_csv('/content/insurance.csv')

# Column roles used by the preprocessing step below.
categorical = ['sex', 'smoker', 'region']
numerical = ['age', 'bmi', 'children']

# Fail early with a readable message if the file does not have the columns
# the rest of the cell relies on.
missing_columns = set(numerical + categorical + ["charges"]) - set(df.columns)
if missing_columns:
    raise ValueError(
        f"insurance.csv is missing expected columns: {sorted(missing_columns)}"
    )

# Split into Features and Target
x = df.drop("charges", axis=1)
y = df["charges"]

# Preprocessing Pipeline
# Numeric columns are standardized; categorical columns are one-hot encoded
# with the first level of each feature dropped.
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numerical),
    ('cat', OneHotEncoder(drop='first'), categorical)
])

# Create the Full Pipeline with Linear Regression
# Bundling the preprocessor with the regressor means the saved artifact
# accepts raw feature frames at prediction time.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

# Train/Test Split and Model Training
# random_state is fixed so the 80/20 split is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
model.fit(x_train, y_train)

# Evaluate on the held-out split before persisting, so the saved model
# comes with a recorded quality figure (R^2 for a regressor).
r2_test = model.score(x_test, y_test)
print(f"Held-out R^2: {r2_test:.3f}")

# Save the Model (optional)
# Kept as the last expression so the cell displays the written file list.
joblib.dump(model, 'insurance_model.pkl')

['insurance_model.pkl']

In [None]:
# Load the trained model
# joblib.load unpickles the file, which can execute arbitrary code; only
# load artifacts produced by this notebook or another trusted source.
model = joblib.load('insurance_model.pkl')

# Offer the saved file as a browser download when running in Google Colab.
# The google.colab package only exists inside Colab, so the import is
# guarded to keep the notebook runnable in other Jupyter environments.
try:
    from google.colab import files
except ImportError:
    print("google.colab is unavailable; 'insurance_model.pkl' was left in the working directory.")
else:
    files.download('insurance_model.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>