<a href="https://colab.research.google.com/github/AmalAbidS/streamlit-for-insurance/blob/main/Insurance_Gradio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install gradio



Collecting gradio
  Downloading gradio-4.37.2-py3-none-any.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m55.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.0.2 (from gradio)
  Downloading gradio_client-1.0.2-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.2/318.2 kB[0m [31m23.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━

In [2]:
import gradio as gr


In [3]:
import pandas as pd
import numpy as np
import math
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split



### Regression


In [4]:
!unzip /content/Regression.zip

Archive:  /content/Regression.zip
  inflating: insurance.csv           


In [9]:

# Location of the CSV extracted from Regression.zip
csv_file_path = "/content/insurance.csv"

# Read the insurance records into a DataFrame
df = pd.read_csv(filepath_or_buffer=csv_file_path)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [10]:
# Expand each categorical column (sex, smoker, region) into one indicator column per category
df = pd.get_dummies(data=df)

# Preview the encoded frame
df.head(n=5)

Unnamed: 0,age,bmi,children,charges,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
0,19,27.9,0,16884.924,True,False,False,True,False,False,False,True
1,18,33.77,1,1725.5523,False,True,True,False,False,False,True,False
2,28,33.0,3,4449.462,False,True,True,False,False,False,True,False
3,33,22.705,0,21984.47061,False,True,True,False,False,True,False,False
4,32,28.88,0,3866.8552,False,True,True,False,False,True,False,False


In [11]:
# 'charges' is treated as the regression target; every other column is a feature
y = df['charges']
X = df.drop(columns='charges')

# Hold out 20% of the rows for evaluation (fixed seed gives a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [19]:
# Standardize the data
#
# The raw split is re-derived here from the untouched X / y (same test_size and
# random_state as the split cell, so the rows are identical). This makes the cell safe
# to re-run: fitting the scaler on an X_train that this very cell had already scaled
# would leave `scaler` with mean ~0 / std ~1, and raw user inputs passed through
# `scaler.transform` at prediction time would then no longer be scaled like the
# training data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit on the training rows only, then apply the same transform to the held-out rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [20]:
# Linear Regression wrapped in a two-step pipeline (scaling stage + estimator)
pipeline_lin_reg = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])

# Fit on the training split, then predict the held-out split
pipeline_lin_reg.fit(X_train, y_train)
y_pred_lin_reg = pipeline_lin_reg.predict(X_test)

# Report root-mean-squared error and the coefficient of determination on the test split
lin_reg_mse = mean_squared_error(y_test, y_pred_lin_reg)
lin_reg_r2 = r2_score(y_test, y_pred_lin_reg)
print("Linear Regression RMSE:", math.sqrt(lin_reg_mse))
print("Linear Regression R^2:", lin_reg_r2)

Linear Regression RMSE: 5788.86615537482
Linear Regression R^2: 0.7841465689101635


In [21]:
# Define the pipeline for Random Forest Regressor
# random_state is fixed (same seed as the train/test split) so the forest, and therefore
# the metrics printed below, are reproducible across Restart & Run All.
pipeline_rf_reg = Pipeline([
    ('scaler', StandardScaler()),
    ('model', RandomForestRegressor(random_state=42))
])

# Train on the training split and predict the held-out split
pipeline_rf_reg.fit(X_train, y_train)
y_pred_rf = pipeline_rf_reg.predict(X_test)

# Report root-mean-squared error and R^2 on the test split
print("Random Forest RMSE:", math.sqrt(mean_squared_error(y_test, y_pred_rf)))
print("Random Forest R^2:", r2_score(y_test, y_pred_rf))

Random Forest RMSE: 4601.834674161459
Random Forest R^2: 0.8635938460506078


In [25]:
# Define a function for making predictions using the trained models
def predict_model(age, children, sex_female, sex_male, smoker_no, smoker_yes, region_northeast,region_southwest,
                  bmi=None, region_northwest=0, region_southeast=0):
    """Predict insurance charges for one person with both trained models.

    The scaler and the pipelines were fit on every column of ``X`` (the one-hot encoded
    frame minus ``charges``), so the input row must contain all of those columns in the
    same order. The previous version passed only eight values, which makes
    ``scaler.transform`` reject the row because of the feature-count mismatch.

    Parameters
    ----------
    age, children : number
        Raw (unscaled) numeric features.
    sex_female, sex_male, smoker_no, smoker_yes : 0 or 1
        One-hot indicators for sex and smoking status.
    region_northeast, region_southwest : 0 or 1
        One-hot indicators for two of the regions.
    bmi : number, optional
        Raw body-mass index. When omitted, the training mean stored in the scaler is used.
    region_northwest, region_southeast : 0 or 1, optional
        One-hot indicators for the remaining regions; default to 0.

    Returns
    -------
    tuple
        ``(linear_regression_prediction, random_forest_prediction)``.

    Raises
    ------
    ValueError
        If the number of columns in ``X`` differs from what ``scaler`` was fit on.
    """
    supplied = {
        "age": age,
        "bmi": bmi,
        "children": children,
        "sex_female": sex_female,
        "sex_male": sex_male,
        "smoker_no": smoker_no,
        "smoker_yes": smoker_yes,
        "region_northeast": region_northeast,
        "region_northwest": region_northwest,
        "region_southeast": region_southeast,
        "region_southwest": region_southwest,
    }

    # Column order the scaler and models were trained with
    feature_names = list(X.columns)
    if len(feature_names) != scaler.n_features_in_:
        raise ValueError(
            f"X has {len(feature_names)} columns but the scaler was fit on "
            f"{scaler.n_features_in_} features; re-run the split and scaling cells."
        )

    # Any feature without a supplied value falls back to its training mean,
    # which maps to 0 once the row is standardized.
    row = [
        float(scaler.mean_[position]) if supplied.get(name) is None else float(supplied[name])
        for position, name in enumerate(feature_names)
    ]
    input_data = pd.DataFrame([row], columns=feature_names)

    # The pipelines were fit on features already standardized by `scaler`,
    # so raw inputs have to go through that same scaler first.
    input_data_scaled = np.asarray(scaler.transform(input_data))

    prediction_lin_reg = pipeline_lin_reg.predict(input_data_scaled)[0]
    prediction_rf_reg = pipeline_rf_reg.predict(input_data_scaled)[0]

    return prediction_lin_reg, prediction_rf_reg

In [26]:
# Create the Gradio interface components
inputs = [
    # step=1 keeps age and number of children whole numbers; without an explicit step
    # Gradio derives one from the slider range, which is fractional for these ranges.
    gr.Slider(minimum=18, maximum=100, value=30, step=1, label="Age"),
    gr.Slider(minimum=0, maximum=10, value=1, step=1, label="Children"),
    # Explicit defaults: a radio with nothing selected submits None, which the encoding
    # step silently treats as "Male" / smoker "Yes" / "Southwest".
    gr.Radio(choices=["Female", "Male"], value="Female", label="Sex"),
    gr.Radio(choices=["No", "Yes"], value="No", label="Smoker"),
    gr.Radio(choices=["Northeast", "Southwest"], value="Northeast", label="Region")
]

# One text box per model so both predictions are shown side by side
outputs = [
    gr.Textbox(label="Linear Regression Prediction"),
    gr.Textbox(label="Random Forest Prediction")
]

In [29]:
def transform_inputs(age,children, sex, smoker, region):
    """Turn the raw UI selections into the one-hot values expected by ``predict_model``.

    ``age`` and ``children`` pass through unchanged. ``sex`` is encoded as female only
    when it equals "Female" and ``smoker`` as non-smoker only when it equals "No"; any
    other value (including None) takes the opposite flag. Only the northeast and
    southwest region flags are returned, so "Northwest" and "Southeast" produce 0 for
    both, while any unrecognised region is encoded as southwest.

    Returns a tuple ``(age, children, sex_female, sex_male, smoker_no, smoker_yes,
    region_northeast, region_southwest)``.
    """
    is_female = sex == "Female"
    is_non_smoker = smoker == "No"

    if region == "Northeast":
        northeast_flag, southwest_flag = 1, 0
    elif region == "Northwest" or region == "Southeast":
        # These regions have no flag in the returned tuple
        northeast_flag, southwest_flag = 0, 0
    else:
        northeast_flag, southwest_flag = 0, 1

    return (
        age,
        children,
        int(is_female),
        int(not is_female),
        int(is_non_smoker),
        int(not is_non_smoker),
        northeast_flag,
        southwest_flag,
    )

# Assemble the app: the callback one-hot encodes the raw widget values, then queries both models
demo = gr.Interface(
    fn=lambda age, children, sex, smoker, region: predict_model(
        *transform_inputs(age, children, sex, smoker, region)
    ),
    title="Insurance Charges Prediction",
    description="Predict insurance charges using Linear Regression and Random Forest models.",
    inputs=inputs,
    outputs=outputs,
)

# Start serving the interface
demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://c7f0d857298c221b5d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


