In [13]:
%%writefile Insurance_Prediction_app.py

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

# Title of the app
st.title("Insurance Prediction App Using Linear Regression")

# Uploading file option
uploaded_file = st.file_uploader("Please, upload your CSV file", type='csv')

# Load the dataset
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    st.title("Data Preview")
    st.dataframe(df.head())

    st.title("Data Info")
    buffer = []
    df.info(buf=buffer)
    info_str = '\n'.join(buffer)
    st.text(info_str)

    st.title('Missing Values')
    st.write(df.isnull().sum())

    # Handling missing values for numerical & categorical columns
    num_cols = df.select_dtypes(include=['float64', 'int64']).columns
    cat_cols = df.select_dtypes(include=['object']).columns

    # Imputing the numerical values
    num_imputer = SimpleImputer(strategy="mean")
    df[num_cols] = num_imputer.fit_transform(df[num_cols])

    # Imputing the categorical values
    col_imputer = SimpleImputer(strategy="most_frequent")
    df[cat_cols] = col_imputer.fit_transform(df[cat_cols])

    # Label encoding categorical values
    le_smoker = LabelEncoder()
    df['smoker'] = le_smoker.fit_transform(df['smoker'])
    le_region = LabelEncoder()
    df['region'] = le_region.fit_transform(df['region'])

    # Selecting the relevant features and target variable
    x = df[['age', 'sex', 'bmi', 'children', 'smoker', 'region']]
    y = df['charges']

    # Splitting the dataset into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    # Creating the model
    model = LinearRegression()
    model.fit(x_train, y_train)

    # Prediction on the test set
    y_pred = model.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    st.write(f"Mean Squared Error: {mse:.2f}")

    # User input for insurance charges
    st.header("Predict Insurance Charges")

    # User inputs
    age = st.number_input('Age', min_value=2, max_value=100, value=25)
    bmi = st.number_input('BMI', min_value=10, max_value=50, value=25)
    children = st.number_input('Children', min_value=0, max_value=5, value=0)
    smoker = st.selectbox('Smoker', ('Yes', 'No'))
    region = st.selectbox('Region', ('Southwest', 'Southeast', 'Northwest', 'Northeast'))

    # Encode user inputs
    smoker_encoded = le_smoker.transform([smoker])[0]
    if region in le_region.classes_:
        region_encoded = le_region.transform([region])[0]
    else:
        st.error(f"Region '{region}' not found in the label encoder classes.")
        region_encoded = None

    if region_encoded is not None:
        input_data = pd.DataFrame([[age, bmi, children, smoker_encoded, region_encoded]],
                                  columns=['age', 'bmi', 'children', 'smoker', 'region'])
        predicted = model.predict(input_data)
        st.write(f"Predicted Insurance Charges: ${predicted[0]:.2f}")

        # Radar chart visualization
        st.header("Radar Chart for Input Comparison")
        input_stats = {
            'Age': age / 100,  # Normalizing to 0-1 range
            'BMI': bmi / 50,  # Normalizing to 0-1 range
            'Children': children / 5,  # Normalizing to 0-1 range
            'Smoker': smoker_encoded,  # Binary, no normalization needed
            'Region': region_encoded / len(le_region.classes_)  # Normalizing based on total regions
        }

        categories = list(input_stats.keys())
        values = list(input_stats.values())
        values += values[:1]  # Repeat first value to close the radar chart

        fig = go.Figure()
        fig.add_trace(go.Scatterpolar(
            r=values,
            theta=categories + [categories[0]],
            fill='toself',
            name='User Input'
        ))
        fig.update_layout(
            polar=dict(
                radialaxis=dict(visible=True, range=[0, 1])
            ),
            showlegend=False
        )
        st.plotly_chart(fig)


Overwriting Insurance_Prediction_app.py


In [14]:
!streamlit run Insurance_Prediction_app.py & >/content/logs.text & curl ipv4.icanhazip.com

34.148.119.250

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.148.119.250:8501[0m
[0m
[34m  Stopping...[0m


In [15]:
!npx localtunnel --port 8501

your url is: https://wicked-squids-clap.loca.lt
^C
