In [6]:
import pandas as pd
import joblib

In [7]:
# Restore the fitted estimator from disk. It is passed a raw DataFrame later on,
# so it is expected to be a Pipeline that carries its own preprocessing.
# NOTE(review): joblib.load unpickles the file — only load model files you trust.
model_path = "GBR_model.pkl"
best_model_gb = joblib.load(model_path)

In [8]:
# Read the validation records into a DataFrame.
# Point file_path at the CSV you want to score.
file_path = "validation_dataset.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [9]:
# Fail fast when the CSV lacks any of the columns that are used further down.
required_columns = ['age', 'sex', 'bmi', 'children', 'smoker', 'region']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError(f"CSV file must contain these columns: {', '.join(required_columns)}")

In [10]:
# Normalise the categorical columns to the labels used below: sex -> "M"/"F", smoker -> "yes"/"no".
# NOTE(review): assumes these are the labels the model was trained on — confirm.
# Surrounding whitespace and letter case are ignored. Already-coded "M"/"F" values are accepted
# as well, so re-running this cell no longer turns the whole `sex` column into NaN.
# Any value that is not recognised becomes NaN.
sex_labels = {"male": "M", "female": "F", "m": "M", "f": "F"}
smoker_labels = {"yes": "yes", "no": "no"}
df["sex"] = df["sex"].str.strip().str.lower().map(sex_labels)
df["smoker"] = df["smoker"].str.strip().str.lower().map(smoker_labels)

In [11]:
# Keep only the rows whose region is one of the labels listed in valid_regions.
# NOTE(review): assumes these lowercase labels are what the model saw during training — confirm.
valid_regions = ["northeast", "northwest", "southeast", "southwest"]  # Change if needed
# Normalise case and whitespace and store the result, so a value such as "Southeast" is kept
# and reaches the model as "southeast" instead of being silently filtered out.
# astype(str) keeps this working when the column is not text; missing values become "nan"
# and are removed by the filter below.
df["region"] = df["region"].astype(str).str.strip().str.lower()
# .copy() yields an independent frame, so adding a column to it later does not
# raise SettingWithCopyWarning.
df = df[df["region"].isin(valid_regions)].copy()

In [12]:
# Make predictions from the feature columns only (required_columns is defined in the
# column-check cell above). Passing the whole frame would also hand the model helper columns
# such as an unnamed index column from the CSV or, when this cell is re-run, the
# predicted_charges column written below.
# NOTE(review): assumes the pipeline needs no columns beyond required_columns — confirm.
df["predicted_charges"] = best_model_gb.predict(df[required_columns])
df


Unnamed: 0,age,sex,bmi,children,smoker,region,predicted_charges
0,18.0,F,24.09,1.0,no,southeast,5463.732656
1,39.0,M,26.41,0.0,yes,northeast,22810.499627
2,27.0,M,29.15,0.0,yes,southeast,19452.761436
3,71.0,M,65.502135,13.0,yes,southeast,45777.804726
4,28.0,M,38.06,0.0,no,southeast,7628.722147
5,70.0,F,72.958351,11.0,yes,southeast,58284.485182
6,29.0,F,32.11,2.0,no,northwest,8047.802897
7,42.0,F,41.325,1.0,no,northeast,10501.612102
8,48.0,F,36.575,0.0,no,northwest,12410.335002
9,63.0,M,33.66,3.0,no,southeast,15400.372215


## Upload CSV validation data through a Dash app

In [16]:
import dash
from dash import dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
import pandas as pd
import joblib
import base64
import io

# Restore the fitted estimator; it receives raw DataFrames, so it is expected to be a
# Pipeline that includes its own preprocessing.
model_file = "GBR_model.pkl"
best_model_gb = joblib.load(model_file)

# Create the Dash application with the Bootstrap theme and set the browser tab title.
stylesheets = [dbc.themes.BOOTSTRAP]
app = dash.Dash(__name__, external_stylesheets=stylesheets)
app.title = "Insurance Cost Prediction"

# Page layout, built from three rows: the title, the CSV upload area, and the results area.

# Appearance of the dashed drag-and-drop box.
upload_box_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px',
}

# Title
title_row = dbc.Row(
    dbc.Col(
        html.H1("Insurance Cost Prediction", className="text-center my-4"),
        width=12,
    )
)

# File upload section; 'output-data-upload' is filled in by the upload callback.
upload_row = dbc.Row(
    dbc.Col(
        [
            html.H4("Upload CSV File", className="mb-3"),
            dcc.Upload(
                id='upload-data',
                children=html.Div(['Drag and Drop or ', html.A('Select a CSV File')]),
                style=upload_box_style,
                multiple=False,
            ),
            html.Div(id='output-data-upload'),
        ],
        md=12,
    )
)

# Prediction results section; 'prediction-results' is filled in by the upload callback.
results_row = dbc.Row(
    dbc.Col(
        [
            html.H4("Prediction Results", className="mt-4"),
            html.Div(id='prediction-results'),
        ],
        md=12,
    )
)

app.layout = dbc.Container([title_row, upload_row, results_row], fluid=True)

# Callback to handle file upload and predictions
@app.callback(
    [Output('output-data-upload', 'children'),
     Output('prediction-results', 'children')],
    [Input('upload-data', 'contents')],
    [State('upload-data', 'filename')]
)
def update_output(contents, filename):
    """Parse an uploaded CSV, score it with the loaded model and render the results.

    Args:
        contents: Value of the upload component's ``contents`` property, expected in the
            form ``"<header>,<base64 payload>"``; ``None`` until a file is uploaded.
        filename: Name of the uploaded file; used only in error messages.

    Returns:
        A 2-tuple ``(upload_area_children, results_area_children)``. On success both
        items are Bootstrap tables (a preview of the first rows and the full prediction
        table). On failure the first item is a message string and the second is ``""``.
    """
    if contents is None:
        return "No file uploaded yet.", ""

    # Parse the uploaded file. A missing comma, invalid base64, non-UTF-8 bytes and
    # malformed or empty CSV content all raise ValueError subclasses, so one handler
    # turns every one of them into a message instead of an unhandled callback error.
    try:
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)
        df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
    except ValueError as e:
        return f"Could not read {filename or 'the uploaded file'} as a UTF-8 CSV file: {e}", ""

    # Ensure column names match
    required_columns = ['age', 'sex', 'bmi', 'children', 'smoker', 'region']
    if not all(col in df.columns for col in required_columns):
        return f"CSV file must contain these columns: {', '.join(required_columns)}", ""

    # Normalise the categorical columns: ignore case and surrounding whitespace.
    # astype(str) keeps this working when a column was not parsed as text.
    for col in ("sex", "smoker", "region"):
        df[col] = df[col].astype(str).str.strip().str.lower()

    # Recode to the labels used for prediction; unrecognised values become NaN.
    # NOTE(review): assumes the model was trained on "M"/"F" and "yes"/"no" — confirm.
    df["sex"] = df["sex"].map({"male": "M", "female": "F", "m": "M", "f": "F"})
    df["smoker"] = df["smoker"].map({"yes": "yes", "no": "no"})

    # Keep rows with a known region. The column now holds the normalised label, so the
    # model receives the same lowercase value that passed the filter. .copy() makes the
    # result independent, so the column assignment below does not warn.
    valid_regions = ["northeast", "northwest", "southeast", "southwest"]
    df = df[df["region"].isin(valid_regions)].copy()
    if df.empty:
        return f"No rows with a valid region ({', '.join(valid_regions)}) were found.", ""

    try:
        # Make predictions (pipeline should handle encoding). Only the feature columns
        # are passed, so extra columns in the upload never reach the model.
        df["predicted_charges"] = best_model_gb.predict(df[required_columns])

        # Display the uploaded data and predictions
        uploaded_data_table = dbc.Table.from_dataframe(
            df.head(),
            striped=True,
            bordered=True,
            hover=True,
            responsive=True
        )

        prediction_results_table = dbc.Table.from_dataframe(
            df[required_columns + ['predicted_charges']],
            striped=True,
            bordered=True,
            hover=True,
            responsive=True
        )

        return uploaded_data_table, prediction_results_table

    except ValueError as e:
        return f"Error in prediction: {e}", ""

if __name__ == "__main__":
    # Try another port, such as 8060 or 8070.
    app.run_server(port=8060, debug=True)

