**IMPORTS**

In [1]:
 # Run this first
%pip install dash
%pip install Flask
from dash import Dash, dcc, html, dash_table, Input, Output, State, callback
import pandas as pd
import numpy as np
import base64
import plotly.express as px
import io

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


**Server startup**

In [2]:
# Create the Dash application; the page layout is assigned to it further down.
app = Dash(__name__)
# Keep a module-level handle on the app's underlying server object
# (presumably what the hosting platform imports as its entry point -- confirm).
server = app.server # needed for gcloud!

**Frontend**

In [3]:
# Page layout, top to bottom: CSV upload box, data store, target selector,
# two bar charts, training controls, and prediction controls.
# The component ids below are referenced by the callbacks later in the file.
app.layout = html.Div([
        # Upload area; its `contents` property is the input of process_input.
        dcc.Upload(
            id='upload-data',
            children=html.Div([
                'Drag and Drop or ',
                html.A('Select Files')
            ]),
            style={
                'width': '100%',
                'height': '60px',
                'lineHeight': '60px',
                'borderWidth': '1px',
                'borderRadius': '5px',
                'textAlign': 'center',
                'margin': '10px',
                'background': '#E0E0E0'
            },
        ),
        dcc.Store(id='stored-data'), #data from the CSV is stored here
        # Target selector row: a label plus a dropdown whose options are filled
        # by populate_dropdown once data has been stored.
        html.Div(
            children = [
                html.P(
                    children = ["Select Target:"],
                    style = {
                        "display": "inline-block",
                        "margin-left": "auto",
                        "margin-top": "auto",
                        "margin-bottom": "auto",
                        "padding-right": "6px",
                    },
                ),
                dcc.Dropdown(
                    id="target-dropdown",
                    style={
                        "width": "150px",
                        "height": "40px",
                        "margin-right": "auto",
                        "display": "inline-block",
                        "margin-top": "auto",
                        "margin-bottom": "auto",
                    },
                ),
            ],
            style={
                "width": "100%",
                "height": "60px",
                "lineHeight": "40px",
                "textAlign": "center",
                "borderWidth": "1px",
                "borderRadius": "5px",
                "margin": "10px",
                "background": "#E0E0E0",
                "display": "flex",
            },
        ),

        html.Div( # Bar Graphs
            id="graphs",
            style={"margin":"auto", "text-align": "center"},
            children=[

                # Left chart: radio buttons pick the categorical column (options
                # come from populate_bar_radio); the figure is set by create_bar1.
                html.Span(
                    id="barspan",
                    style={"width":"40%", 'height':'60%', "display":"inline-block", 'padding':'5px', "vertical-align": "top"},
                    children=[
                        dcc.RadioItems(
                            id="categories",
                            options=[],
                            inline=True,
                            style={
                                'padding':'5px 0px 5px 0px'
                            }
                        ),
                        dcc.Graph( # bar graph 1
                            id="bar1",
                            figure=px.bar(),

                        ),
                    ]),
                # Right chart: figure is set by create_bar2. Both charts start
                # out as empty px.bar() figures.
                dcc.Graph( # bar graph 2
                    id="bar2",
                    figure=px.bar(),
                    style={"width":"40%", 'height':'60%', "display":"inline-block", 'padding-top':'32px'}
                )
            ]
        ),

        # Training row: feature checklist (filled by populate_checklist), the
        # Train button, and the paragraph show_r_squared writes its result into.
        html.Div(
            children=[
                dcc.Checklist(
                    id="training-list",
                    inline=True,
                    style={
                        "margin": "auto",
                    },
                ),
                html.Button(
                    "Train",
                    id="train-button",
                ),
                html.P(
                    id="r-squared-output",
                ),
            ],
            style={
                "width": "100%",
                "height": "fit-content",
                "lineHeight": "40px",
                "textAlign": "center",
                "borderWidth": "1px",
                "borderRadius": "5px",
                "margin": "10px",
                "background": "#E0E0E0",
            },
        ),

        # Prediction row: free-text input (read by predict_data as a
        # comma-separated string), the Predict button, and the output paragraph.
        html.Div(
            children=[
                dcc.Input(
                    id="predict-input",
                    type="text",
                    style = {
                        "margin-left": "auto",
                        "margin-right": "10px",
                        "display": "inline-block",
                        "margin-top": "5px",
                        "margin-bottom": "5px",
                        "height": "20px",
                    }
                ),
                html.Button(
                    "Predict",
                    id="predict-button",
                    style = {
                        "display": "inline-block",
                        "margin-top": "5px",
                        "margin-bottom": "5px",
                        "height": "20px",
                    }
                ),
                html.P(
                    id="predict-output",
                    style = {
                        "display": "inline-block",
                        "margin-right": "auto",
                        "margin-left": "10px",
                        "margin-top": "5px",
                        "margin-bottom": "5px",
                        "height": "20px",
                    }
                ),
            ],
            style={
                "width": "100%",
                "height": "fit-content",
                "lineHeight": "40px",
                "textAlign": "center",
                "borderWidth": "1px",
                "borderRadius": "5px",
                "margin": "10px",
                "background": "#E0E0E0",
                "display": "flex",
            },
        )
    ]
)


**Callback functions**

In [4]:
#----------------------------Call Back to Create DataFrame and Store It
@callback(Output('stored-data', 'data'),
          Input('upload-data', 'contents'),
          prevent_initial_call=True) # This is to prevent the callback from running automatically when the app first launches
def process_input(contents):
    """Decode an uploaded CSV, clean it, and return it as records for dcc.Store.

    Parameters
    ----------
    contents : str or None
        Value of the upload component's `contents` property: a header and a
        base64 payload separated by a comma.

    Returns
    -------
    list of dict
        One dict per remaining row (`DataFrame.to_dict('records')`), because
        dcc.Store cannot hold a DataFrame. When `contents` is empty the stored
        data is left untouched (`dash.no_update`).

    Cleaning steps: columns that are more than 50% null are dropped, then every
    row that still contains a null is dropped.
    """
    MAX_NULL_FRACTION = 0.5  # columns with a larger share of nulls are discarded

    if not contents:
        # Nothing to parse (e.g. the upload was cleared): keep whatever is stored.
        from dash import no_update
        return no_update

    # Split on the first comma only so the unpacking cannot fail on extra commas.
    _, content_string = contents.split(',', 1)
    decoded = base64.b64decode(content_string)

    # 'utf-8-sig' reads ordinary UTF-8 and additionally strips a leading
    # byte-order mark, which would otherwise end up inside the first column name.
    data = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))

    #preprocessing
    # Share of nulls per column; NaN for a file with no rows, which compares
    # False below and therefore keeps the columns.
    null_fraction = data.isnull().mean()
    mostly_null = null_fraction[null_fraction > MAX_NULL_FRACTION].index
    data = data.drop(columns=mostly_null)

    data = data.dropna(how='any') #removes rows with null values

    return data.to_dict('records') # necessary as dcc.Store doesn't accept DataFrames
#----------------------------------------------------------------------

#Callback to Populate Dropdown Menu------------------------------------
@callback(Output('target-dropdown', 'options'),
          Input('stored-data', 'data'),
          prevent_initial_call=True)
def populate_dropdown(data):
    """Return the names of the numeric columns of the stored data as dropdown options."""
    # dcc.Store hands the data back as a list of row dicts, so rebuild the frame first.
    frame = pd.DataFrame(data)
    numeric_part = frame.select_dtypes('number')
    return numeric_part.columns.tolist()
#----------------------------------------------------------------------

#Callback to Populate Checklist ------------------------------------
@callback(Output('training-list', 'options'), Output('training-list', 'value'),
          Input('stored-data', 'data'),
          prevent_initial_call=True)
def populate_checklist(data):
    """Return every column name as a checklist option, with nothing ticked."""
    # dcc.Store hands the data back as a list of row dicts, so rebuild the frame first.
    frame = pd.DataFrame(data)
    column_names = frame.columns.tolist()
    no_selection = []
    return column_names, no_selection
#----------------------------------------------------------------------

# Callbacks to create bar graph 1 --------------------------------------
# Radio Items
@callback(
    Output('categories', 'options'),
    Output('categories', 'value'),
    Input('stored-data', 'data'),
    prevent_initial_call=True
)
def populate_bar_radio(data):
    """Return the object-dtype column names as radio options and clear the selection."""
    frame = pd.DataFrame(data)
    text_columns = frame.select_dtypes('object').columns.tolist()
    # The second output resets the currently selected radio item.
    return text_columns, None

# Create Bar Graph
@callback(
    Output('bar1', 'figure'),
    Input('categories', 'value'),
    State('stored-data', 'data'),
    Input('target-dropdown', 'value'),
    prevent_initial_call=True,
)
def create_bar1(category, data, target):
    """Plot the mean of `target` for each value of the chosen `category` column.

    Parameters
    ----------
    category : str or None
        Column selected in the radio items; used as the grouping key.
    data : list of dict or None
        Rows held in the data store.
    target : str or None
        Column selected in the target dropdown; averaged per group.

    Returns
    -------
    plotly figure
        The bar chart, or an empty `px.bar()` when a selection or the data is
        missing, or when a selected column is not present in the data.
    """
    if category is None or target is None or not data:
        return px.bar()

    df = pd.DataFrame(data)
    # A selection may be left over from a previously uploaded file.
    if category not in df.columns or target not in df.columns:
        return px.bar()

    # One row per category value, with the averaged target next to it.
    grouped = df.groupby(category)[target].mean().reset_index()
    fig = px.bar(
        grouped,
        x=category,
        y=target,
        # The bars show the target averaged within each category, so the title
        # names the target first.
        title='Average ' + target + ' by ' + category,
        text_auto=True,
    )
    fig.update_traces(marker_color='skyblue')
    return fig

# ---------------------------------------------------------------------

# Callback to create bar graph 2 ---------------------------------------
@callback(
    Output('bar2', 'figure'),
    Input('target-dropdown', 'value'),
    State('stored-data', 'data'),
    prevent_initial_call=True
)
def create_bar2(dropdown, data):
    """Plot the absolute correlation of every other numeric column with the target.

    `dropdown` is the selected target column name (or None); `data` is the list
    of row dicts from the data store. Returns an empty `px.bar()` while no
    target is selected.
    """
    frame = pd.DataFrame(data)
    if dropdown is None:
        return px.bar()

    # Absolute correlations, without the target's own row, reduced to the
    # single column that belongs to the target.
    strength = (
        frame.corr(numeric_only=True)
        .abs()
        .drop(labels=[dropdown], axis='index')
    )[[dropdown]]

    chart = px.bar(
        strength,
        x=strength.index,
        y=dropdown,
        title="Correlation Strength of Numerical Variables with " + dropdown,
        text_auto=True,
    )
    chart.update_layout(
        xaxis_title='Numerical Variables',
        yaxis_title='Correlation Strength (Absolute Value)'
    )
    return chart
#----------------------------------------------------------------------

# Callback to train model----------------------------------------------
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder

# Single module-level estimator: show_r_squared fits it and predict_data reads
# it, so each training run overwrites the model used by later predictions.
model = LinearRegression()

def preprocess_data(df, values, target):
    """Split `df` into a feature frame and a target series.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    values : list
        Names of the feature columns.
    target : str
        Name of the target column.

    Returns
    -------
    tuple
        `(features, response)`. When the selected features include object
        columns, the features are one-hot encoded with `pd.get_dummies`.

    Raises
    ------
    ValueError
        If the target column has object dtype.
    """
    features = df[values]
    response = df[target]

    has_text_columns = not features.select_dtypes(include=['object']).empty
    if has_text_columns:
        features = pd.get_dummies(features, drop_first=False)

    if response.dtype == 'object':
        raise ValueError(f"Target variable {target} must be a numerical value.")

    return features, response

@callback(Output('r-squared-output', 'children'),
          [Input('train-button', 'n_clicks'), State('training-list', 'value'), State('stored-data', 'data'), State('target-dropdown', 'value')],
          prevent_initial_call = True)
def show_r_squared(n_clicks, values, data, target):
    """Fit the shared linear model on the selected features and report its R2.

    Parameters
    ----------
    n_clicks : int
        Click count of the Train button; only used as the trigger.
    values : list or None
        Feature columns ticked in the checklist.
    data : list of dict or None
        Rows held in the data store.
    target : str or None
        Column selected in the target dropdown.

    Returns
    -------
    str
        The R2 score on a 20% hold-out split, or a message asking for the
        missing selection or upload.
    """
    if not values:
        return "Please select at least one feature to train."
    # Without these guards the column lookups below fail with a KeyError.
    if not data:
        return "Please upload a dataset before training."
    if target is None:
        return "Please select a target before training."

    # NOTE(review): the checklist offers every column, so `target` can also be
    # ticked as a feature; the model would then be fit on the target itself.
    df = pd.DataFrame(data)
    x, y = preprocess_data(df, values, target)
    # Fixed random_state keeps the split, and therefore the score, repeatable.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    return f"The R2 score is: {r2_score(y_test, y_pred)}"
#----------------------------------------------------------------------

# Callback to predict model----------------------------------------------
@callback(
    Output("predict-output", "children"),
    Input("predict-button", "n_clicks"),
    State("predict-input", "value"),
    State("stored-data", "data"),
    State("training-list", "value"),
    State("target-dropdown", "value"),
    prevent_initial_call=True,
)
def predict_data(n_clicks, raw_input, data, x_list, y_target):
    """Predict the target for one comma-separated row of feature values.

    Parameters
    ----------
    n_clicks : int
        Click count of the Predict button; only used as the trigger.
    raw_input : str or None
        Comma-separated values, one per selected feature, in the same order
        as `x_list`.
    data : list of dict or None
        Rows held in the data store.
    x_list : list or None
        Feature columns ticked in the checklist.
    y_target : str or None
        Column selected in the target dropdown.

    Returns
    -------
    str
        The formatted prediction, or a message describing what is missing or
        what went wrong.
    """
    if not data:
        return "Please upload a dataset first."
    if not x_list or y_target is None:
        return "Please select a target and at least one training feature."
    if not raw_input:
        return "Please enter comma-separated feature values."

    try:
        df = pd.DataFrame(data)

        # Same preprocessing as training, to learn the encoded column layout.
        x, _ = preprocess_data(df, x_list, y_target)

        input_list = [item.strip() for item in raw_input.split(",")]
        if len(input_list) != len(x_list):
            return "Incorrect number of comma-separated inputs"

        categorical = set(df[x_list].select_dtypes(include='object').columns)

        # Start from an all-zero row in the encoded layout and fill it in
        # feature by feature, pairing each input with its feature by position.
        row = dict.fromkeys(x.columns, 0)
        for feature, value in zip(x_list, input_list):
            if feature in categorical:
                # Encoded columns are named "<feature>_<category>"; match the
                # category case-insensitively.
                encoded_names = {
                    str(level).lower(): f"{feature}_{level}"
                    for level in df[feature].unique()
                }
                column = encoded_names.get(value.lower())
                if column is None or column not in row:
                    return f"Unknown value '{value}' for {feature}"
                row[column] = 1
            else:
                row[feature] = value

        input_df = pd.DataFrame([row], columns=x.columns)
        # Values that cannot be parsed as numbers become NaN and then 0.
        input_df = input_df.apply(pd.to_numeric, errors="coerce").fillna(0)

        # To check if the model is trained or not.
        if not hasattr(model, "coef_"):
            return "Model is not trained yet"

        y_pred = model.predict(input_df)

        return f"Predicted {y_target} is: {y_pred[0]:.2f}"
    except Exception as e:
        # Report the problem in the output paragraph instead of failing the callback.
        return f"Error: {str(e)}"
#----------------------------------------------------------------------

# Start the app's server only when this file is executed directly (not when it
# is imported for its `server` object); debug mode is switched on here.
if __name__ == '__main__':
    app.run(debug=True)