# Libraries used

In [1]:
# Basic libraries
import numpy as np
import pandas as pd
import datetime
import io
import base64
import warnings
warnings.filterwarnings("ignore")

# plotly dash 
import plotly.express as px
from jupyter_dash import JupyterDash
import dash
from dash.dependencies import Input, Output, State
# import dash_core_components as dcc
# import dash_html_components as html
from dash import dcc
from dash import html
import dash_table
import dash_bootstrap_components as dbc

# Machine learning
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import sklearn.metrics as metrics
from scipy.stats import zscore
from sklearn.model_selection import train_test_split

# Functions

In [2]:
# Upload file

#https://dash.plotly.com/dash-core-components/upload

def parse_contents(contents, filename, date, idx):
    """Decode one uploaded file into a global DataFrame and render a preview.

    Parameters
    ----------
    contents : str
        Upload payload of the form ``"<content type>,<base64 data>"``.
    filename : str
        Name of the uploaded file. A name containing ``csv`` is read with
        ``pd.read_csv``; one containing ``xls`` is read with ``pd.read_excel``.
        The check is case-insensitive.
    date : Any
        Last-modified value passed by the caller (not used here).
    idx : int
        ``0`` stores the parsed frame in the global ``df``; any other value
        stores it in the global ``dfx``.

    Returns
    -------
    html.Div
        The file name plus a paged table preview of the parsed frame, or a
        Div with an error message when the file cannot be processed
        (malformed payload, unreadable content, or unsupported file type).
    """
    global df, dfx
    error_box = html.Div([
        'There was an error processing this file.'
    ])
    try:
        # Splitting inside the try block keeps a malformed payload from
        # escaping as an unhandled callback error.
        content_type, content_string = contents.split(',')
        decoded = base64.b64decode(content_string)
        lowered = filename.lower()
        if 'csv' in lowered:
            # Assume that the user uploaded a CSV file
            disp = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in lowered:
            # Assume that the user uploaded an excel file
            disp = pd.read_excel(io.BytesIO(decoded))
        else:
            # Previously this path fell through with `disp` unbound and
            # crashed while building the preview table.
            print('Unsupported file type: {}'.format(filename))
            return error_box
        if idx == 0:
            df = disp
        else:
            dfx = disp
        # Refresh the global numeric/categorical column lists from `df`.
        catNum(df)
    except Exception as e:
        print(e)
        return error_box

    return html.Div([
        html.H5(filename),
        dash_table.DataTable(
            data=disp.to_dict('records'),
            columns=[{'name': i, 'id': i} for i in disp.columns],
            page_size=5, style_table={'overflowX': 'auto'}, style_as_list_view=False,
            style_data={'color': 'white','backgroundColor': 'black'},
            style_header={'backgroundColor': 'white','color': 'black','fontWeight': 'bold'}),

        html.Hr() ])

In [3]:
# Classifying categorical and numerical variables in a dataframe
def catNum(df):
    """Split the columns of ``df`` into categorical and numerical names.

    Every numeric dtype (any integer or float width) counts as numerical.
    Boolean columns and everything else count as categorical.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are classified.

    Returns
    -------
    tuple[list, list]
        ``(dfcat, dfnum)``: categorical column names first, numerical second.
        The same two lists are also stored in the globals ``dfcat`` and
        ``dfnum``.
    """
    global dfnum, dfcat
    dfnum, dfcat = [], []
    for feature in df.columns:
        dtype = df[feature].dtype
        # Comparing against 'int64'/'float' alone misses int32, float32,
        # unsigned and nullable numeric columns. Booleans stay categorical,
        # as they were before.
        is_number = (pd.api.types.is_numeric_dtype(dtype)
                     and not pd.api.types.is_bool_dtype(dtype))
        if is_number:
            dfnum.append(feature)
        else:
            dfcat.append(feature)
    return dfcat, dfnum

In [4]:
# Missing value and duplicates removal
def preProc(idx):
    """Report, and optionally remove, duplicates and missing values in ``df``.

    Parameters
    ----------
    idx : int
        ``0`` only reports the current counts ("before" labels).
        ``1`` drops duplicate rows and rows with missing values from the
        global ``df`` in place, then reports what is left ("after" labels).

    Returns
    -------
    tuple[str, str] or None
        ``(duplicates message, missing-values message)`` for ``idx`` 0 or 1;
        ``None`` for any other ``idx``.
    """
    global df
    if idx == 0:
        dup, miss = df.duplicated().sum(), df.isnull().sum().sum()
        return u'No of duplicates (before): {}'.format(dup), u'No of missing values (before): {}'.format(miss)
    if idx == 1:
        df.drop_duplicates(inplace=True)
        df.dropna(inplace=True)
        # Count after the cleanup. Counting first made the "after" labels
        # show the pre-cleanup numbers.
        dup, miss = df.duplicated().sum(), df.isnull().sum().sum()
        return u'No of duplicates (after) : {}'.format(dup), u'No of missing values (after) : {}'.format(miss)
    return None
def remove_outlier(col):
    """Return the 1.5 * IQR outlier fences for a numeric sequence.

    Parameters
    ----------
    col : array-like
        Numeric values. NaN entries are ignored when computing quartiles.

    Returns
    -------
    tuple[float, float]
        ``(lower_range, upper_range)`` where ``lower_range = Q1 - 1.5 * IQR``
        and ``upper_range = Q3 + 1.5 * IQR``.
    """
    # No sort is needed: the percentile routine orders the data itself, and
    # the old `sorted(col)` result was thrown away.
    # nanpercentile stops a single missing value from turning both fences
    # into NaN.
    Q1, Q3 = np.nanpercentile(col, [25, 75])
    IQR = Q3 - Q1
    lower_range = Q1 - (1.5 * IQR)
    upper_range = Q3 + (1.5 * IQR)
    return lower_range, upper_range

In [5]:
# Build regression models
def regMod(child):
    """Fit every candidate regressor on z-scored data and tabulate the scores.

    Parameters
    ----------
    child : list
        Column names of the global ``df``: the response variable first,
        followed by the predictor variables.

    Returns
    -------
    dbc.Container
        A table with one row per model (R squared, RMSE, MAPE and mean
        absolute error on the train and test splits), followed by an empty
        table whose header lists the predictor columns expected in a
        prediction upload.

    Notes
    -----
    Rebinds the globals ``dfm`` (selected columns), ``dfd`` (dummy-encoded),
    ``dfs`` (z-scored), ``dfsnum``/``dfscat`` (column names of ``dfs`` by
    dtype) and ``model`` (the last estimator fitted).
    """
    global df, dfm, dfd, dfs, dfsnum, dfscat, model
    y_var = child[0]
    dfm = df[child].copy()

    # One-hot encode object columns so every feature is numeric.
    obcols = [col for col in dfm.columns if dfm[col].dtype == 'object']
    if len(obcols) > 0:
        dfd = pd.get_dummies(dfm, columns=obcols, drop_first=True)
    else:
        dfd = dfm.copy()
    dfs = dfd.apply(zscore)

    X = dfs.drop(y_var, axis=1)
    y = dfs[[y_var]]
    dfsnum, dfscat = [], []
    for feature in dfs.columns:
        if dfs[feature].dtype == 'int64' or dfs[feature].dtype == 'float':
            dfsnum.append(feature)
        else:
            dfscat.append(feature)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)
    regs = [LinearRegression(), Lasso(), Ridge(), KNeighborsRegressor(), DecisionTreeRegressor()]
    regsName = ['Linear Regression', 'Lasso Regression', 'Ridge Regression','KNeighbors Regression',' Decision Tree Regression']
    metric_cols = ['Model','Train Rsq','Test Rsq','Train RMSE','Test RMSE','Train MAPE','Test MAPE','Train MAD','Test MAD']

    # Collect one record per model and build the frame once.
    # DataFrame.append was removed in pandas 2.0 and copied the frame on
    # every iteration.
    rows = []
    for modName, estimator in zip(regsName, regs):
        model = estimator
        model.fit(X_train, y_train)
        y_train_pred = model.predict(X_train)
        y_test_pred = model.predict(X_test)
        rows.append({
            'Model': modName,
            'Train Rsq': model.score(X_train, y_train),
            'Test Rsq': model.score(X_test, y_test),
            'Train RMSE': np.sqrt(metrics.mean_squared_error(y_train, y_train_pred)),
            'Test RMSE': np.sqrt(metrics.mean_squared_error(y_test, y_test_pred)),
            'Train MAPE': metrics.mean_absolute_percentage_error(y_train, y_train_pred),
            'Test MAPE': metrics.mean_absolute_percentage_error(y_test, y_test_pred),
            'Train MAD': metrics.mean_absolute_error(y_train, y_train_pred),
            'Test MAD': metrics.mean_absolute_error(y_test, y_test_pred),
        })
    reg = pd.DataFrame(rows, columns=metric_cols)

    return dbc.Container([
    # The label used to read "Modal Performance:", a typo.
    dbc.Row([html.Div(['Model Performance:'])]),
    dbc.Row([html.Div([
            dash_table.DataTable(
                data=reg.to_dict('records'),
                columns=[{'name': i, 'id': i} for i in reg.columns],
                page_size=5, style_table={'overflowX': 'auto'}, style_as_list_view=False,
                style_data={'color': 'white','backgroundColor': 'black'},
                style_header={'backgroundColor': 'white','color': 'black','fontWeight': 'bold'}),
    ])
    ]),
    dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 20})]),
    dbc.Row([html.Div(['Upload data in the below mentioned format'])]),

    dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 10})]),

    # Header-only table showing the predictor columns of the encoded data.
    dbc.Row([html.Div([
            dash_table.DataTable(
                columns=[{'name': i, 'id': i} for i in X.columns],
                style_table={'overflowX': 'auto'},
                style_header={'backgroundColor': 'black','color': 'white'},
                style_cell={'height': 'auto',
                            'minWidth': '180px', 'width': '180px', 'maxWidth': '1800px',
                            'whiteSpace': 'normal'
                        })
    ])
    ]),


    dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 10})]),

                         ])


In [6]:
# Find the response variable using selected regression method
def regModPred(child):
    """Fit the chosen regressor on ``df`` and predict for the uploaded ``dfx``.

    Parameters
    ----------
    child : list
        ``[model type, response column, predictor column, ...]``. The model
        type is one of ``'Linear'``, ``'Lasso'``, ``'Ridge'``,
        ``'KNeighbors'`` or ``'Decision Tree'``. Any other value (including
        ``None``) falls back to the decision tree, which is the model the
        previous implementation ended up using in that case.

    Returns
    -------
    dbc.Container
        The prediction text, or a short message when no prediction data has
        been uploaded or it lacks required predictor columns.

    Notes
    -----
    The model is fitted on the dummy-encoded but unscaled data (``dfd``).
    Rebinds the globals ``dfm``, ``dfd``, ``dfs``, ``dfsnum``, ``dfscat`` and
    ``model``.
    """
    global df, dfm, dfd, dfs, dfsnum, dfscat, model
    j = child[0]
    y_var = child[1]
    dfm = df[child[1:]].copy()

    obcols = [col for col in dfm.columns if dfm[col].dtype == 'object']
    if len(obcols) > 0:
        dfd = pd.get_dummies(dfm, columns=obcols, drop_first=True)
    else:
        dfd = dfm.copy()
    dfs = dfd.apply(zscore)

    X = dfd.drop(y_var, axis=1)
    y = dfd[[y_var]]
    dfsnum, dfscat = [], []
    for feature in dfs.columns:
        if dfs[feature].dtype == 'int64' or dfs[feature].dtype == 'float':
            dfsnum.append(feature)
        else:
            dfscat.append(feature)

    # Dispatch table: fit only the requested estimator instead of fitting
    # all five and keeping whichever came last.
    estimators = {
        'Linear': LinearRegression,
        'Lasso': Lasso,
        'Ridge': Ridge,
        'KNeighbors': KNeighborsRegressor,
        'Decision Tree': DecisionTreeRegressor,
    }
    model = estimators.get(j, DecisionTreeRegressor)()
    model.fit(X, y)

    def _message(text):
        # Same container shape as the normal result, so the page layout
        # does not jump.
        return dbc.Container([
            dbc.Row([html.Div([text])]),
            dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 20})]),
            dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 10})]),
        ])

    # `dfx` only exists after a prediction file has been parsed.
    pred_input = globals().get('dfx')
    if pred_input is None:
        return _message('Upload a prediction file before predicting.')

    missing = [col for col in X.columns if col not in pred_input.columns]
    if missing:
        return _message('The prediction file is missing columns: {}'.format(missing))

    # Select and order the columns exactly as in training. Extra columns or
    # a different order would otherwise fail or mis-assign features.
    ansX = model.predict(pred_input[list(X.columns)])
    return dbc.Container([
    dbc.Row([html.Div(['The output for {} is {}'.format(y_var,ansX)])]),

    dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 20})]),

    dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 10})]),

                         ])

# Code for dashboard

In [9]:
#  ****************************** Dummy placeholder values ******************************
# Stand-in column lists and an empty frame, so the layout below can be built
# before any file has been uploaded.
dfnum = ['x', 'y']
dfcat = ['a', 'b']
df = pd.DataFrame(columns=dfnum + dfcat)
#  ****************************** Dashboard ******************************
# Dash application using the CYBORG bootstrap theme.
app = JupyterDash(
    __name__,
    external_stylesheets=[dbc.themes.CYBORG],
)

# Page layout, top to bottom: title, data upload, pre-processing (duplicates /
# missing values / outliers), exploratory plots, model building and prediction.
# The component ids declared here are the ones the callbacks below refer to.
# Rows containing only ' | ' in black text act as vertical spacers.
app.layout = dbc.Container([

    dbc.Row([
        dbc.Col(html.H1("Data Science toolkit",
                        className='text-center text-primary mb-4'),
                width=12)
    ]),

# ****************************** Drag and drop ****************************** 
# Upload area for the working data set; the preview is rendered into
# 'output-data-upload'.
    dbc.Row([
            dcc.Upload(
        id='upload-data',
        children=html.Div([
            'Drag and Drop or ',
            html.A('Select Files')
        ]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px'
        },
        multiple=True
    ),
    html.Div(id='output-data-upload'),
    ]),

# ****************************** Preprocessing  ******************************
    dbc.Row([dbc.Col([
            html.H3("Pre-processing",
                        className='text-left text-primary mb-3')
            ],width={'size':5, 'offset':0, 'order':1})
    ]),
# ******************** Missing values and Duplicates 
# 'missing' / 'duplicates' hold the counts reported on upload; the button
# triggers the clean-up callback.
    dbc.Row([dbc.Col([
        html.Div(id = 'missing')
    ],width={'size':3, 'offset':1, 'order':1}),
             dbc.Col([
                 html.Div(id = 'duplicates')
             ],width={'size':2, 'offset':0, 'order':2}),
            dbc.Col([
                    html.Button('Remove duplicates and missing values',id = 'rmvDMBtn',n_clicks = 0)
            ],width={'size':4, 'offset':0, 'order':3}),
    ], justify='start'),
    
    # Counts shown after the clean-up button has been used.
    dbc.Row([dbc.Col([
        html.Div(id = 'missingUpd')
    ],width={'size':3, 'offset':1, 'order':1}),
             dbc.Col([
                 html.Div(id = 'duplicatesUpd')
             ],width={'size':2, 'offset':0, 'order':2})
    ], justify='start'),
    
    dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 40})]),
# ******************** Box plot    
    dbc.Row([
        dbc.Col([html.Label(['Outlier Analysis'])  
        ],width={'size':3, 'offset':1, 'order':1}),
        dbc.Col([
            html.Button('Remove outliers',id = 'rmvOut',n_clicks = 0)
        ],width={'size':3, 'offset':1, 'order':2})
    ]),
    
    dbc.Row([dbc.Col([
        dcc.Graph(id='box1', figure={})
        ],width={'size':10, 'offset':1, 'order':1})   
    ]),
    
# ****************************** Exploratory Data Analysis ******************************     
    dbc.Row([dbc.Col([
            html.H3("Exploratory data analysis",
                        className='text-left text-primary mb-3')
            ],width={'size':5, 'offset':0, 'order':1})
    ]),
# ******************** Univariate and Bivariate analysis        
    dbc.Row([
        dbc.Col([html.Label(['Univariate Analysis',])
            ], width={'size':5, 'offset':1, 'order':1}
            ),
        dbc.Col([html.Label(['Bivariate Analysis',])
            ], width={'size':5, 'offset':1, 'order':2}
            ),
        ], justify='start'),
    
    # Dropdown options and defaults here come from the placeholder frame and
    # lists; the upload callback replaces them once data is loaded.
    dbc.Row([
        dbc.Col([
            dcc.Dropdown(id='my-dpdn1', multi=False, value = dfnum[0],
                         options=[{'label':x, 'value':x}
                                  for x in sorted(df.columns)],
                         )
        ], width={'size':2, 'offset':1, 'order':1}
        ),

        dbc.Col([
            dcc.Dropdown(id='my-dpdn2', multi=False,  value = dfcat[1],
                         options=[{'label':x, 'value':x}
                                  for x in sorted(df.columns)],
                         )
        ], width={'size':2, 'offset':4, 'order':2}
        ),
        
             dbc.Col([
            dcc.Dropdown(id='my-dpdn3', multi=False,  value = dfnum[0],
                         options=[{'label':x, 'value':x}
                                  for x in sorted(dfnum)]
                         )
        ], width={'size':2, 'offset':0, 'order':3}
        ),

    ], justify='start'),
    
    dbc.Row([
        dbc.Col([
           dcc.Graph(id='line-fig1', figure={}) 
        ],width={'size':4, 'offset':1, 'order':1}),
        dbc.Col([
           dcc.Graph(id='line-fig2', figure={}) 
        ],width={'size':4, 'offset':2, 'order':2})
    ]),
    
    dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 30})]),
# ******************** Multivariate analysis      
    dbc.Row([
        dbc.Col([html.Label(['Multivariate Analysis',])
            ], width={'size':6, 'offset':1, 'order':1}
            ),
        dbc.Col([html.Label(['Correlation plot',])
            ], width={'size':3, 'offset':2, 'order':2}
            )
        ], justify='start'),    
    
    dbc.Row([
        dbc.Col([
            dcc.Dropdown(id='my-dpdn4', multi=False, value = dfcat[0],
                         options=[{'label':x, 'value':x}
                                  for x in sorted(dfcat)],
                         )
        ], width={'size':2, 'offset':1, 'order':1}
        ),

        dbc.Col([
            dcc.Dropdown(id='my-dpdn5', multi=False, value = dfnum[0],
                         options=[{'label':x, 'value':x}
                                  for x in sorted(dfnum)],
                         )
        ], width={'size':2, 'offset':0, 'order':2}
        ),        
            dbc.Col([
            dcc.Dropdown(id='my-dpdn6', multi=False, value = dfcat[0],
                         options=[{'label':x, 'value':x}
                                  for x in sorted(dfcat)]
                         )
        ], width={'size':2, 'offset':0, 'order':3}
        ),
    ], justify='start'),
    
    dbc.Row([
        dbc.Col([
           dcc.Graph(id='line-fig3', figure={}) 
        ],width={'size':6, 'offset':1, 'order':1}),
        dbc.Col([
            dcc.Graph(id='corr', figure={})
        ], width={'size':5, 'offset':0, 'order':2})        
    ], justify='start'),
    
# ****************************** Modelling ****************************** 
    dbc.Row([dbc.Col([
            html.H3("Model building",
                        className='text-left text-primary mb-3')
            ],width={'size':5, 'offset':0, 'order':1})
    ]),

    dbc.Row([
        dbc.Col([
        html.H5("Regression models",
                        className='text-left text-primary mb-3')
        ],width={'size':5, 'offset':0, 'order':1})
    ]),
# ******************** Choose variables for prediction 
    dbc.Row([
        dbc.Col([
            html.Label(['Response variable(y): ',])
        ],width={'size':2, 'offset':0, 'order':0}),
        dbc.Col([
            html.Label(['Predictor variable(x): ',])
        ],width={'size':2, 'offset':1, 'order':1})
    ], justify='start'),

    # Single-select response (numeric columns) and multi-select predictors
    # (all columns).
    dbc.Row([
        dbc.Col([
            dcc.Dropdown(id='y_var1', multi=False,
                         options=[{'label':x, 'value':x}
                                  for x in dfnum],)
        ],width={'size':2, 'offset':0, 'order':0}),
        dbc.Col([
            dcc.Dropdown(id='x_var1', multi=True,
                         options=[{'label':x, 'value':x}
                                  for x in df.columns],)
        ],width={'size':9, 'offset':1, 'order':1}),
    ], justify='start'),
    
    dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 5})]),
    
    dbc.Row([
        dbc.Col([
            html.Button('Build model !!!',id = 'mdlBtn1',n_clicks = 0)
        ],width={'size':2, 'offset':0, 'order':3})
    ], justify='start'),
    
    dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 10})]),
# ******************** Model performance    
    dbc.Row([
        dbc.Col([
            html.Div(id = 'mdlOut1')
        ],width={'size':12, 'offset':0, 'order':1})
    ], justify='start'),
    
    dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 10})]),
    
    dbc.Row([
    html.Label(['Select model type: ',])
    ]),
# ******************** Choose best model
    dbc.Row([
        dcc.Dropdown(id='mdlLst1', multi=False,
                     options=[{'label':x, 'value':x}
                              for x in ['Linear', 'Lasso', 'Ridge','KNeighbors', 'Decision Tree']],)
    ]),
 
    dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 5})]),
# ******************** Upload area for the prediction input
    dbc.Row([html.Div(['Enter values for prediction:'])]), 
            
    dbc.Row([
            dcc.Upload(
        id='upload-pred',
        children=html.Div([
            'Drag and Drop or ',
            html.A('Select Files')
        ]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px'
        },
        multiple=True
    ),
    html.Div(id='mdlOut2'),
    ]),
# ******************** Final prediction    
    dbc.Row([
        dbc.Col([
            html.Button('Predict !!!',id = 'predBtn1',n_clicks = 0)
        ],width={'size':2, 'offset':0, 'order':1})
    ], justify='start'),

    dbc.Row([
        dbc.Col([
            html.Div(id = 'mdlOut3')
        ],width={'size':12, 'offset':0, 'order':1})
    ], justify='start'),
    
#     dbc.Row([
#         dbc.Col([
#             html.Div(id = 'missingUpd')
#         ],width={'size':, 'offset':, 'order':})
#     ], justify='start'),
# Large bottom spacer.
dbc.Row([html.Div([' | '],style={'color': 'black', 'fontSize': 150})]),
], fluid=True)

# Callback section: connecting the components
# *******************************************************************************************************************
# ****************************** Exploratory Data Analysis  ****************************** 
# ******************** Univariate analysis
@app.callback(
    Output('line-fig1', 'figure'),
    Input('my-dpdn1', 'value')
)
def update_graph(x_val):
    """Draw the univariate histogram for the column chosen in 'my-dpdn1'.

    Parameters
    ----------
    x_val : Any
        Current value of the 'my-dpdn1' dropdown, used as the x column of
        the global ``df``.

    Returns
    -------
    Figure or dict
        The styled histogram, or ``{}`` (empty figure) when it cannot be
        built.
    """
    try:
        fig = px.histogram(df, x=x_val)
        # Transparent backgrounds and white text to suit the dark theme.
        fig.update_layout(
            plot_bgcolor='rgba(0, 0, 0, 0)',
            paper_bgcolor='rgba(0, 0, 0, 0)',
            font=dict(color='white'),
        )
    except Exception:
        # Best-effort plot. Catching Exception rather than using a bare
        # `except:` lets KeyboardInterrupt and SystemExit propagate.
        fig = {}
    return fig

# ******************** Bivariate analysis
@app.callback(
    Output('line-fig2', 'figure'),
    Input('my-dpdn2', 'value'),
    Input('my-dpdn3', 'value')
)
def update_graph(x_val,y_val):
    """Draw the bivariate plot for the columns chosen in 'my-dpdn2'/'my-dpdn3'.

    A scatter plot is used when both columns are listed in the global
    ``dfnum``; otherwise a histogram of ``y_val`` over ``x_val`` is drawn.

    Parameters
    ----------
    x_val : Any
        Current value of 'my-dpdn2' (x column of the global ``df``).
    y_val : Any
        Current value of 'my-dpdn3' (y column of the global ``df``).

    Returns
    -------
    Figure or dict
        The styled figure, or ``{}`` when it cannot be built.
    """
    both_numeric = (x_val in dfnum) and (y_val in dfnum)
    plot = px.scatter if both_numeric else px.histogram
    try:
        fig = plot(df, x=x_val, y=y_val)
        # Transparent backgrounds and white text to suit the dark theme.
        fig.update_layout(
            plot_bgcolor='rgba(0, 0, 0, 0)',
            paper_bgcolor='rgba(0, 0, 0, 0)',
            font=dict(color='white'),
        )
    except Exception:
        # Best-effort plot. Catching Exception rather than using a bare
        # `except:` lets KeyboardInterrupt and SystemExit propagate.
        fig = {}
    return fig

# ******************** Multivariate analysis
@app.callback(
    Output('line-fig3', 'figure'),
    Input('my-dpdn4', 'value'),
    Input('my-dpdn5', 'value'),
    Input('my-dpdn6', 'value')
)
def update_graph(x_val,y_val,z_val):
    """Draw the multivariate histogram coloured by a third column.

    Parameters
    ----------
    x_val : Any
        Current value of 'my-dpdn4' (x column of the global ``df``).
    y_val : Any
        Current value of 'my-dpdn5' (y column).
    z_val : Any
        Current value of 'my-dpdn6' (colour grouping column).

    Returns
    -------
    Figure or dict
        The styled histogram, or ``{}`` when it cannot be built.
    """
    try:
        fig = px.histogram(df, x=x_val, y=y_val, color=z_val)
        # Transparent backgrounds and white text to suit the dark theme.
        fig.update_layout(
            plot_bgcolor='rgba(0, 0, 0, 0)',
            paper_bgcolor='rgba(0, 0, 0, 0)',
            font=dict(color='white'),
        )
    except Exception:
        # Best-effort plot. Catching Exception rather than using a bare
        # `except:` lets KeyboardInterrupt and SystemExit propagate.
        fig = {}
    return fig

# ******************** Correlation plot
@app.callback(Output('corr','figure'),
              Input('my-dpdn1', 'value')

)
def update_corr(content):
    """Draw the correlation heat map of the numeric columns of ``df``.

    Parameters
    ----------
    content : Any
        Current value of 'my-dpdn1'. It only triggers the refresh and is
        not read.

    Returns
    -------
    Figure or dict
        The styled heat map, or ``{}`` when there is nothing to correlate.
    """
    # Restrict to numeric/boolean columns explicitly. Recent pandas releases
    # no longer drop non-numeric columns silently in DataFrame.corr and
    # raise instead.
    corr = df.select_dtypes(include=['number', 'bool']).corr()
    if corr.empty:
        return {}
    fig = px.imshow(corr)
    fig.update_layout(
        plot_bgcolor='rgba(0, 0, 0, 0)',
        paper_bgcolor='rgba(0, 0, 0, 0)',
        font=dict(color='white'),
    )
    return fig

# ******************** Update dropdowns in Exploratory data analysis
@app.callback([Output('output-data-upload', 'children'),
              dash.dependencies.Output('my-dpdn1', 'options'),
              dash.dependencies.Output('my-dpdn2', 'options'),
              dash.dependencies.Output('my-dpdn3', 'options'),
              dash.dependencies.Output('my-dpdn4', 'options'),
              dash.dependencies.Output('my-dpdn5', 'options'),
              dash.dependencies.Output('my-dpdn6', 'options'),
              dash.dependencies.Output('y_var1', 'options'),
              dash.dependencies.Output('x_var1', 'options'),
              dash.dependencies.Output('my-dpdn1', 'value'),
              dash.dependencies.Output('my-dpdn2', 'value'),
              dash.dependencies.Output('my-dpdn3', 'value'),
              dash.dependencies.Output('my-dpdn4', 'value'),
              dash.dependencies.Output('my-dpdn5', 'value'),
              dash.dependencies.Output('my-dpdn6', 'value'),
              Output('duplicates','children'),
              Output('missing','children')],
              Input('upload-data', 'contents'),
              State('upload-data', 'filename'),
              State('upload-data', 'last_modified'))
def update_output(list_of_contents, list_of_names, list_of_dates):
    """Load the uploaded data set and refresh every dependent control.

    Parameters
    ----------
    list_of_contents : list or None
        Upload payloads. Only the first one is parsed. ``None`` before any
        upload.
    list_of_names : list or None
        File names matching ``list_of_contents``.
    list_of_dates : list or None
        Last-modified values matching ``list_of_contents``.

    Returns
    -------
    tuple
        17 values in the order of the declared outputs: the data preview,
        eight dropdown option lists, six dropdown default values, then the
        duplicates and missing-values messages.
    """
    if list_of_contents is not None:
        # Zipping with [0] limits parsing to the first file and tags it as
        # the working data set (idx 0).
        children = [parse_contents(c, n, d,idx) for c, n, d,idx in zip(list_of_contents, list_of_names, list_of_dates,[0])]
    else:
        children = html.Div([
        dash_table.DataTable(
            data=df.to_dict('records'),
            columns=[{'name': i, 'id': i} for i in df.columns],
            page_size=5, style_table={'overflowX': 'auto'}, style_as_list_view=True,
            style_data={'color': 'white','backgroundColor': 'black'},
            style_header={'backgroundColor': 'white','color': 'black','fontWeight': 'bold'}
        )
        ])

    all_opts = [{'label': i, 'value': i} for i in df.columns]
    num_opts = [{'label': i, 'value': i} for i in dfnum]
    cat_opts = [{'label': i, 'value': i} for i in dfcat]

    # Pick each default independently. A data set without numeric (or
    # without categorical) columns used to reset every default and both
    # counters to {}, which is not a valid dropdown value.
    first_num = dfnum[0] if len(dfnum) > 0 else None
    first_cat = dfcat[0] if len(dfcat) > 0 else None
    try:
        dup, miss = preProc(0)
    except Exception as e:
        print(e)
        dup, miss = '', ''

    return (children,
            all_opts, all_opts, num_opts, cat_opts, num_opts, cat_opts,  # my-dpdn1..6 options
            num_opts, all_opts,                                          # y_var1, x_var1 options
            first_num, first_cat, first_num, first_cat, first_num, first_cat,  # my-dpdn1..6 values
            dup, miss)

# ****************************** PreProcessing ****************************** 
# ******************** Remove missing values and duplicates
@app.callback(Output('duplicatesUpd','children'),
              Output('missingUpd','children'),
              Input('upload-data', 'contents'),
              Input('rmvDMBtn','n_clicks')
)
def update_miss(content,n):
    """Run the duplicate / missing-value clean-up once the button is used.

    Parameters
    ----------
    content : Any
        Current upload payload. ``None`` means no data has been uploaded.
    n : int
        Click count of the 'rmvDMBtn' button.

    Returns
    -------
    tuple
        ``(duplicates message, missing-values message)`` from ``preProc(1)``,
        or two single-space strings when nothing should be done.
    """
    cleanup_requested = n > 0 and content is not None
    if not cleanup_requested:
        return ' ', ' '

    # preProc(1) is deliberately invoked twice, as before. The result of the
    # first call is discarded and the messages come from the second call,
    # which runs on the frame the first call already cleaned.
    preProc(1)
    dup_msg, miss_msg = preProc(1)
    return dup_msg, miss_msg

# ******************** Outlier plot
@app.callback(Output('box1', 'figure'),
              Input('my-dpdn1', 'value'),
              Input('rmvOut','n_clicks')
)
def update_boxplot(temp,n):
    """Draw box plots of the numeric columns, capping outliers on request.

    Parameters
    ----------
    temp : Any
        Current value of 'my-dpdn1'. It only triggers the refresh and is
        not read.
    n : int or None
        Click count of the 'rmvOut' button. Once it is positive, every
        column in the global ``dfnum`` is capped in place in the global
        ``df`` at the bounds returned by ``remove_outlier``.

    Returns
    -------
    Figure
        Box plot of the ``dfnum`` columns of ``df``.
    """
    global df
    # `n and ...` also covers a missing click count, which previously raised
    # a TypeError on the `n>0` comparison.
    if n and n > 0:
        for column in dfnum:
            lr, ur = remove_outlier(df[column])
            # Cap at the fences rather than dropping rows.
            df[column] = np.where(df[column] > ur, ur, df[column])
            df[column] = np.where(df[column] < lr, lr, df[column])

    # Single figure-building path. It used to be duplicated in both branches
    # and skipped entirely (returning None) for any other click count.
    fig = px.box(df, y=dfnum, points="outliers")
    fig.update_layout(
        paper_bgcolor='rgba(0, 0, 0, 0)',
        font=dict(color='white'),
    )
    return fig

# ****************************** Modelling ******************************
# ******************** Build all models to compare performance
@app.callback(Output('mdlOut1', 'children'),
              State('y_var1', 'value'),
              State('x_var1', 'value'),
              Input('mdlBtn1','n_clicks')
)
def update_model(y,x,n):
    """Build and score all regression models when the build button is clicked.

    Parameters
    ----------
    y : Any
        Selected response column ('y_var1'). ``None`` when nothing is chosen.
    x : list or None
        Selected predictor columns ('x_var1').
    n : int or None
        Click count of the 'mdlBtn1' button.

    Returns
    -------
    str or dbc.Container
        A prompt while the button has not been clicked or a selection is
        missing; otherwise the performance report from ``regMod``.
    """
    # The old `n % 2 == 0` test only built the model on odd click counts, so
    # every second click replaced the results with the prompt. Missing
    # selections crashed on `child.extend(None)` or on the column lookup.
    if not n or y is None or not x:
        return 'Please choose the x and y variables'
    child = [y]
    child.extend(x)
    return regMod(child)

# ******************** Upload data for prediction
@app.callback(Output('mdlOut2', 'children'),
              State('y_var1', 'value'),
              State('x_var1', 'value'),
              State('upload-pred', 'filename'),
              State('upload-pred', 'last_modified'),
              Input('upload-pred', 'contents')
)
def update_model(y,x,list_of_names,list_of_dates,list_of_contents):
    """Parse the uploaded prediction file and show its preview.

    Parameters
    ----------
    y, x : Any
        Current response / predictor selections (received but not used).
    list_of_names : list or None
        File names of the upload.
    list_of_dates : list or None
        Last-modified values of the upload.
    list_of_contents : list or None
        Upload payloads. ``None`` before any upload.

    Returns
    -------
    list or str
        A one-element list with the preview from ``parse_contents``, or a
        single space when nothing has been uploaded.
    """
    if list_of_contents is None:
        return ' '
    # Zipping with [1] limits parsing to the first file and passes idx 1, so
    # parse_contents stores it as the prediction frame.
    uploads = zip(list_of_contents, list_of_names, list_of_dates, [1])
    return [parse_contents(payload, name, stamp, slot)
            for payload, name, stamp, slot in uploads]

# ******************** Predict response variable for uploaded predictor input
@app.callback(Output('mdlOut3', 'children'),
              State('y_var1', 'value'),
              State('x_var1', 'value'),
              State('upload-pred', 'filename'),
              State('upload-pred', 'last_modified'),
              Input('upload-pred', 'contents'),
              Input('predBtn1','n_clicks'),
              Input('mdlLst1','value'),
)
def update_model(y,x,list_of_names,list_of_dates,list_of_contents,n,mdlType):
    """Predict the response for the uploaded input with the chosen model.

    Runs whenever the prediction upload, the predict button or the model
    type changes.

    Parameters
    ----------
    y : Any
        Selected response column ('y_var1').
    x : list or None
        Selected predictor columns ('x_var1').
    list_of_names, list_of_dates : list or None
        File names / last-modified values of the prediction upload (not
        used here).
    list_of_contents : list or None
        Prediction upload payloads. ``None`` before any upload.
    n : int or None
        Click count of the 'predBtn1' button.
    mdlType : str or None
        Selected model type ('mdlLst1').

    Returns
    -------
    str or dbc.Container
        A single space until the button has been clicked and both a
        prediction file and the variables are available; otherwise the
        result of ``regModPred``.
    """
    # The old condition required an even click count above zero, so the
    # first click did nothing and every odd click blanked the result.
    # Missing selections crashed on `child.extend(None)`.
    ready = bool(n) and list_of_contents is not None and y is not None and bool(x)
    if not ready:
        return ' '
    child = [mdlType, y]
    child.extend(x)
    return regModPred(child)
    
    
if __name__ == '__main__':
    # Start the Dash server on localhost in 'external' mode.
    app.run_server(host='localhost', mode='external')

Dash app running on http://localhost:8050/
