# Linear Regression Demonstration

Run the code block below to launch a Plotly Dash app that fits a linear regression model. Use it to demonstrate how linear regression finds the line of best fit through a dataset in order to make predictions. You can change the number of data points, toggle the residuals and the mean line, and read metrics such as R², MAE and RMSE below the graph.

You may need to install Plotly Dash before running the code:

<ol>
    <li> In the Jupyter Navigator, select New and then Terminal </li>
    <li> Type <code>pip install dash</code> and press Enter </li>
</ol>

In [None]:
import json
import numpy as np
import dash
import flask
import math
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import plotly.graph_objs as go
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from random import randint
# Stylesheet URL kept from the original setup.
# NOTE(review): this list is not passed to dash.Dash() in the visible code, so
# it currently has no effect -- confirm whether it should be supplied through
# the `external_stylesheets=` argument.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# Define the Dash app and initial config
app = dash.Dash(__name__)
app.scripts.config.serve_locally = True

# Define the Dash app layout in HTML, using an overarching Div element.
# Style keys are camelCased throughout ('fontFamily', not 'font-family'),
# matching 'textAlign' and 'fontSize' and the form Dash expects.
app.layout = html.Div(
    children=[
        # Page title
        html.H1(
            id='header',
            children=['Linear Regression Demonstration'],
            style={
                'textAlign': 'center',
                'color': 'blue',
                'fontSize': 50,
                'fontFamily': 'Verdana',
            },
        ),

        # Short usage instructions shown under the title
        html.Div(
            id='instructions',
            children=[
                'Select the number of points to display in the box below',
                html.Br(),
                'You can select different metrics to see how the model performs',
            ],
            style={
                'textAlign': 'center',
                'color': 'blue',
                'fontSize': 20,
                'fontFamily': 'Verdana',
                'opacity': 0.4,
            },
        ),

        html.Br(),

        # Hidden element whose children hold the generated data as a JSON
        # string, so the callbacks below can all read the same sample.
        html.Div(id='data', style={'display': 'none'}),

        # Text dump of the generated x and y values
        html.Div(id='data_table', style={'fontSize': 24}),

        dcc.Graph(id='graph'),

        # Number of points to generate
        dcc.Input(id='number', max=30, type='number', value=10, min=2),

        # Toggle for drawing residual segments
        dcc.RadioItems(
            id='residuals',
            options=[
                {'label': 'Residuals Off', 'value': 'no'},
                {'label': 'Residuals On', 'value': 'yes'},
            ],
            value='no',
        ),

        # Toggle for drawing the mean line
        dcc.RadioItems(
            id='mean',
            options=[
                {'label': 'Mean Off', 'value': 'no'},
                {'label': 'Mean On', 'value': 'yes'},
            ],
            value='no',
        ),

        html.Br(),

        # Metrics text filled in by the display_output callback
        html.Div(id='metrics'),

        html.Br(),
    ]
)


@app.callback(
    [Output('data', 'children')],
    [Input('number', 'value')]
)
def store_data(number):
    """Generate a new random sample and store it as JSON in the 'data' element.

    Args:
        number: Value of the 'number' input, i.e. how many points to generate.

    Returns:
        A one-element list holding the JSON string ``{"x": [...], "y": [...]}``.

    Raises:
        dash.exceptions.PreventUpdate: If ``number`` is None, so the previously
            stored data is kept instead of the callback failing in ``int()``.
    """
    # Imported here so the guard does not rely on any extra file-level import.
    from dash.exceptions import PreventUpdate

    # The input can report None (for example when the box has been cleared);
    # generate_data would raise TypeError on int(None).
    if number is None:
        raise PreventUpdate

    # Generate the sample data
    x, y = generate_data(number)
    return [json.dumps({'x': x, 'y': y})]
    

@app.callback(
    [Output('data_table', 'children')],
    [Input('data', 'children')]
)
def display_data(data_json):
    """Render the stored x and y values as text for the 'data_table' element.

    Args:
        data_json: JSON string with 'x' and 'y' lists, read from the 'data'
            element.

    Returns:
        A one-element tuple whose item is the children tuple
        ``('x: ...', html.Br(), 'y: ...')``.
    """
    payload = json.loads(data_json)
    # Converting to numpy arrays gives the compact "[1 2 3]" text form
    x_values = np.array(payload['x'])
    y_values = np.array(payload['y'])

    table_children = (f'x: {x_values}', html.Br(), f'y: {y_values}')
    return (table_children,)


@app.callback(
    [Output('graph', 'figure'),
     Output('metrics','children')],
    [Input('mean', 'value'),
     Input('residuals','value'),
     Input('data', 'children')]
)
def display_output(mean, resid, data_json):
    """Fit a linear regression to the stored data and build the graph and metrics.

    Args:
        mean: 'yes' to draw a horizontal line at the mean of y; any other
            value draws no mean line.
        resid: 'yes' to draw a vertical segment between each data point and
            its predicted value; any other value draws none.
        data_json: JSON string with 'x' and 'y' lists, read from the 'data'
            element.

    Returns:
        A ``(figure, children)`` pair: the plotly figure for the 'graph'
        element and the list of strings / line breaks for the 'metrics'
        element.
    """
    # Retrieve data from Dashboard's data component
    data = json.loads(data_json)
    x, y = np.array(data['x']), np.array(data['y'])

    # Linear Regression model and prediction (sklearn expects a 2-D feature array)
    features = x.reshape(-1, 1)
    lr = LinearRegression().fit(features, y)
    pred = lr.predict(features)
    slope, intercept = lr.coef_[0], lr.intercept_

    # Calculate metrics (R-squared, Mean Absolute Error, Root Mean Square Error)
    r2, mae, rmse = calculate_metrics(features, y, pred, lr)

    # Display metrics
    children = [
        f'R^2 Score: {r2}',
        html.Br(),
        f'Mean Absolute Error: {mae}',
        html.Br(),
        f'Root Mean Squared Error: {rmse}',
        html.Br(),
        f'Equation: y = {round(slope, 5)}x + {round(intercept, 5)}',
    ]

    # Integer x positions from 0 to one past the largest x value, used for the
    # regression line and the mean line
    x_axis = list(range(int(x.max()) + 2))

    # Display graph
    fig = go.Figure()

    # Scatter plot of data
    fig.add_trace(go.Scatter(
        x=x.tolist(),
        y=y.tolist(),
        mode='markers',
        showlegend=False,
    ))

    # Plot regression line
    fig.add_trace(go.Scatter(
        name='Regression Line',
        x=x_axis,
        y=[intercept + slope * x_val for x_val in x_axis],
    ))

    # Plot mean line only when toggled, so no empty 'Mean Line' trace (and
    # legend entry) is created while the mean is switched off
    if mean == 'yes':
        mu = np.mean(y)
        fig.add_trace(go.Scatter(
            name='Mean Line',
            x=x_axis,
            y=[mu] * len(x_axis),
        ))

    # Keep the legend visible regardless of how many traces have an entry, so
    # the plot area does not change when the mean line is toggled
    fig.update_layout(showlegend=True)

    # Add residuals, if toggled: one vertical segment per point, from the
    # predicted value to the observed value (direction does not affect the
    # drawn line)
    if resid == 'yes':
        for x_val, observed, fitted in zip(x, y, pred):
            fig.add_shape(
                dict(type='line', x0=x_val, y0=fitted, x1=x_val, y1=observed))

    return fig, children


def calculate_metrics(X, y, pred, model):
    """Compute R-squared, mean absolute error and root mean squared error.

    Args:
        X: Feature array the model was fitted on.
        y: Observed target values.
        pred: Model predictions for ``X``; used for MAE and RMSE.
        model: Fitted estimator exposing ``score(X, y)``.

    Returns:
        A ``(r2, mae, rmse)`` tuple.
    """
    # The third positional parameter of score() is sample_weight, so the
    # predictions must not be passed there; score() predicts from X itself.
    r2 = model.score(X, y)
    mae = mean_absolute_error(y, pred)
    # RMSE is the square root of the mean squared error
    rmse = mean_squared_error(y, pred) ** 0.5
    return r2, mae, rmse


def generate_data(number):
    """Return two lists of ``number`` random integers, each between 0 and 30.

    Args:
        number: How many points to generate; converted with ``int()``.

    Returns:
        An ``(x, y)`` tuple of equally long lists of integers in [0, 30].
    """
    count = int(number)

    def draw_coordinates():
        # randint is inclusive at both ends
        return [randint(0, 30) for _ in range(count)]

    # The tuple is evaluated left to right: all x values are drawn before any y
    return draw_coordinates(), draw_coordinates()


# Expose the server object behind the Dash app under the name `application`
application = app.server

# Start the server only when this code is executed as the main module
if __name__ == '__main__':
    application.run(debug=False)