# Linear Regression
Linear regression is a supervised machine learning model that finds the best-fit straight line describing the relationship between the independent variable(s) and the dependent variable.

![](https://www.voxco.com/wp-content/uploads/2021/11/Linear-Regression1.png)

## What is a line?

In [9]:
from IPython.display import display
from ipywidgets import interactive, widgets
from helper_funcitons import draw_line

# Wrap draw_line in an interactive widget with one slider per argument
# (slope, intercept), then render the widget in the notebook.
w = interactive(
    draw_line,
    slope=widgets.FloatSlider(value=1, min=-20, max=20, step=0.1),
    intercept=widgets.FloatSlider(value=0, min=-3, max=3, step=0.5),
)
display(w)

interactive(children=(FloatSlider(value=1.0, description='slope', max=20.0, min=-20.0), FloatSlider(value=0.0,…

# How does a line help us with predictions?

In [10]:
from helper_funcitons import plot_data_and_line

# Wrap plot_data_and_line in an interactive widget with one slider per
# argument (slope, intercept), then render the widget in the notebook.
w = interactive(
    plot_data_and_line,
    slope=widgets.FloatSlider(value=1, min=-2, max=4, step=0.1),
    intercept=widgets.FloatSlider(value=0, min=-10, max=15, step=0.1),
)
display(w)

interactive(children=(FloatSlider(value=1.0, description='slope', max=4.0, min=-2.0), FloatSlider(value=0.0, d…

# Let's work with some dummy data
Let's say we have size and weight as variables. Which line fits this data best?

In [11]:
from helper_funcitons import create_data_line_residuals_rmse

# Wrap create_data_line_residuals_rmse in an interactive widget with one
# slider per argument (slope, intercept), then render it in the notebook.
w = interactive(
    create_data_line_residuals_rmse,
    slope=widgets.FloatSlider(value=1, min=-2, max=4, step=0.05),
    intercept=widgets.FloatSlider(value=0, min=-10, max=20, step=0.5),
)
display(w)

interactive(children=(FloatSlider(value=1.0, description='slope', max=4.0, min=-2.0, step=0.05), FloatSlider(v…

# Simulating on artificially created data

In [17]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import numpy as np

# Indices of the ten data points whose residuals are drawn on the plot
# (drawn with replacement from 0..50 inclusive).
import random
random_choice = [random.randint(0, 50) for _ in range(10)]

# Parameters of the hidden line the artificial data is built around.
random_slope = np.random.normal()
random_intercept = np.random.normal()

# x-coordinates: every second integer from 0 up to (but excluding) 150.
our_range = list(range(0, 150, 2))

# Points on the hidden line, one per x-coordinate.
line_points = random_slope * np.array(our_range) + random_intercept

# One integer noise value in [-10, 100) per x-coordinate, drawn one at a time.
random_noise = np.array([np.random.randint(-10, 100) for _ in our_range])

# Observed data = hidden line + noise.
data_points = line_points + random_noise



def create_data_line_residuals_ssr(slope,intercept):
    """Plot the data, a candidate line, some residuals and the line's RMSE.

    Draws a scatter plot of the module-level ``data_points`` against
    x = 0, 2, ..., 148, overlays the dashed line ``y = slope * x + intercept``,
    draws a dashed vertical segment from the line to the data point for each
    index in the module-level ``random_choice``, and annotates the figure
    with the slope, the intercept, the line equation and the root mean
    squared error (RMSE) of the line against the data.

    Args:
        slope: Slope of the candidate line.
        intercept: Intercept of the candidate line.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    import matplotlib.pyplot as plt

    # Keep x one-dimensional so predictions align element-wise with the 1-D
    # ``data_points``. With a (75, 1) column vector, the subtraction below
    # would broadcast to a (75, 75) matrix and the RMSE would be averaged
    # over every pair of points instead of the 75 matched residuals.
    x = np.arange(0, 150, 2)
    predicted_values = slope * x + intercept

    plt.figure(figsize=(10, 8))
    ax = plt.gca()
    plt.scatter(x, data_points)  # the observed data

    plt.title('Trying to find the line that minimizes SSR')
    plt.xlabel('X')
    plt.ylabel('Y')

    # Candidate line drawn over the data, on fixed axes so the view does not
    # jump around as the sliders move.
    plt.xlim([-20, 150])
    plt.ylim([-20, 150])
    plt.plot(x, predicted_values, '--')
    plt.grid()

    # Residuals: vertical segments between the line and the selected points.
    for i in random_choice:
        plt.plot((x[i], x[i]), (predicted_values[i], data_points[i]), '--')

    # Root mean squared error of the candidate line.
    rmse = np.sqrt(np.mean((predicted_values - data_points) ** 2))

    # Line parameters at the top left of the axes.
    plt.text(.01, .99, f'Slope = {slope}', ha='left', va='top', transform=ax.transAxes)
    plt.text(.01, .95, f'Intercept = {intercept}', ha='left', va='top', transform=ax.transAxes)
    plt.text(.01, .90, f'y = {slope}*x + {intercept}', ha='left', va='top', transform=ax.transAxes)

    # RMSE just outside the right edge of the axes.
    plt.text(1, .5, f'  RMSE = {round(rmse,2)}', ha='left', va='top', fontsize= 'large',transform=ax.transAxes)

    plt.show()

# Wrap create_data_line_residuals_ssr in an interactive widget with one
# slider per argument (slope, intercept), then render it in the notebook.
w = interactive(
    create_data_line_residuals_ssr,
    slope=widgets.FloatSlider(value=1, min=-10, max=10, step=0.1),
    intercept=widgets.FloatSlider(value=0, min=-30, max=50, step=2),
)
display(w)

interactive(children=(FloatSlider(value=1.0, description='slope', max=10.0, min=-10.0), FloatSlider(value=0.0,…