<a href="https://colab.research.google.com/github/Sushmitha-93/Linear-Regression-Algorithm-Exploration-in-Python/blob/main/Linear_Regression_Exploration_in_python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Linear Regression**

#### **Read the input datapoints**

In [9]:
# Making the imports
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the dataset; the first column is used as the input X and the
# second column as the target Y.
data = pd.read_csv('data.csv')
X, Y = data.iloc[:, 0], data.iloc[:, 1]


#### **Visualizing datapoints using scatter plot**

In [10]:
# Scatter plot of the raw (X, Y) datapoints, rendered at 800x400
fig = px.scatter(x=X, y=Y, width=800, height=400)
fig.show()

#### **Build Gradient Descent optimization algorithm**

In [11]:
# Initial guess and hyper-parameters for the prediction line y = m*x + c
m = 0  # Slope of the prediction line
c = 0  # Intercept of the prediction line
LearningRate = 0.0001
iterations = 1000  # The number of iterations to perform gradient descent
n = float(len(X))  # Number of datapoints
if n == 0:
    raise ValueError("Gradient descent needs at least one datapoint")

# Plain float arrays keep the arithmetic vectorized and independent of the
# pandas index.
x_vals = np.asarray(X, dtype=float)
y_vals = np.asarray(Y, dtype=float)

# The x-range of the data never changes, so compute it once instead of on
# every iteration.
x_lo, x_hi = x_vals.min(), x_vals.max()

# Dictionary that stores the prediction line of every iteration
# (used later to animate the regression line step by step)
lines = {'min_X': [], 'max_X': [], 'min_Y_pred': [], 'max_Y_pred': [], 'cost': [], 'm': [], 'c': []}

# Performing Gradient Descent
for i in range(iterations):

    # The current predicted line
    Y_pred = m * x_vals + c

    # *********** Saving predicted lines in each step of GD  ************
    # The stored y-values are the line evaluated AT min_X and max_X, so the
    # (x, y) pairs are real endpoints of the line. Taking min/max of Y_pred
    # instead would pair the wrong ends whenever the slope is negative.
    lines['min_X'].append(x_lo)
    lines['max_X'].append(x_hi)
    lines['min_Y_pred'].append(m * x_lo + c)
    lines['max_Y_pred'].append(m * x_hi + c)
    lines['m'].append(m)
    lines['c'].append(c)
    # ********************************************************************

    # Calculate Cost function - MSE
    residuals = y_vals - Y_pred
    cost = np.mean(residuals ** 2)
    lines['cost'].append(cost)

    # Derivative of Cost function wrt Slope (m) and Intercept (c)
    D_m = (-2 / n) * np.sum(x_vals * residuals)
    D_c = (-2 / n) * np.sum(residuals)

    # Update Slope and Intercept of next prediction line
    m = m - LearningRate * D_m
    c = c - LearningRate * D_c

print(m, c)

# Converting dictionary to pandas dataframe (one row per iteration)
lines = pd.DataFrame(lines)

1.4796491688889395 0.10148121494753734


In [12]:
# Show the per-iteration history built by the gradient descent cell:
# line endpoints, cost, slope (m) and intercept (c) for every step.
lines

Unnamed: 0,min_X,max_X,min_Y_pred,max_Y_pred,cost,m,c
0,25.128485,70.346076,0.000000,0.000000,5611.166154,0.000000,0.000000
1,25.128485,70.346076,18.670859,52.241915,1479.041610,0.742434,0.014630
2,25.128485,70.346076,27.982353,78.295834,451.305053,1.112697,0.021963
3,25.128485,70.346076,32.626172,91.289352,195.687745,1.297353,0.025656
4,25.128485,70.346076,34.942142,97.769428,132.110908,1.389443,0.027534
...,...,...,...,...,...,...,...
995,25.128485,70.346076,37.282640,104.189130,111.011197,1.479656,0.101120
996,25.128485,70.346076,37.282676,104.189103,111.011145,1.479655,0.101193
997,25.128485,70.346076,37.282713,104.189076,111.011093,1.479653,0.101265
998,25.128485,70.346076,37.282749,104.189048,111.011041,1.479652,0.101337


#### **Making predictions**

In [13]:
# Making predictions with the fitted slope and intercept
Y_pred = m*X + c

# Endpoints of the regression line: evaluate the fitted line at the smallest
# and largest X. Pairing min(X)/max(X) with min(Y_pred)/max(Y_pred) would
# draw the wrong line whenever the fitted slope is negative.
x_ends = [min(X), max(X)]
y_ends = [m*x_end + c for x_end in x_ends]

fig1 = px.scatter(x=X, y=Y)
fig2 = px.line(x=x_ends, y=y_ends)  # regression line
fig2.update_traces(line_color='red')

# Overlay the datapoints and the regression line in a single figure
fig3 = go.Figure(data=fig1.data + fig2.data)
fig3.update_layout(height=500, width=1000, yaxis_range=[-10, 140])
fig3.show()

### **Creating simple GIF image of each step in Gradient Descent**

In [None]:
pip install -U gif
pip install "gif[plotly]"

In [14]:
import gif

# Scatter of the raw datapoints; reused as the background of every frame
fig1 = px.scatter(x=X, y=Y)

@gif.frame
def plot(i):
    """Build the figure for gradient descent step ``i``.

    Overlays the regression line recorded in row ``i`` of ``lines`` (red)
    on the scatter of the datapoints held in the global ``fig1``, and puts
    the step number, cost, slope and intercept in the title.

    Parameters
    ----------
    i : int
        Row label in ``lines`` (the gradient descent step) to draw.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure for this step; the ``gif.frame`` decorator wraps the call
        so that the result can be collected and passed to ``gif.save``.
    """
    fig2 = px.line(x=[lines['min_X'][i], lines['max_X'][i]],
                   y=[lines['min_Y_pred'][i], lines['max_Y_pred'][i]])  # regression line
    fig2.update_traces(line_color='red')
    fig = go.Figure(data=fig1.data + fig2.data)
    # Plotly titles need '<br>' for a line break; a literal newline in the
    # text is not rendered as one.
    fig.update_layout(yaxis_range=[-10,140], title_text=f"Gradient Descent Step {i}: Cost (MSE)={lines['cost'][i]}<br>m={lines['m'][i]}, c={lines['c'][i]}")
    return fig

In [15]:
# Render one GIF frame for each of the first 11 gradient descent steps (0-10)
frames = [plot(step) for step in range(11)]

In [16]:
pip install -U kaleido



In [17]:
# Write the collected frames out as an animated GIF.
# NOTE(review): the unit of `duration` depends on the installed gif version - confirm.
gif_output_path = 'Gradient Descent.gif'
gif.save(frames, gif_output_path, duration=800)

### **Making animation using Plotly for each step in gradient descent**

In [18]:
# Play button for the animation. It does not depend on the step, so it is
# built once instead of on every loop iteration.
button = {
    "type": "buttons",
    "buttons": [
        {
            "label": "Play",
            "method": "animate",
            "args": [None, {"frame": {"duration": 900}}],  # Duration for each frame
        }
    ],
}

# create a list of frames
frames = []

# create a frame for each of the first 21 gradient descent steps (0-20)
for i in range(21):
    # regression line recorded for this step
    fig2 = px.line(x=[lines['min_X'][i], lines['max_X'][i]],
                   y=[lines['min_Y_pred'][i], lines['max_Y_pred'][i]])
    fig2.update_traces(line_color='red')

    # Title for this step. Plotly needs '<br>' for a line break; a literal
    # newline in the text is not rendered as one.
    title = f"Gradient Descent Step {i}: Cost (MSE)={lines['cost'][i]}<br>m={lines['m'][i]}, c={lines['c'][i]}"

    # Frame = datapoints + this step's line, with the step-specific title
    frame = go.Frame(
        data=fig1.data + fig2.data,
        layout=go.Layout(yaxis_range=[-10,140], title_text=title)
    )

    # Add the frame object to the frames list
    frames.append(frame)

# Figure-level layout (with the Play button). As before, it carries the title
# of the last step built above; only its final value is used after the loop.
layout = go.Layout(updatemenus=[button], yaxis_range=[-10,140], title_text=title)



In [19]:
# Combine the traces and frames into a figure.
# Start from the first frame so the figure opens on step 0 and the animation
# plays forward from what is shown. Using the variables left over from the
# frame-building loop would open on the last step instead.
first_frame = frames[0]
fig = go.Figure(data=first_frame.data,
                frames=frames,                # Add frames objects
                layout=layout)
fig.update_layout(title_text=first_frame.layout.title.text)

# show our animation!
fig.show()

In [20]:
# Export the animated figure as a standalone HTML page that starts playing on load
html_output_path = "Gradient_Descent_Plotly.html"
fig.write_html(html_output_path, auto_play=True)

### **Surface plot of Cost (MSE) Vs Slope(m) Vs Intercept(c)**

In [21]:
import plotly.graph_objects as go
from scipy.interpolate import griddata

# Intercepts (x) and slopes (y) visited by gradient descent; they define the
# region of parameter space shown in the surface plot.
x = np.array(lines['c'])
y = np.array(lines['m'])

xi = np.linspace(x.min(), x.max(), 100)
yi = np.linspace(y.min(), y.max(), 100)

# Xi holds the intercept and Yi the slope at every grid node
Xi, Yi = np.meshgrid(xi, yi)

# Evaluate the MSE exactly at every grid node instead of interpolating it from
# the visited (c, m) points: those points lie along a thin trajectory, so most
# of the grid falls outside their convex hull and interpolation yields NaN.
# Expanding mean((Y - m*X - c)^2) gives a closed form in the data moments:
#   mean(Y^2) - 2*m*mean(XY) - 2*c*mean(Y) + m^2*mean(X^2) + 2*m*c*mean(X) + c^2
x_data = np.asarray(X, dtype=float)
y_data = np.asarray(Y, dtype=float)
mean_x, mean_y = x_data.mean(), y_data.mean()
mean_xx = np.mean(x_data ** 2)
mean_xy = np.mean(x_data * y_data)
mean_yy = np.mean(y_data ** 2)

Z = (mean_yy
     - 2 * Yi * mean_xy
     - 2 * Xi * mean_y
     + Yi ** 2 * mean_xx
     + 2 * Yi * Xi * mean_x
     + Xi ** 2)

# plot the surface plot with plotly's Surface
fig = go.Figure(data=go.Surface(x=xi, y=yi, z=Z))

# add a contour plot
fig.update_traces(contours_z=dict(show=True,
                                  usecolormap=True,
                                  highlightcolor="limegreen",
                                  project_z=True))

# annotate the plot
fig.update_layout(
                  title='Linear Model MSE Cost Surface',
                  scene=dict(
                    xaxis_title='c (intercept)',
                    yaxis_title='m (slope)',
                    zaxis_title='MSE Cost'
                    ),
                  width=700, height=700)

fig.show()

### **Cost(MSE) Vs Slope (m)**

In [22]:
# Cost recorded at each gradient descent step against the slope at that step.
# A dedicated name is used so that `fig1` (the datapoint scatter that the
# animation cells read) is not overwritten when this cell runs.
fig_cost_vs_m = px.line(x=lines['m'], y=lines['cost'], title='Cost (MSE) Vs m (Slope)',
                        labels={'x': 'm (Slope)', 'y': 'Cost (MSE)'}, height=400, width=800)
fig_cost_vs_m.show()

### **Cost(MSE) Vs Intercept (c)**

In [23]:
# Cost recorded at each gradient descent step against the intercept at that step.
# A dedicated name is used so that `fig1` (the datapoint scatter that the
# animation cells read) is not overwritten when this cell runs.
fig_cost_vs_c = px.line(x=lines['c'], y=lines['cost'], title='Cost (MSE) Vs c (Intercept)',
                        labels={'x': 'c (Intercept)', 'y': 'Cost (MSE)'}, height=400, width=800)
fig_cost_vs_c.show()

In [24]:
# Candidate slopes: 0 up to (but excluding) 3 in steps of 0.01
m_start, m_stop, m_step = 0, 3, 0.01
m_vals = np.arange(m_start, m_stop, m_step)

# Candidate intercepts: -75 up to (but excluding) 75 in steps of 0.5
c_start, c_stop, c_step = -75, 75, 0.5
c_vals = np.arange(c_start, c_stop, c_step)

In [25]:
def _mse_for_line(slope, intercept, x_vals, y_vals):
    """Return the mean squared error of the line ``slope * x + intercept``.

    Parameters
    ----------
    slope, intercept : float
        Parameters of the candidate line.
    x_vals, y_vals : numpy.ndarray
        Input values and observed targets, element-wise aligned.

    Returns
    -------
    float
        Mean of the squared residuals ``y_vals - (slope * x_vals + intercept)``.
    """
    residuals = y_vals - (slope * x_vals + intercept)
    return np.mean(residuals ** 2)


# Plain float arrays so the residuals are computed in one vectorized step
_x_vals = np.asarray(X, dtype=float)
_y_vals = np.asarray(Y, dtype=float)

# Cost as a function of the slope, holding the intercept at the fitted value c
costarr1 = [_mse_for_line(slope, c, _x_vals, _y_vals) for slope in m_vals]

# Cost as a function of the intercept, holding the slope fixed at 1.5
# NOTE(review): 1.5 is hard-coded rather than the fitted slope m - confirm this is intended.
costarr2 = [_mse_for_line(1.5, intercept, _x_vals, _y_vals) for intercept in c_vals]

In [26]:
# Swept cost curve over the candidate slopes; the figure is the cell's displayed value
fig_cost_vs_slope = px.line(
    x=m_vals,
    y=costarr1,
    title='Cost vs Slope (m)',
    labels={'x': 'm (Slope)', 'y':'Cost (MSE)'},
    width=500,
    height=500,
)
fig_cost_vs_slope


In [27]:
# Swept cost curve over the candidate intercepts. The title now names the
# intercept; it previously repeated the slope chart's title.
px.line(x=c_vals, y=costarr2, title='Cost vs Intercept (c)', labels={'x': 'c (Intercept)', 'y':'Cost (MSE)'}, width=500, height=500)

In [28]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Side-by-side view of the two swept cost curves
fig = make_subplots(rows=1, cols=2, subplot_titles=('Cost vs Slope (m)', 'Cost vs Intercept (c)'))

# Left panel: cost as a function of the slope
fig.add_trace(
    go.Scatter(x=m_vals, y=costarr1),
    row=1, col=1
)

# Right panel: cost as a function of the intercept. This must use the
# intercept sweep (c_vals / costarr2), not a second copy of the slope sweep.
fig.add_trace(
    go.Scatter(x=c_vals, y=costarr2),
    row=1, col=2
)

# Update xaxis properties
fig.update_xaxes(title_text="m (Slope)", row=1, col=1)
fig.update_xaxes(title_text="c (Intercept)", row=1, col=2)

# Update yaxis properties
fig.update_yaxes(title_text="Cost (MSE)", row=1, col=1)
fig.update_yaxes(title_text="Cost (MSE)", row=1, col=2)

fig.update_layout(title_text="Cost Vs slope and intercept", showlegend=False,)
fig.show()