In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

from sklearn.linear_model import LinearRegression

import numpy as np
import torch
import torch.optim as optim

In [49]:
df = pd.read_csv("cars.csv")
df.head()

Unnamed: 0,speed,dist
0,4,2
1,4,10
2,7,4
3,7,22
4,8,16


In [53]:
fig = px.scatter(df, x='speed', y='dist', title="speed-distance relation")
fig.update_layout(width=1024, height=512)
fig.show()  # The scatter plot is a nice linear spread




In [54]:
df.isnull().sum()

speed    0
dist     0
dtype: int64

In [55]:
df.dtypes

speed    int64
dist     int64
dtype: object

In [56]:
x = torch.tensor(df.speed)    # Create pytorch tensors for the input data values
y = torch.tensor(df.dist)     # x and y represent height and weight respectively. Goal is to predict weight given height.
print(x.shape)
print(y.shape)

torch.Size([50])
torch.Size([50])


In [12]:
def J_python(b0, b1):     # the mean squared error - loss function 
    mse = 0
    for i in range(len(df.weight)):
        sse = df.weight[i] - (b1 * df.height[i]  + b0)
        sse = sse ** 2
        mse += sse
    return mse / len(df.weight) # It returns a single number that is the mean squared error for given b0 and b1
     

In [13]:
J_python(0,0)

4418.874371859297

In [57]:
# alternative more effieicnt tensor-based loss function:
def J(b0, b1):
    predictions = ((b1 * x) + b0)     # tensor of predicted values 
    diff = predictions - y         # tensor of the difference between the predicted and actual y values
    return (torch.square(diff)).mean()   # return a 0-dimentional tensor (scalar) of the mean of the square of the diff tensor


In [58]:

b0 = torch.tensor([0.], requires_grad=True)
b1 = torch.tensor([0.], requires_grad=True)
print(b0.shape)
print(b1.shape)

torch.Size([1])
torch.Size([1])


In [59]:
LEARN_RATE = 0.001
EPOCHS = 1000000
loss_over_time = []
params = []
prev_loss = None

optimizer = optim.SGD([b0, b1], lr=LEARN_RATE)

for i in range(EPOCHS):
    loss = J(b0, b1)    # calculate loss
    if i%10 == 0:
        print(loss)        
    loss.backward()     # differentiate it w.r.t b0 and b1
    

    loss_over_time.append(loss.item())  # store loss for plotting 
    params.append((b0, b1))

    with torch.no_grad():        
        b0 -= b0.grad * LEARN_RATE
        b1 -= b1.grad * LEARN_RATE
    # optimizer.step()
    # optimizer.zero_grad()
    b0.grad.zero_()     # reset gradient
    b1.grad.zero_()
    if prev_loss and loss.item() > prev_loss:
        print("Breaking at loss: ", loss.item())
        break
    prev_loss = loss.item()
    

print(f"Loss after {i} epochs: {loss_over_time[-1]}")

Backward0>)
tensor(227.8041, grad_fn=<MeanBackward0>)
tensor(227.8011, grad_fn=<MeanBackward0>)
tensor(227.7981, grad_fn=<MeanBackward0>)
tensor(227.7951, grad_fn=<MeanBackward0>)
tensor(227.7921, grad_fn=<MeanBackward0>)
tensor(227.7891, grad_fn=<MeanBackward0>)
tensor(227.7861, grad_fn=<MeanBackward0>)
tensor(227.7832, grad_fn=<MeanBackward0>)
tensor(227.7803, grad_fn=<MeanBackward0>)
tensor(227.7773, grad_fn=<MeanBackward0>)
tensor(227.7744, grad_fn=<MeanBackward0>)
tensor(227.7715, grad_fn=<MeanBackward0>)
tensor(227.7686, grad_fn=<MeanBackward0>)
tensor(227.7657, grad_fn=<MeanBackward0>)
tensor(227.7629, grad_fn=<MeanBackward0>)
tensor(227.7600, grad_fn=<MeanBackward0>)
tensor(227.7572, grad_fn=<MeanBackward0>)
tensor(227.7543, grad_fn=<MeanBackward0>)
tensor(227.7516, grad_fn=<MeanBackward0>)
tensor(227.7487, grad_fn=<MeanBackward0>)
tensor(227.7459, grad_fn=<MeanBackward0>)
tensor(227.7432, grad_fn=<MeanBackward0>)
tensor(227.7404, grad_fn=<MeanBackward0>)
tensor(227.7377, grad_

In [60]:
print(b0.item(), b1.item())

-16.577552795410156 3.874086380004883


In [70]:
loss_x = [x for x in range(0, len(loss_over_time))]
df_loss = pd.DataFrame({'Epoch': loss_x, 'Loss': loss_over_time})
px.line(df_loss, x='Epoch', y='Loss')

In [61]:
x_min = 0
x_max = max(df.speed)

y_start = b1.item()*x_min + b0.item()
y_end = b1.item()*x_max + b0.item()

fig = px.scatter(df, x='speed', y='dist', title="Height-weight relation")

fig.add_trace(go.Scatter(x=(x_min, x_max), y=(y_start, y_end),
                    mode='lines',
                    name='Gradient Descent line'))


fig.show()  # The scatter plot is a nice linear spread

In [62]:
lm = LinearRegression()     # Create linear Regression Object
lm.fit(df[['speed']],df['dist'])   

LinearRegression()

In [63]:
print(lm.intercept_, lm.coef_  )

-17.57909489051095 [3.93240876]


In [64]:
x_min = 0
x_max = max(df.speed)

y_start = lm.coef_*x_min + lm.intercept_
y_end = lm.coef_*x_max + lm.intercept_

fig = px.scatter(df, x='speed', y='dist', title="Height-weight relation")

fig.add_trace(go.Scatter(x=(x_min, x_max), y=(y_start[0], y_end[0]),
                    mode='lines',
                    name='sklearn line'))


fig.show()  # The scatter plot is a nice linear spread