In [1]:
import torch
torch.__version__

'2.9.1+cu128'

In [2]:
M = torch.rand((1000,1000))
M.device

device(type='cpu')

In [3]:
%timeit M @ M.T

6.7 ms ± 145 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [4]:
M = M.cuda()
M.device

device(type='cuda', index=0)

In [5]:
M = torch.rand((1000, 1000), device="cuda")


In [6]:
%timeit M @ M.T

1.58 ms ± 491 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [7]:
x = torch.tensor(5.0, requires_grad=True)
f = x ** 2
f

tensor(25., grad_fn=<PowBackward0>)

In [8]:
f.backward()

In [9]:
x.grad

tensor(10.)

In [10]:
# x.grad.zero_()

In [11]:
x.grad

tensor(10.)

In [12]:
x

tensor(5., requires_grad=True)

In [13]:
x.grad

tensor(10.)

In [14]:
learning_rate = 0.1
with torch.no_grad():
    x -= learning_rate * x.grad

In [15]:
x

tensor(4., requires_grad=True)

In [16]:
learning_rate = 0.1
x = torch.tensor(5.0, requires_grad=True)
for iteration in range(100):
    f = x ** 2
    f.backward()
    with torch.no_grad():
        x -= learning_rate * x.grad
    x.grad.zero_()

x.round()

tensor(0., grad_fn=<RoundBackward0>)

z += 1 doesnt work as it tries to modify the tensor in place but z = z + 1 works as a new tensor is created and assigned to z, but the original tensor is unchanged and recorded in the computation graph of the final tensor.

In [17]:
t = torch.tensor(2.0, requires_grad=True)
z = t.exp()
z = z + 1
# with torch.autograd.set_detect_anomaly(True):
z.backward()

Some operation such as exp(), relu(), rsqrt(), sigmoid(), sqrt(), 
tan(), tanh() save their outputs in the computation graph during forward pass and use these outputs to compute the gradients during the backward pass and hence they throw error when we modify them in place.

Whereas some operations such as abs(), cos(), log(), sin(), square(), var() save their inputs instead of outputs.

In [18]:
t.grad

tensor(7.3891)

In [19]:
# Use !wget with ! at the beginning inside Colab
!wget https://ndownloader.figshare.com/files/5976036

--2026-01-04 22:29:32--  https://ndownloader.figshare.com/files/5976036
Resolving ndownloader.figshare.com (ndownloader.figshare.com)... 34.243.115.126, 34.253.130.127, 34.243.228.51, ...
Connecting to ndownloader.figshare.com (ndownloader.figshare.com)|34.243.115.126|:443... connected.
HTTP request sent, awaiting response... 403 Forbidden
2026-01-04 22:29:33 ERROR 403: Forbidden.



In [20]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
# housing = fetch_california_housing() 

In [21]:
housing = pd.read_csv("datasets/housing/housing.csv")

In [22]:
housing.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [84]:
housing["total_bedrooms"] = housing["total_bedrooms"].fillna(housing["total_bedrooms"].mean())

In [85]:
housing["average_houses"] = housing["population"] / housing["households"]
housing["average_rooms"] = housing["total_rooms"] / housing["average_houses"]
housing["average_bedrooms"] = housing["total_bedrooms"] / housing["average_houses"]
housing["average_members"] = housing["population"] / housing["households"]

In [81]:
housing["total_bedrooms"].mean()

np.float64(537.8705525375618)

In [None]:
# housing["total_bedrooms"] = housing["total_bedrooms"].fillna(housing["total_bedrooms"].mean())

In [86]:
housing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           20640 non-null  float64
 1   latitude            20640 non-null  float64
 2   housing_median_age  20640 non-null  float64
 3   total_rooms         20640 non-null  float64
 4   total_bedrooms      20640 non-null  float64
 5   population          20640 non-null  float64
 6   households          20640 non-null  float64
 7   median_income       20640 non-null  float64
 8   median_house_value  20640 non-null  float64
 9   ocean_proximity     20640 non-null  object 
 10  average_houses      20640 non-null  float64
 11  average_rooms       20640 non-null  float64
 12  average_bedrooms    20640 non-null  float64
 13  average_members     20640 non-null  float64
dtypes: float64(13), object(1)
memory usage: 2.2+ MB


In [87]:
X = housing.drop(columns=["ocean_proximity", "median_house_value", "total_rooms", "total_bedrooms", "average_houses", "households"])
y = housing["median_house_value"]


In [88]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           20640 non-null  float64
 1   latitude            20640 non-null  float64
 2   housing_median_age  20640 non-null  float64
 3   population          20640 non-null  float64
 4   median_income       20640 non-null  float64
 5   average_rooms       20640 non-null  float64
 6   average_bedrooms    20640 non-null  float64
 7   average_members     20640 non-null  float64
dtypes: float64(8)
memory usage: 1.3 MB


In [89]:
y

0        452600.0
1        358500.0
2        352100.0
3        341300.0
4        342200.0
           ...   
20635     78100.0
20636     77100.0
20637     92300.0
20638     84700.0
20639     89400.0
Name: median_house_value, Length: 20640, dtype: float64

In [90]:
housing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           20640 non-null  float64
 1   latitude            20640 non-null  float64
 2   housing_median_age  20640 non-null  float64
 3   total_rooms         20640 non-null  float64
 4   total_bedrooms      20640 non-null  float64
 5   population          20640 non-null  float64
 6   households          20640 non-null  float64
 7   median_income       20640 non-null  float64
 8   median_house_value  20640 non-null  float64
 9   ocean_proximity     20640 non-null  object 
 10  average_houses      20640 non-null  float64
 11  average_rooms       20640 non-null  float64
 12  average_bedrooms    20640 non-null  float64
 13  average_members     20640 non-null  float64
dtypes: float64(13), object(1)
memory usage: 2.2+ MB


### Implementing Linear Regression

Linear Regression using Tensors and Autograd

In [91]:
from sklearn.model_selection import train_test_split

X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, random_state=42
)

In [92]:
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)

In [93]:
X_train

Unnamed: 0,longitude,latitude,housing_median_age,population,median_income,average_rooms,average_bedrooms,average_members
16860,-117.42,33.94,35.0,1094.0,4.1528,569.188300,104.867459,3.099150
12184,-118.34,34.09,14.0,1691.0,2.2000,1507.931402,496.841514,2.010702
19319,-121.85,37.44,8.0,241.0,7.3090,97.219917,13.921162,4.381818
10523,-118.08,33.84,25.0,2827.0,3.3438,1124.357977,289.911567,3.287209
12294,-120.17,39.33,10.0,195.0,0.9283,299.128205,68.692308,2.052632
...,...,...,...,...,...,...,...,...
5914,-117.95,33.83,31.0,1348.0,4.9394,741.745549,119.181751,3.263923
16052,-118.81,34.25,4.0,3950.0,5.7160,3846.371392,768.265063,2.378085
13592,-118.53,34.38,18.0,2305.0,3.2270,592.596963,157.214317,3.860972
2089,-122.24,37.47,41.0,455.0,5.1071,444.600000,76.292308,2.660819


In [94]:
import numpy as np
X_train = X_train.to_numpy()
X_valid = X_valid.to_numpy()
X_test = X_test.to_numpy()

X_train = torch.FloatTensor(X_train)
X_valid = torch.FloatTensor(X_valid)
X_test = torch.FloatTensor(X_test)

In [95]:
means = X_train.mean(dim=0, keepdim=True)
stds = X_train.std(dim = 0, keepdim=True)
X_train = (X_train - means) / stds
X_valid = (X_valid - means) / stds
X_test = (X_test - means) / stds

In [96]:
y_train = torch.FloatTensor(y_train.values).view(-1, 1)
y_test = torch.FloatTensor(y_test.values).view(-1,1)
y_valid = torch.FloatTensor(y_valid.values).view(-1,1)

In [97]:
torch.manual_seed(42)
n_features = X_train.shape[1]
w = torch.randn((n_features, 1), requires_grad=True)
b = torch.tensor(0., requires_grad=True)

In [98]:
w, b

(tensor([[ 0.3367],
         [ 0.1288],
         [ 0.2345],
         [ 0.2303],
         [-1.1229],
         [-0.1863],
         [ 2.2082],
         [-0.6380]], requires_grad=True),
 tensor(0., requires_grad=True))

In [99]:
X_train[0]

tensor([ 1.0682, -0.7920,  0.5019, -0.2897,  0.1516, -0.4823, -0.5448, -0.0048])

In [101]:
learning_rate = 0.4
n_epochs = 20
for epoch in range(n_epochs):
    y_pred = X_train @ w + b
    # print(f"y_pred:{y_pred}")
    loss = ((y_pred - y_train) ** 2).mean()
    loss.backward()

    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad
        w.grad.zero_()
        b.grad.zero_()
    print(f"Epoch:{epoch + 1}/ {n_epochs},Loss: {loss.item()}," )

Epoch:1/ 20,Loss: 87536041984.0,
Epoch:2/ 20,Loss: 117107113984.0,
Epoch:3/ 20,Loss: 157267361792.0,
Epoch:4/ 20,Loss: 211805962240.0,
Epoch:5/ 20,Loss: 285868720128.0,
Epoch:6/ 20,Loss: 386443116544.0,
Epoch:7/ 20,Loss: 523017551872.0,
Epoch:8/ 20,Loss: 708476534784.0,
Epoch:9/ 20,Loss: 960315195392.0,
Epoch:10/ 20,Loss: 1302292201472.0,
Epoch:11/ 20,Loss: 1766667321344.0,
Epoch:12/ 20,Loss: 2397249732608.0,
Epoch:13/ 20,Loss: 3253525282816.0,
Epoch:14/ 20,Loss: 4416271810560.0,
Epoch:15/ 20,Loss: 5995176656896.0,
Epoch:16/ 20,Loss: 8139185848320.0,
Epoch:17/ 20,Loss: 11050564976640.0,
Epoch:18/ 20,Loss: 15003949203456.0,
Epoch:19/ 20,Loss: 20372286078976.0,
Epoch:20/ 20,Loss: 27662005305344.0,


### Linear Regression using Pytorch's high level api

In [102]:
import torch.nn as nn

torch.manual_seed(42)
model = nn.Linear(in_features=n_features, out_features=1)
model.weight, model.bias

(Parameter containing:
 tensor([[ 0.2703,  0.2935, -0.0828,  0.3248, -0.0775,  0.0713, -0.1721,  0.2076]],
        requires_grad=True),
 Parameter containing:
 tensor([0.3117], requires_grad=True))

In [103]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
loss = nn.MSELoss()

In [106]:
def train_bgd(model, optimizer, criterion, X_train, y_train, n_epochs):
    for epoch in range(n_epochs):
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        print(f"Epoch {epoch + 1}/{n_epochs}, Loss: {loss.item()}")

In [107]:
train_bgd(model, optimizer, loss, X_train, y_train, n_epochs)

Epoch 1/20, Loss: 55644086272.0
Epoch 2/20, Loss: 8203159552.0
Epoch 3/20, Loss: 6102135808.0
Epoch 4/20, Loss: 6005691904.0
Epoch 5/20, Loss: 6071224320.0
Epoch 6/20, Loss: 6215290880.0
Epoch 7/20, Loss: 6449275392.0
Epoch 8/20, Loss: 6798994944.0
Epoch 9/20, Loss: 7301598208.0
Epoch 10/20, Loss: 8008439296.0
Epoch 11/20, Loss: 8989797376.0
Epoch 12/20, Loss: 10341515264.0
Epoch 13/20, Loss: 12194027520.0
Epoch 14/20, Loss: 14724708352.0
Epoch 15/20, Loss: 18174640128.0
Epoch 16/20, Loss: 22871353344.0
Epoch 17/20, Loss: 29259771904.0
Epoch 18/20, Loss: 37944238080.0
Epoch 19/20, Loss: 49745440768.0
Epoch 20/20, Loss: 65778012160.0
