##  PyTorch Fundamentals

### PyTorch Tensors

In [4]:
import torch

X = torch.tensor([[1.0, 7.0, 3.0],[4.0, 5.0, 6.0]])
X

tensor([[1., 7., 3.],
        [4., 5., 6.]])

In [5]:
X.shape

torch.Size([2, 3])

In [6]:
X.dtype

torch.float32

In [7]:
X[0, 2]

tensor(3.)

In [8]:
X[:, 2]

tensor([3., 6.])

In [9]:
10 * (X + 1)

tensor([[20., 80., 40.],
        [50., 60., 70.]])

In [10]:
X.exp()

tensor([[   2.7183, 1096.6332,   20.0855],
        [  54.5982,  148.4132,  403.4288]])

In [11]:
X.mean()

tensor(4.3333)

In [12]:
X.max(0)  

torch.return_types.max(
values=tensor([4., 7., 6.]),
indices=tensor([1, 0, 1]))

In [13]:
X.T

tensor([[1., 4.],
        [7., 5.],
        [3., 6.]])

In [14]:
X.T.shape

torch.Size([3, 2])

In [15]:
X @ X.T

tensor([[59., 57.],
        [57., 77.]])

In [16]:
import numpy as np

X.numpy()

array([[1., 7., 3.],
       [4., 5., 6.]], dtype=float32)

In [17]:
np.array([[1.0, 7.0, 3.0],[4.0, 5.0, 6.0]])

array([[1., 7., 3.],
       [4., 5., 6.]])

In [18]:
torch.tensor(np.array([[1.0, 7.0, 3.0],[4.0, 5.0, 6.0]]))

tensor([[1., 7., 3.],
        [4., 5., 6.]], dtype=torch.float64)

In [19]:
torch.tensor(np.array([[1.0, 7.0, 3.0],[4.0, 5.0, 6.0]]),dtype =torch.float32)

tensor([[1., 7., 3.],
        [4., 5., 6.]])

In [20]:
torch.FloatTensor(np.array([[1.0, 7.0, 3.0],[4.0, 5.0, 6.0]]))

tensor([[1., 7., 3.],
        [4., 5., 6.]])

In [21]:
# torch.from_numpy() wraps the NumPy buffer instead of copying it, so the tensor
# and the array share memory (the tensor also keeps NumPy's float64 dtype).
X2_np = np.array([[1.0, 7.0, 3.0],[4.0, 5.0, 6.0]])
X2 = torch.from_numpy(X2_np)
X2_np[0,1] = 33  # mutate the array *after* the conversion ...
X2  # ... and the change shows up in the tensor as well

tensor([[ 1., 33.,  3.],
        [ 4.,  5.,  6.]], dtype=torch.float64)

In [22]:
X[:,1] = -44
X

tensor([[  1., -44.,   3.],
        [  4., -44.,   6.]])

In [23]:
X.relu()

tensor([[1., 0., 3.],
        [4., 0., 6.]])

## Hardware Acceleration

In [25]:
# Start from the CPU fallback and upgrade to an accelerator when one is present:
# NVIDIA CUDA takes priority, Apple's Metal backend (MPS) comes second.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"

In [26]:
device

'mps'

In [27]:
# First CPU → then GPU
M = torch.tensor([[1.0, 7.0, 3.0],[4.0, 5.0, 6.0]])
M = M.to(device)
M.device

device(type='mps', index=0)

In [28]:
# Direct GPU
M = torch.tensor([[1.0, 7.0, 3.0],[4.0, 5.0, 6.0]], device=device)
M.device

device(type='mps', index=0)

In [29]:
R = M @ M.T
R

tensor([[59., 57.],
        [57., 77.]], device='mps:0')

In [30]:
M = torch.rand((2000,2000))
M @ M.T # warmup
%timeit M @ M.T

M = M.to(device)
M @ M.T # warmup
%timeit M @ M.T

11.2 ms ± 387 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
10.6 ms ± 130 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## Autograd

In [32]:
X = torch.tensor(4.0, requires_grad = True)
f = X ** 2
f

tensor(16., grad_fn=<PowBackward0>)

In [33]:
f.backward()
X.grad

tensor(8.)

In [34]:
learning_rate = 0.1
with torch.no_grad():
    X -= learning_rate * X.grad

In [35]:
X

tensor(3.2000, requires_grad=True)

In [36]:
# detach() returns a tensor that is cut off from the autograd graph but still
# shares its storage with X, so the in-place update below changes X as well.
X_detached = X.detach()
X_detached -= learning_rate * X.grad  # X.grad has not been zeroed yet, so this applies the same step a second time
X_detached

tensor(2.4000)

In [37]:
X.grad.zero_()   # resets gradients not value

tensor(0.)

In [38]:
X

tensor(2.4000, requires_grad=True)

In [39]:
# Minimize f(x) = x**2 with plain gradient descent, starting from x = 4.
learning_rate = 0.01
x = torch.tensor(4.0, requires_grad=True)
for step in range(50):
    f = x.pow(2)   # forward pass
    f.backward()   # backward pass: fills x.grad with df/dx
    with torch.no_grad():
        # The update itself must not be recorded by autograd.
        x.sub_(learning_rate * x.grad)
    # backward() accumulates into .grad, so clear it before the next step.
    x.grad.zero_()
    print(x)
    

tensor(3.9200, requires_grad=True)
tensor(3.8416, requires_grad=True)
tensor(3.7648, requires_grad=True)
tensor(3.6895, requires_grad=True)
tensor(3.6157, requires_grad=True)
tensor(3.5434, requires_grad=True)
tensor(3.4725, requires_grad=True)
tensor(3.4031, requires_grad=True)
tensor(3.3350, requires_grad=True)
tensor(3.2683, requires_grad=True)
tensor(3.2029, requires_grad=True)
tensor(3.1389, requires_grad=True)
tensor(3.0761, requires_grad=True)
tensor(3.0146, requires_grad=True)
tensor(2.9543, requires_grad=True)
tensor(2.8952, requires_grad=True)
tensor(2.8373, requires_grad=True)
tensor(2.7805, requires_grad=True)
tensor(2.7249, requires_grad=True)
tensor(2.6704, requires_grad=True)
tensor(2.6170, requires_grad=True)
tensor(2.5647, requires_grad=True)
tensor(2.5134, requires_grad=True)
tensor(2.4631, requires_grad=True)
tensor(2.4139, requires_grad=True)
tensor(2.3656, requires_grad=True)
tensor(2.3183, requires_grad=True)
tensor(2.2719, requires_grad=True)
tensor(2.2265, requi

## Implementation of Linear Regression

In [41]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Load the California housing data, hold out a test set, then split the
# remainder into training and validation sets. No test_size is passed, so both
# splits use train_test_split's default fraction, with the same fixed seed.
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=42
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42
)

In [42]:
X_train.dtype

dtype('float64')

In [43]:
# Convert the float64 NumPy feature matrices to float32 tensors.
# torch.as_tensor() is used instead of the legacy torch.FloatTensor constructor:
# it states the target dtype explicitly and also accepts an input that is
# already a tensor, so re-running this cell is harmless.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
X_valid = torch.as_tensor(X_valid, dtype=torch.float32)
X_test = torch.as_tensor(X_test, dtype=torch.float32)

In [44]:
X_train.shape

torch.Size([11610, 8])

In [45]:
X_train.dtype

torch.float32

In [46]:
X_train[:4]

tensor([[ 3.5214e+00,  1.5000e+01,  3.0499e+00,  1.1065e+00,  1.4470e+03,
          1.6060e+00,  3.7630e+01, -1.2243e+02],
        [ 5.3275e+00,  5.0000e+00,  6.4901e+00,  9.9105e-01,  3.4640e+03,
          3.4433e+00,  3.3690e+01, -1.1739e+02],
        [ 3.1000e+00,  2.9000e+01,  7.5424e+00,  1.5915e+00,  1.3280e+03,
          2.2508e+00,  3.8440e+01, -1.2298e+02],
        [ 7.1736e+00,  1.2000e+01,  6.2890e+00,  9.9744e-01,  1.0540e+03,
          2.6957e+00,  3.3550e+01, -1.1770e+02]])

In [47]:
# Per-feature statistics of the training set. Keeping the reduced dimension
# yields row vectors that broadcast against the (n_samples, n_features) matrices.
means = X_train.mean(dim=0, keepdim=True)
std = X_train.std(dim=0, keepdim=True)

In [48]:
# Standardize every split with the *training* statistics so that no information
# from the validation or test data leaks into the preprocessing.
# NOTE: this overwrites its inputs, so running the cell twice rescales the data twice.
X_train, X_valid, X_test = (
    (split - means) / std for split in (X_train, X_valid, X_test)
)
X_train[:4]

tensor([[-0.1940, -1.0778, -0.9433,  0.0149,  0.0207, -0.5729,  0.9292, -1.4221],
        [ 0.7520, -1.8688,  0.4055, -0.2333,  1.8614,  0.2052, -0.9165,  1.0966],
        [-0.4147,  0.0297,  0.8181,  1.0567, -0.0879, -0.2998,  1.3087, -1.6970],
        [ 1.7188, -1.3151,  0.3266, -0.2195, -0.3379, -0.1115, -0.9821,  0.9417]])

In [49]:
a = torch.tensor([[[1,2],[3,4]]])
a.shape

torch.Size([1, 2, 2])

In [50]:
a = a.view(-1,1)  # column vector

In [51]:
a

tensor([[1],
        [2],
        [3],
        [4]])

In [52]:
a.shape

torch.Size([4, 1])

In [53]:
# Convert the targets to float32 column vectors of shape (n_samples, 1) so they
# line up with the output of `X @ w + b`.
# torch.as_tensor() replaces the legacy torch.FloatTensor constructor and also
# accepts an input that is already a tensor, so re-running this cell is harmless.
y_train = torch.as_tensor(y_train, dtype=torch.float32).view(-1,1)
y_valid = torch.as_tensor(y_valid, dtype=torch.float32).view(-1,1)
y_test = torch.as_tensor(y_test, dtype=torch.float32).view(-1,1)
y_train.shape

torch.Size([11610, 1])

In [54]:
# Model parameters for linear regression: one randomly initialized weight per
# feature (column vector) and a scalar bias starting at zero. Both track gradients.
torch.manual_seed(42)  # fixed seed so the initial weights are reproducible
n_features = X_train.size(1)
w = torch.randn(n_features, 1, requires_grad=True)
b = torch.zeros((), requires_grad=True)

In [55]:
w

tensor([[ 0.3367],
        [ 0.1288],
        [ 0.2345],
        [ 0.2303],
        [-1.1229],
        [-0.1863],
        [ 2.2082],
        [-0.6380]], requires_grad=True)

In [56]:
b

tensor(0., requires_grad=True)

In [57]:
# Full-batch gradient descent on the mean squared error of the linear model.
learning_rate = 0.4
n_epochs = 20
for epoch in range(n_epochs):
    # Forward pass: predictions and loss over the whole training set.
    y_pred = X_train @ w + b
    loss = (y_pred - y_train).pow(2).mean()
    # Backward pass: fills w.grad and b.grad.
    loss.backward()
    # Parameter update, kept out of the autograd graph. Gradients are cleared
    # afterwards because backward() accumulates into .grad.
    with torch.no_grad():
        for param in (b, w):
            param -= learning_rate * param.grad
            param.grad.zero_()
    # The reported loss is the one measured before this epoch's update.
    print(f"Epoch:{epoch+1}/{n_epochs}, Loss : {loss.item()}")
        

Epoch:1/20, Loss : 16.158458709716797
Epoch:2/20, Loss : 4.87937593460083
Epoch:3/20, Loss : 2.255227565765381
Epoch:4/20, Loss : 1.330764651298523
Epoch:5/20, Loss : 0.9680710434913635
Epoch:6/20, Loss : 0.8142688870429993
Epoch:7/20, Loss : 0.7417054176330566
Epoch:8/20, Loss : 0.702070951461792
Epoch:9/20, Loss : 0.6765925288200378
Epoch:10/20, Loss : 0.65779709815979
Epoch:11/20, Loss : 0.6426157355308533
Epoch:12/20, Loss : 0.6297228336334229
Epoch:13/20, Loss : 0.6184946298599243
Epoch:14/20, Loss : 0.6085972189903259
Epoch:15/20, Loss : 0.5998220443725586
Epoch:16/20, Loss : 0.5920190215110779
Epoch:17/20, Loss : 0.5850694179534912
Epoch:18/20, Loss : 0.5788736343383789
Epoch:19/20, Loss : 0.5733456015586853
Epoch:20/20, Loss : 0.5684102773666382


In [58]:
y_test[:3]

tensor([[0.4770],
        [0.4580],
        [5.0000]])

In [59]:
# Predict the first three test instances; no gradients are needed for inference.
X_new = X_test[:3]
with torch.no_grad():
    y_pred = torch.matmul(X_new, w) + b

In [60]:
y_pred

tensor([[0.8916],
        [1.6480],
        [2.6577]])