## Linear Regression

In [122]:
import numpy as np
from sklearn.linear_model import LinearRegression
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torchviz import make_dot

In [123]:
# Synthetic data: y = true_w * x + true_b + Gaussian noise
N, true_w, true_b = 100, 2, 1

# Seed NumPy's global RNG so every draw below is reproducible.
# The order of the draws matters: noise first, then the features, then the shuffle.
np.random.seed(42)
epsilon = 0.1 * np.random.randn(N, 1)
x = np.random.randn(N, 1)
y = true_w * x + true_b + epsilon

# Shuffle the row indices in place, then cut them at the 80% mark:
# the first part is the training set, the remainder the validation set.
idx = np.arange(N)
np.random.shuffle(idx)
train_idx, valid_idx = np.split(idx, [int(N * 0.8)])

X_train, X_valid = x[train_idx], x[valid_idx]
y_train, y_valid = y[train_idx], y[valid_idx]

In [124]:
# Step 0: random initialization of the parameters
np.random.seed(42)
w=np.random.randn(1)
b=np.random.randn(1)

print(b,w)

# Hyperparameters: learning rate and number of passes over the training set
lr=0.1
n_epochs=100

for epoch in tqdm(range(n_epochs)):
    # Step 1: forward pass -- predictions of the current model
    y_hat=w*X_train+b

    # Step 2: loss -- mean squared error over the whole training set
    # (computed for reference only; the update below uses the gradients)
    error=y_hat-y_train
    loss=(error**2).mean()

    # Step 3: gradients of the MSE with respect to b and w
    b_grad=2*error.mean()
    w_grad=2*(X_train*error).mean()

    # Step 4: gradient-descent update of both parameters
    b=b-lr*b_grad
    w=w-lr*w_grad


print(f"b: {b[0]},\tw: {w[0]}")



[-0.1382643] [0.49671415]


  0%|          | 0/100 [00:00<?, ?it/s]

b: 0.9953649757559992,	w: 1.9849522275624543


### Compare coefficients

In [125]:
# Sanity check: fit scikit-learn's least-squares LinearRegression on the same
# training split; its intercept and coefficient should agree with the b and w
# found by the gradient-descent loop.
model=LinearRegression().fit(X_train,y_train)
print(f"b:{model.intercept_[0]}, \tw:{model.coef_[0][0]}")

b:0.9953649899692825, 	w:1.9849523172037076


## PyTorch Tensors

In [126]:
# Tensors of increasing rank: 0-D scalar, 1-D vector, 2-D matrix, 3-D tensor
scalar = torch.tensor(3.14159)
vector = torch.tensor([1, 2, 3])
matrix = torch.ones(2, 3)
tensor = torch.randn(2, 2, 3, dtype=torch.float32)  # a stack of two 2x3 matrices

print(f"scalar: {scalar},\nvector: {vector},\nmatrix: {matrix},\ntensor: {tensor}")

scalar: 3.141590118408203,
vector: tensor([1, 2, 3]),
matrix: tensor([[1., 1., 1.],
        [1., 1., 1.]]),
tensor: tensor([[[-0.1044, -0.0098, -0.4790],
         [ 2.1484,  1.7330,  1.0709]],

        [[-1.3601,  0.7459,  0.6806],
         [ 0.3519,  0.1171, -2.0195]]])


In [127]:
# Two equivalent ways to read a tensor's shape: the .shape attribute and the .size() method
print(tensor.shape, tensor.size())

torch.Size([2, 2, 3]) torch.Size([2, 2, 3])


In [128]:
# A 0-dimensional (scalar) tensor has an empty shape, torch.Size([]), because it has no axes
scalar.size(),scalar.shape

(torch.Size([]), torch.Size([]))

* The `view()` method returns a tensor with the desired shape that shares its underlying data with the original tensor
* The `reshape()` method may or may not create a copy

In [129]:
# view() gives a (1, 6) tensor over the SAME data as `matrix`, so the write
# through the view below also changes element [0, 1] of the original matrix.
same_matrix=matrix.view(1,6)
same_matrix[0,1]=5

print(f"{matrix}\n\n{same_matrix}")

tensor([[1., 5., 1.],
        [1., 1., 1.]])

tensor([[1., 5., 1., 1., 1., 1.]])


`new_tensor()` and `clone()` duplicate the data in memory

In [130]:
# new_tensor() copies the data, so editing the copy leaves `matrix` untouched.
# NOTE: because the source is already a tensor, PyTorch emits a UserWarning on
# this line recommending clone().detach() for copy construction.
different_matrix=matrix.new_tensor(matrix.view(1,6))
different_matrix[0,1]=7

print(f"{matrix}\n\n{different_matrix}")


tensor([[1., 5., 1.],
        [1., 1., 1.]])

tensor([[1., 7., 1., 1., 1., 1.]])


  different_matrix=matrix.new_tensor(matrix.view(1,6))


PyTorch prefers **clone()** followed by **detach()**

In [131]:
# clone() copies the data and detach() removes the copy from the computational graph.
# The result is independent of `matrix`, so the write below does not propagate back to it.
another_matrix=matrix.view(1,6).clone().detach()
another_matrix[0,1]=8 

print(f"{matrix}\n\n{another_matrix}")

tensor([[1., 5., 1.],
        [1., 1., 1.]])

tensor([[1., 8., 1., 1., 1., 1.]])


Both `as_tensor()` and `from_numpy()` share the underlying data with the original NumPy array

In [132]:
# as_tensor() keeps NumPy's dtype: the float64 array becomes a float64 tensor
X_train_tensor=torch.as_tensor(X_train)
X_train.dtype,X_train_tensor.dtype

(dtype('float64'), torch.float64)

In [133]:
# Cast to float32: lower-precision floats occupy less memory and are faster to compute
X_train_tensor=X_train_tensor.float()
X_train_tensor.dtype

torch.float32

In [134]:
# as_tensor() shares memory with the source array: the write to dummy_array
# below is visible through dummy_tensor as well.
dummy_array=np.array([1,2,3])
dummy_tensor=torch.as_tensor(dummy_array)
dummy_array[1]=0

dummy_tensor

tensor([1, 0, 3], dtype=torch.int32)

`torch.tensor()` always makes a copy of the data

In [135]:
# torch.tensor() copies: dummy_tensor2 keeps the values it saw at creation time,
# even though dummy_array[1] is overwritten with 9 right afterwards.
dummy_tensor2=torch.tensor(dummy_array)
dummy_array[1]=9

dummy_tensor2

tensor([1, 0, 3], dtype=torch.int32)

Transform a tensor into a NumPy array

In [150]:
# .numpy() converts a tensor back to an ndarray. dummy_tensor was built with
# as_tensor(), so it still reflects the latest write to dummy_array.
print(f'{dummy_tensor.numpy()}  --->\t{type(dummy_tensor.numpy())}')

[1 9 3]  --->	<class 'numpy.ndarray'>


## CPU/GPU

In [152]:
# Pick the compute device: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device='cuda'
else:
    device='cpu'
device

'cpu'

In [154]:
# How many GPUs and Which model they are
n_cudas=torch.cuda.device_count()
for i in range(n_cudas):
    print(torch.cuda.get_device_name(i))

In [157]:
# turn tensor into cuda tensor
gpu_tensor=torch.as_tensor(X_train).to(device)
gpu_tensor[0]

tensor([-1.2457], dtype=torch.float64)

In [160]:
device='cuda' if torch.cuda.is_available() else 'cpu'

# Transform the NumPy arrays into float32 tensors and send them to the chosen device.
# Both arrays go through as_tensor(): the .float() cast from float64 already
# produces a fresh copy, so an extra torch.tensor() copy beforehand is redundant.
X_train_tensor=torch.as_tensor(X_train).float().to(device)
y_train_tensor=torch.as_tensor(y_train).float().to(device)



`.type()` is more useful since it also tells us where the tensor lives (its device)

In [163]:
# Three notions of "type": .type() gives the full tensor type string
# (it would read torch.cuda.FloatTensor for a tensor on a GPU),
# .dtype gives the element type, and type() gives the Python class.
print(f"{X_train_tensor.type()}\n\n{X_train_tensor.dtype}\n\n{type(X_train_tensor)}")

torch.FloatTensor

torch.float32

<class 'torch.Tensor'>


In [167]:
# Turn a (possibly CUDA) tensor back into a NumPy array.
# .numpy() only works for tensors in host memory; on a CUDA tensor it raises
# TypeError, so the failure is caught and shown instead of aborting the cell.
try:
    back_to_numpy=X_train_tensor.numpy()
except TypeError as err:
    print(f"numpy() failed: {err}")

# Portable form: .cpu() copies the tensor to host memory first (it returns the
# tensor itself when it is already on the CPU), then .numpy() converts it.
back_to_numpy=X_train_tensor.cpu().numpy()