<a href="https://colab.research.google.com/github/Besutodesuka/GenAI/blob/main/2_PyTorch_Tutorials.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch Tutorials
## Checking CUDA device

In [None]:
import torch
import numpy as np

In [None]:
# Pick the GPU when PyTorch reports a usable one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

## Tensors

In [None]:
# Tensors of rank 0, 1 and 2: a scalar, a vector and a matrix.
a = torch.tensor(3.14)
print("a:", a, "shape:", a.size(), "dtype:", a.dtype)

b = torch.tensor([1.0, 2.0, 3.0])
print("b:", b, "shape:", b.size(), "dtype:", b.dtype)

# Built from integer literals, so the dtype differs from the float tensors above.
c = torch.tensor([[1, 2], [3, 4]])
print("c:\n", c, "shape:", c.size(), "dtype:", c.dtype)

In [None]:
# NumPy -> torch: from_numpy reuses the array's memory rather than copying it.
x_np = np.asarray([1, 2, 3], dtype=np.float32)
print("NumPy:", x_np, type(x_np))

x_t = torch.from_numpy(x_np)
print("Torch:", x_t, type(x_t))

# torch -> NumPy: the round trip hands back an ndarray over the same data.
x_back = x_t.numpy()
print("Back to NumPy:", x_back, type(x_back))

In [None]:
# Show which device holds x_t; it was built from a NumPy array, so expect the CPU.
x_t.device

In [None]:
# Allocate directly on the first GPU when one is present; otherwise stay on
# the CPU so the cell still runs on machines without CUDA.
x = torch.randn(3, 4, 5, device="cuda:0" if torch.cuda.is_available() else "cpu")
x.device

In [None]:
# Move the CPU tensor to whichever device was selected at the top of the
# notebook; a hard-coded 'cuda' raises on machines without a GPU.
x_t1 = x_t.to(device)
x_t1.device

In [None]:
# Common factory functions: constant fills, uniform noise, literals and ranges.
z = torch.zeros((2, 3))
o = torch.ones((2, 3))
r = torch.rand((2, 3))
t = torch.tensor([10, 20, 30])
a = torch.arange(10)

# Print each result; the 2-D tensors start on a new line for readability.
print("zeros:\n", z)
print("ones:\n", o)
print("rand:\n", r)
print("tensor:", t)
print("arange:", a)


In [None]:
# Dimensions of z, which was created with torch.zeros(2, 3).
z.shape

In [None]:
# Element type of z; no dtype was passed to torch.zeros, so this is the default.
z.dtype

In [None]:
# Device holding z; no device argument was given when it was created.
z.device

## PyTorch Operations

In [None]:
# A fake mini-batch of images laid out as (batch, channels, height, width).
B, C, H, W = 32, 3, 64, 64
images = torch.rand((B, C, H, W))

print("images shape:", images.shape)  # (32, 3, 64, 64)

# Example: collapse everything after the batch axis so that each image
# becomes a single row vector, the input shape an MLP works with.
flat = images.flatten(start_dim=1)
print("flattened shape:", flat.shape)  # (32, 12288)


In [None]:
# Indexing the batch axis selects one image, dropping that leading dimension.
images[1].shape

In [None]:
A = torch.ones((2, 3))
b = torch.tensor([10.0, 20.0, 30.0])  # shape (3,)

print("A shape:", A.shape)
print("b shape:", b.shape)

# Broadcasting: b is treated as a row and repeated for each row of A -> (2,3)
C = A + b
print("A + b:\n", C)

# Danger example: a (2,1) column is repeated across the columns instead, so
# (2,3) + (2,1) gives a different result than (2,3) + (3,)
d = torch.tensor([100.0, 200.0]).unsqueeze(1)  # shape (2,1)
E = A + d
print("A + d:\n", E)


## CUDA vs CPU

In [None]:
%%time

# Baseline for the CPU-vs-GPU comparison: the device is selected and reported
# here, but the matrix below is created without a device argument.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# 20000 x 20000 random matrix; the product and mean below are the timed work.
x = torch.rand(20000, 20000)
print("x device:", x.device)
y = (x @ x).mean()
# .item() converts the scalar result into a plain Python number for printing.
print("y cpu:", y.item())

In [None]:
%%time

# Copy the matrix to the selected device. This copy happens inside the timed
# cell, so the measurement includes the transfer as well as the math.
x_dev = x.to(device)

print("x_dev device:", x_dev.device)

# A quick operation on the chosen device
y = (x_dev @ x_dev).mean()
# .item() converts the scalar result into a plain Python number for printing.
print("y:", y.item())

## Computational Graph and Autograd

In [None]:
# Two scalar tensors created with requires_grad=True so autograd tracks them.
x = torch.tensor(2.0, requires_grad=True)
w = torch.tensor(3.0, requires_grad=True)

# Forward pass: y = 3*2 + 1 = 7, loss = (7 - 10)^2 = 9.
y = w * x + 1
loss = (y - 10) ** 2

print("y:", y.item())
print("loss:", loss.item())
# grad_fn refers to the operation that produced loss.
print("loss.grad_fn:", loss.grad_fn)  # this is part of a graph


In [None]:
x = torch.tensor(1.0, requires_grad=True)

# Dynamic control flow: the factor applied at each step is chosen from the
# value y holds at that moment, so the recorded graph depends on runtime data.
y = x
for i in range(5):
    scale = 1.5 if y < 2.0 else 0.5
    y = y * scale
    print(y)

# Squared distance of the final y from 1, differentiated back to x.
loss = (y - 1.0) ** 2
loss.backward()

print("final y:", y.item())
print("grad dx:", x.grad)

In [None]:
# Differentiate a cubic polynomial at x = 2 and compare with the hand result.
x = torch.tensor(2.0, requires_grad=True)
y = x**3 + 2*x

# dy/dx = 3x^2 + 2 => at x=2: 3*4+2=14
# backward() on the scalar y fills x.grad with that derivative.
y.backward()

print("y:", y.item())
print("x.grad (expected 14):", x.grad.item())

In [None]:
# Shows that repeated backward() calls add into x.grad instead of replacing it.
x = torch.tensor(2.0, requires_grad=True)

# First backward: d(x^2)/dx = 2x = 4 at x = 2
y1 = x**2
y1.backward()
print("After y1.backward(), x.grad:", x.grad.item())  # 4

# Second backward accumulates: d(3x)/dx = 3 is added to the existing 4
y2 = 3*x
y2.backward()
print("After y2.backward(), x.grad:", x.grad.item())  # 4 + 3 = 7

# Manually clear: zero_() resets the stored gradient in place
x.grad.zero_()
print("After zero_(), x.grad:", x.grad.item())


## Backpropagation

In [None]:
# Input and weight, both tracked so each receives a gradient.
x = torch.tensor(2.0, requires_grad=True)
w = torch.tensor(3.0, requires_grad=True)

# Forward pass: prediction y = w*x, then squared error against a target of 1.
y = torch.mul(w, x)
loss = torch.pow(y - 1, 2)

# Backward pass populates x.grad and w.grad.
loss.backward()

print("loss:", loss.item())
print("dl/dx:", x.grad.item())
print("dl/dw:", w.grad.item())


In [None]:
x = torch.tensor(2.0, requires_grad=True)
w = torch.tensor(3.0, requires_grad=True)

# Same forward pass as before: y = w*x, L = (y - 1)^2.
y = w * x
L = (y - 1) ** 2
L.backward()

# The two chain-rule factors computed by hand, next to autograd's result.
dy_dx = w.item()
dL_dy = 2 * (y.item() - 1)
print("dy/dx =", dy_dx)
print("dL/dy =", dL_dy)
print("autograd dL/dx =", x.grad.item())


## Loss Function

In [None]:
# Targets and predictions; only the predictions need gradients.
y_true = torch.tensor([1.0, 2.0, 3.0])
y_pred = torch.tensor([0.5, 2.5, 2.0], requires_grad=True)

# Mean squared error written out by hand: average of the squared residuals.
loss = (y_pred - y_true).pow(2).mean()
loss.backward()

print("MSE loss:", loss.item())
print("gradients:", y_pred.grad)


In [None]:
import torch.nn.functional as F

# One sample with three raw class scores; the correct class is index 0.
logits = torch.tensor([[2.0, 0.5, -1.0]], requires_grad=True)
target = torch.tensor([0])  # class index

# cross_entropy is given the raw scores and the integer class index.
loss = F.cross_entropy(input=logits, target=target)
loss.backward()

print("Cross-entropy loss:", loss.item())
print("Gradient w.r.t logits:", logits.grad)


## Update Rules and Optimizers

In [None]:
# Minimise (x - 1)^2 by hand, starting from x = 5.
x = torch.tensor(5.0, requires_grad=True)
lr = 0.1

for step in range(5):
    loss = (x - 1).pow(2)
    loss.backward()

    # The parameter update itself must not be recorded by autograd.
    with torch.no_grad():
        x.sub_(lr * x.grad)
        # Reset the gradient so the next backward() starts from zero.
        x.grad.zero_()

    print(f"step {step}: x={x.item():.4f}, loss={loss.item():.4f}")


In [None]:
# Same minimisation as the manual version, with the update delegated to SGD.
x = torch.tensor(5.0, requires_grad=True)
optimizer = torch.optim.SGD(params=[x], lr=0.1)

for step in range(5):
    loss = (x - 1).pow(2)
    loss.backward()

    # Apply the update, then clear the gradient for the next iteration.
    optimizer.step()
    optimizer.zero_grad()

    print(f"step {step}: x={x.item():.4f}, loss={loss.item():.4f}")


In [None]:
# Fit a single weight so that w * 2 approaches 4, using Adam.
w = torch.randn(1, requires_grad=True)
optimizer = torch.optim.Adam(params=[w], lr=0.1)

# The one training example is the same at every step, so build it once.
x = torch.tensor(2.0)
y_true = torch.tensor(4.0)

for step in range(10):
    y_pred = w * x  # 1. Forward pass
    loss = (y_pred - y_true).pow(2)  # 2. Compute loss

    loss.backward()  # 3. Backward pass
    optimizer.step()  # 4. Update parameters
    optimizer.zero_grad()  # 5. Zero gradients

    print(f"step {step}: w={w.item():.3f}, loss={loss.item():.3f}")


## Multilayer Perceptrons

In [None]:
import torch.nn as nn

# A simple 1-layer model
model = nn.Linear(2, 1)

print(model)
print("Parameters:")
for name, p in model.named_parameters():
    print(name, p.shape)

In [None]:
import torch.nn as nn

# 4 inputs -> 16 hidden units -> ReLU -> 2 outputs, chained in order.
mlp_layers = [
    nn.Linear(4, 16),
    nn.ReLU(),
    nn.Linear(16, 2),
]
mlp = nn.Sequential(*mlp_layers)

print(mlp)


In [None]:
x = torch.linspace(-2, 2, steps=5)

# A 1 -> 1 layer with no bias and its weight pinned to 1, so its output simply
# reproduces its input.
linear = nn.Linear(1, 1, bias=False)
with torch.no_grad():
    linear.weight.fill_(1.0)

# The layer expects a (batch, features) input, hence the extra column axis.
y_linear = linear(x.view(-1, 1))
y_relu = torch.relu(y_linear)

print("x:", x)
print("linear:", y_linear.squeeze())
print("relu:", y_relu.squeeze())

In [None]:
# Seven evenly spaced points from -3 to 3, pushed through ReLU.
x = torch.linspace(-3, 3, steps=7)
y = x.relu()

print("x:", x)
print("ReLU(x):", y)

In [None]:
# A small classifier: 10 input features -> 32 hidden units -> 3 class scores.
hidden_layer = nn.Linear(10, 32)
output_layer = nn.Linear(32, 3)  # 3 classes
model = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)

# batch of 5 samples, 10 features each
x = torch.randn((5, 10))
logits = model(x)

print("logits shape:", logits.shape)


## CNN and Demo

In [None]:
# One 3x3 convolution mapping a single input channel to 8 output channels.
conv = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3)

print(conv)
# Total element count over every parameter tensor of the layer.
print("Number of parameters:", sum(map(torch.numel, conv.parameters())))

In [None]:
# Convolution -> ReLU -> 2x2 max-pool, the basic building block of a CNN.
cnn_layers = [
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2),
]
cnn_block = nn.Sequential(*cnn_layers)

print(cnn_block)

In [None]:
import torch
import torch.nn as nn

def vgg_block(in_channels, out_channels):
    """Build one VGG-style stage: two conv + ReLU pairs followed by a max-pool.

    Both convolutions use a 3x3 kernel with padding=1; the first maps
    in_channels to out_channels and the second keeps out_channels. The
    pooling layer uses kernel_size=2 and stride=2.
    """
    first_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
    second_conv = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
    layers = [
        first_conv,
        nn.ReLU(inplace=True),
        second_conv,
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
    ]
    return nn.Sequential(*layers)


In [None]:
class VGGNet(nn.Module):
    """Small VGG-style CNN: three conv stages followed by a two-layer classifier.

    The first classifier layer takes 256 * 4 * 4 features, so the feature
    extractor is expected to end with 256 channels on a 4x4 grid (a 32x32
    input halved by each of the three stages).

    Args:
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, num_classes: int = 10) -> None:
        super().__init__()

        self.features = nn.Sequential(
            vgg_block(3, 64),     # 32×32 → 16×16
            vgg_block(64, 128),   # 16×16 → 8×8
            vgg_block(128, 256),  # 8×8 → 4×4
        )

        self.classifier = nn.Sequential(
            nn.Linear(256 * 4 * 4, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the conv stages, flatten per sample, and return the class scores."""
        x = self.features(x)
        # flatten() instead of view(): view raises on non-contiguous feature
        # maps (for example a channels_last batch), flatten copies if it must.
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x


In [None]:
# Instantiate the 10-class network and show its layer structure.
model = VGGNet(num_classes=10)
print(model)

# Add up the element counts of every parameter tensor in the model.
total_params = 0
for param in model.parameters():
    total_params += param.numel()
print(f"Total parameters: {total_params:,}")


In [None]:
import torchvision
import torchvision.transforms as transforms

# Preprocessing applied to every sample: convert to a tensor, then normalise
# with mean 0.5 and std 0.5. The stats are 1-tuples; presumably they are
# applied to every colour channel alike (TODO confirm against torchvision).
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5,), (0.5,))
transform = transforms.Compose([to_tensor, normalize])

# CIFAR-10 training split stored under ./data, with download enabled.
trainset = torchvision.datasets.CIFAR10(
    root="./data",
    train=True,
    download=True,
    transform=transform,
)

# Shuffled mini-batches of 64 samples.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)


In [None]:
# Train on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = model.to(device)

# Cross-entropy loss on the model outputs, optimised with Adam at lr = 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [None]:
# Train for five passes over the training loader, reporting the mean batch
# loss after each epoch.
for epoch in range(5):
    # Switch to training mode explicitly: the evaluation cell puts the model
    # into eval mode, and this cell may be re-run after it.
    model.train()
    running_loss = 0.0

    for images, labels in trainloader:
        # Move the batch to the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # Forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward: clear old gradients, backpropagate, then update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() extracts a Python float so no graph is kept alive.
        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader):.4f}")


In [None]:
# Count how many training samples the model classifies correctly.
correct = 0
total = 0

# Evaluation mode, with gradient tracking switched off for the whole pass.
model.eval()
with torch.no_grad():
    for images, labels in trainloader:
        images = images.to(device)
        labels = labels.to(device)

        # The predicted class is the index of the largest output per sample.
        outputs = model(images)
        predicted = outputs.argmax(dim=1)

        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print(f"Training accuracy: {100 * correct / total:.2f}%")
