<a href="https://colab.research.google.com/github/Tanish-Sarkar/Elite-Transformers/blob/main/Module0%20-%20PyTorch%20Ramp-Up/00_pytorch_fundamentals_work.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **1. Creating Tensors**

In [None]:
import torch
# Sample a 5x3 uniform-random tensor, placed on the GPU when one is available.
x = torch.rand(
    5,
    3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
)
print("Tensor in device: ", x.device)
# (5, 3) @ (3, 5) -> (5, 5)
print("Matmul shape:", (x @ x.T).shape)
# Collapse both dimensions into a single axis of 15 elements.
print("Reshaped:", x.reshape(-1).shape)

Tensor in device:  cuda:0
Matmul shape: torch.Size([5, 5])
Reshaped: torch.Size([15])


## **2. Building the nn.Module**

In [None]:
import torch.nn as nn
class MultiheadAttention(nn.Module):
    """Multi-head scaled dot-product self-attention (no mask, no dropout).

    Args:
        d_model: Size of the last dimension of the input and of the output.
        num_heads: Number of attention heads; must divide ``d_model`` evenly.

    Raises:
        ValueError: If ``d_model`` is not a multiple of ``num_heads``.
    """

    def __init__(self, d_model=64, num_heads=4):
        super().__init__()
        if d_model % num_heads != 0:
            # Fail at construction time instead of with an opaque reshape
            # error on the first forward pass.
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.num_heads = num_heads
        self.d_head = d_model // num_heads
        # One fused projection produces queries, keys and values together.
        self.qkv_proj = nn.Linear(d_model, d_model * 3)
        self.out_proj = nn.Linear(d_model, d_model)
        # 1 / sqrt(d_head) factor applied to the attention logits.
        self.scale = self.d_head ** -0.5

    def forward(self, x):
        """Apply self-attention along the sequence dimension.

        Args:
            x: Tensor of shape ``(B, T, C)`` with ``C == d_model``.

        Returns:
            Tensor of shape ``(B, T, C)``.
        """
        B, T, C = x.shape
        qkv = self.qkv_proj(x).reshape(B, T, 3, self.num_heads, self.d_head)
        # Put the head axis ahead of the time axis: (3, B, H, T, d_head).
        # If q/k/v stay as (B, T, H, d_head) the matmul below contracts over
        # heads within a single token instead of over the tokens themselves.
        q, k, v = qkv.permute(2, 0, 3, 1, 4).unbind(0)
        attn = torch.matmul(q, k.transpose(-2, -1)) * self.scale  # (B, H, T, T)
        attn = attn.softmax(dim=-1)
        out = torch.matmul(attn, v)  # (B, H, T, d_head)
        # Back to (B, T, H, d_head), then merge the heads into the C axis.
        out = out.transpose(1, 2).reshape(B, T, C)
        return self.out_proj(out)


# Smoke test: push one random batch (batch=2, seq_len=10, d_model=64) through
# a default-configured module and report the shape that comes back.
model = MultiheadAttention()
x = torch.rand((2, 10, 64))
out = model(x)
print("Multi-Head output shape: ", out.shape)

Multi-Head output shape:  torch.Size([2, 10, 64])


## **3. NLP: Character-Level Dataset and DataLoader**

In [None]:
import requests
from torch.utils.data import Dataset, DataLoader

# Download the Tiny Shakespeare corpus and keep its first 50,000 characters.
# The timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() keeps an HTTP error page from being tokenized as data.
response = requests.get(
    "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt",
    timeout=30,
)
response.raise_for_status()
text = response.text[:50000]

# Character-level vocabulary: every distinct character, in sorted order.
chars = sorted(set(text))
stoi = {ch: i for i, ch in enumerate(chars)}  # character -> integer id
itos = {i: ch for i, ch in enumerate(chars)}  # integer id -> character
vocab_size = len(chars)

class CharDataset(Dataset):
    """Sliding-window next-character dataset over an encoded string.

    Item ``i`` is a pair ``(x, y)`` of ``block_size``-long integer tensors in
    which ``y`` is ``x`` shifted one position to the right.

    Args:
        data: Text to encode with the module-level ``stoi`` table; characters
            that are missing from ``stoi`` are skipped.
        block_size: Number of tokens in each input/target sequence.
    """

    def __init__(self, data, block_size=128):
        self.data = [stoi[c] for c in data if c in stoi]
        self.block_size = block_size

    def __len__(self):
        # Clamp at zero: len() raises ValueError on a negative result, which
        # is what text shorter than block_size would otherwise produce.
        return max(0, len(self.data) - self.block_size)

    def __getitem__(self, i):
        """Return the ``(input, target)`` pair whose window starts at ``i``.

        Negative indices count from the end, as with a list.

        Raises:
            IndexError: If ``i`` is outside ``[-len(self), len(self))``.
        """
        n = len(self)
        if i < 0:
            i = i + n
        if not 0 <= i < n:
            # Besides rejecting truncated windows, IndexError is what lets a
            # plain `for x, y in dataset` loop stop after the last item.
            raise IndexError(f"index out of range for CharDataset of length {n}")
        # block_size + 1 tokens so input and target can overlap by all but one.
        chunk = self.data[i:i + self.block_size + 1]
        return torch.tensor(chunk[:-1]), torch.tensor(chunk[1:])

# Wrap the corpus in a dataset, then draw a single shuffled batch of 8 windows
# to check the tensor shapes and decode the start of the first sample.
dataset = CharDataset(text)
loader = DataLoader(dataset, shuffle=True, batch_size=8)
xb, yb = next(iter(loader))
print("Batch shapes:", xb.shape, yb.shape)
print("Sample text:", ''.join(itos[idx] for idx in xb[0, :50].tolist()))

Batch shapes: torch.Size([8, 128]) torch.Size([8, 128])
Sample text: those
That best can aid your action.

MARCIUS:
Tho
