# ML Advanced Topics — Playbook (Object Detection, Face, RNN, Embeddings, Seq2Seq, Transformer)
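This notebook contains compact, runnable code snippets demonstrating core concepts: IoU/NMS/anchors, triplet loss, an LSTM classifier, skip-gram with negative sampling, a Transformer seq2seq model with beam search, and a self-attention forward pass. Run the cells in order, top to bottom: later cells rely on the imports and the `device` variable defined in the setup cell.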

In [None]:
# Setup - imports, seed, device
import math, random, time, itertools
from collections import Counter, defaultdict
import numpy as np
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split, TensorDataset
from torchvision import transforms, datasets
from pathlib import Path
# Seed every random number generator used in this notebook with the same
# value so repeated runs draw the same numbers.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Use the GPU when PyTorch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('device:', device)


## Object Detection — IoU, NMS, Anchors

In [None]:

import numpy as np
def iou_xyxy(a, b):
    """Return the intersection-over-union of one box with one or more boxes.

    Boxes are given in ``[x1, y1, x2, y2]`` corner format.

    Args:
        a: A single reference box (four values).
        b: Either a single box (four values) or a sequence of boxes, one
            box per row.

    Returns:
        A 1-D NumPy array with the IoU of ``a`` against each box in ``b``.
    """
    ref = np.array(a)
    others = np.array(b)
    if others.ndim == 1:
        # Promote a lone box to a one-row batch so column indexing works.
        others = others.reshape(1, 4)

    # Corners of the overlap rectangle between ``ref`` and every other box.
    ix1 = np.maximum(ref[0], others[:, 0])
    iy1 = np.maximum(ref[1], others[:, 1])
    ix2 = np.minimum(ref[2], others[:, 2])
    iy2 = np.minimum(ref[3], others[:, 3])

    # A negative extent means the boxes do not overlap; clamp it to zero.
    overlap_w = np.maximum(0, ix2 - ix1)
    overlap_h = np.maximum(0, iy2 - iy1)
    inter = overlap_w * overlap_h

    ref_area = (ref[2] - ref[0]) * (ref[3] - ref[1])
    other_areas = (others[:, 2] - others[:, 0]) * (others[:, 3] - others[:, 1])
    union = ref_area + other_areas - inter
    # The small epsilon keeps the division defined when the union is zero.
    return inter / (union + 1e-8)

def nms(boxes, scores, iou_th=0.5):
    """Greedy non-maximum suppression over ``[x1, y1, x2, y2]`` boxes.

    The highest-scoring remaining box is kept, every remaining box whose IoU
    with it exceeds ``iou_th`` is discarded, and the process repeats.

    Args:
        boxes: Array-like of boxes, one per row. Plain Python lists are
            accepted as well as NumPy arrays.
        scores: Array-like with one confidence score per box.
        iou_th: Boxes with IoU strictly greater than this value against a
            kept box are suppressed.

    Returns:
        A list of integer indices into ``boxes`` of the kept boxes, ordered
        by descending score. Empty input yields an empty list.
    """
    # Coerce to arrays so the fancy indexing below also works for lists.
    boxes = np.asarray(boxes)
    scores = np.asarray(scores)

    order = np.argsort(scores)[::-1]
    keep = []
    while order.size > 0:
        best = order[0]
        keep.append(int(best))
        if order.size == 1:
            break
        rest = order[1:]
        ious = iou_xyxy(boxes[best], boxes[rest])
        # Only candidates that overlap the kept box weakly enough survive.
        order = rest[ious <= iou_th]
    return keep

def generate_anchors(image_size=(224,224), grid=(7,7), scales=(0.5,1.0,2.0), ratios=(0.5,1.0,2.0)):
    """Generate anchor boxes centred on every cell of a regular grid.

    For each grid cell, one anchor is produced per (scale, ratio) pair. The
    base anchor size is the cell size; ``scale`` multiplies both sides and
    ``ratio`` is the resulting width/height ratio relative to the cell.

    Args:
        image_size: ``(height, width)`` of the image.
        grid: ``(rows, cols)`` of the anchor grid.
        scales: Iterable of size multipliers applied to the cell size.
        ratios: Iterable of aspect ratios; each must be positive.

    Returns:
        A float NumPy array of shape ``(rows * cols * len(scales) *
        len(ratios), 4)`` in ``[x1, y1, x2, y2]`` format. The array is always
        two-dimensional, so empty ``scales``/``ratios`` give shape ``(0, 4)``.
    """
    ih, iw = image_size
    gh, gw = grid
    anchors = []
    for i in range(gh):
        for j in range(gw):
            # Centre of grid cell (i, j) in image coordinates.
            cx = (j + 0.5) * iw / gw
            cy = (i + 0.5) * ih / gh
            for s in scales:
                for r in ratios:
                    # Multiplying w and dividing h by sqrt(r) makes w/h scale
                    # with r while leaving the anchor area unchanged.
                    w = s * iw / gw * math.sqrt(r)
                    h = s * ih / gh / math.sqrt(r)
                    anchors.append([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2])
    # reshape keeps the (N, 4) contract even when no anchors were produced.
    return np.asarray(anchors, dtype=float).reshape(-1, 4)

# IoU of one reference box against a near-duplicate and a far-away box.
demo_iou = iou_xyxy([30, 30, 120, 160], [[28, 25, 118, 158], [200, 200, 260, 260]])
print('IoU demo:', demo_iou)

# NMS over three scored boxes with an IoU threshold of 0.4.
demo_boxes = np.array([[28, 25, 118, 158], [31, 35, 122, 162], [200, 200, 260, 260]])
demo_scores = np.array([0.9, 0.7, 0.6])
print('NMS demo keep:', nms(demo_boxes, demo_scores, 0.4))

# Shape of the anchor array for a 7x7 grid on a 224x224 image.
demo_anchors = generate_anchors((224, 224), (7, 7))
print('Anchors count:', demo_anchors.shape)


## Face Recognition — Embedding & Triplet Loss (demo)

In [None]:

import torch.nn.functional as F
class SmallEmbedder(nn.Module):
    """Two-layer MLP that maps a flattened input to a unit-length embedding.

    Args:
        embed_dim: Size of the output embedding.
        in_features: Number of values per sample after flattening. The
            default, ``3*32*32``, matches 3-channel 32x32 images; pass a
            different value to embed inputs of another size.
    """

    def __init__(self, embed_dim=64, in_features=3*32*32):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, 256),
            nn.ReLU(),
            nn.Linear(256, embed_dim),
        )

    def forward(self, x):
        """Embed a batch ``x`` and L2-normalise each row of the result."""
        return F.normalize(self.net(x), p=2, dim=1)

def triplet_loss(anc, pos, neg, margin=0.2):
    """Mean hinge triplet loss over a batch of embeddings.

    Distances are computed with ``F.pairwise_distance`` using its default
    settings. A triplet contributes to the loss only when the anchor is not
    at least ``margin`` closer to the positive than to the negative.

    Args:
        anc: Anchor embeddings.
        pos: Embeddings that should be close to the anchors.
        neg: Embeddings that should be far from the anchors.
        margin: Required gap between negative and positive distances.

    Returns:
        A scalar tensor with the batch-averaged loss.
    """
    dist_to_pos = F.pairwise_distance(anc, pos)
    dist_to_neg = F.pairwise_distance(anc, neg)
    violation = dist_to_pos - dist_to_neg + margin
    return F.relu(violation).mean()

# Demo forward pass: embed three random image batches (anchor, positive,
# negative) with a freshly initialised embedder and print the triplet loss.
embed = SmallEmbedder(32).to(device)
anc = torch.randn(4, 3, 32, 32).to(device)
pos = torch.randn(4, 3, 32, 32).to(device)
neg = torch.randn(4, 3, 32, 32).to(device)
print(
    'triplet demo loss:',
    triplet_loss(embed(anc), embed(pos), embed(neg)).item(),
)


## RNN / LSTM — Tiny sequence classifier

In [None]:

class ToySeqDataset(Dataset):
    """Random integer sequences labelled by whether their sum is large.

    Each of the ``n`` samples is a length-``L`` sequence of token ids drawn
    with ``torch.randint(0, vocab, ...)``. The label is 1 when the sum of the
    tokens is strictly greater than ``L * vocab // 2`` and 0 otherwise.

    NOTE(review): ``torch.randint`` excludes ``vocab`` itself, so the expected
    sum is ``L * (vocab - 1) / 2``, which is below the threshold used here.
    Positives are therefore the minority class -- confirm this is intended.
    """

    def __init__(self, n=500, L=20, vocab=10):
        tokens = torch.randint(0, vocab, (n, L))
        threshold = L * vocab // 2
        self.X = tokens
        self.y = (tokens.sum(dim=1) > threshold).long()

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

class LSTMClassifier(nn.Module):
    """Embedding -> single LSTM -> linear head producing two class logits.

    Args:
        vocab: Number of distinct token ids.
        emb: Embedding dimension.
        hidden: LSTM hidden size.
    """

    def __init__(self, vocab=10, emb=16, hidden=64):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings=vocab, embedding_dim=emb)
        self.lstm = nn.LSTM(input_size=emb, hidden_size=hidden, batch_first=True)
        self.fc = nn.Linear(in_features=hidden, out_features=2)

    def forward(self, x):
        """Classify a batch of token-id sequences shaped (batch, time)."""
        embedded = self.emb(x)
        outputs, _state = self.lstm(embedded)
        # The LSTM output at the final time step is the sequence summary.
        last_step = outputs[:, -1, :]
        return self.fc(last_step)

# Build 600 toy sequences and hold out 120 of them for evaluation.
ds = ToySeqDataset(600)
tr, dev = random_split(ds, [480, 120])
tr_loader = DataLoader(tr, batch_size=64, shuffle=True)
dv_loader = DataLoader(dev, batch_size=128)

model = LSTMClassifier().to(device)
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

for epoch in range(2):
    # One optimisation pass over the training split.
    model.train()
    for xb, yb in tr_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        loss = loss_fn(model(xb), yb)
        opt.zero_grad()
        loss.backward()
        opt.step()

    # Accuracy on the held-out split, with gradients disabled.
    model.eval()
    with torch.no_grad():
        ys = []
        ph = []
        for xb, yb in dv_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            ph.append(model(xb).argmax(1).cpu())
            ys.append(yb.cpu())
        matches = torch.cat(ys) == torch.cat(ph)
        acc = matches.float().mean().item()
    print('epoch', epoch+1, 'dev_acc', round(acc,3))


## Word Embeddings — Tiny Skip-Gram with Negative Sampling (toy)

In [None]:

# Toy corpus; the vocabulary keeps each word in first-seen order.
corpus = "we like machine learning we like embeddings".split()
vocab = list(dict.fromkeys(corpus))
w2i = dict(zip(vocab, range(len(vocab))))

# Collect (centre, context) index pairs: every word is paired with each
# neighbour at most `window` positions away, excluding itself.
window = 2
pairs = []
for i, w in enumerate(corpus):
    lo = max(0, i - window)
    hi = min(len(corpus), i + window + 1)
    for j in range(lo, hi):
        if j != i:
            pairs.append((w2i[w], w2i[corpus[j]]))

class SGNS(nn.Module):
    """Skip-gram with negative sampling.

    Holds two embedding tables: ``in_emb`` for centre words and ``out_emb``
    for context and negative words.

    Args:
        V: Vocabulary size.
        D: Embedding dimension.
    """

    def __init__(self, V, D=8):
        super().__init__()
        self.in_emb = nn.Embedding(V, D)
        self.out_emb = nn.Embedding(V, D)

    def forward(self, c, o, negs):
        """Return the mean negative-sampling loss for a batch.

        Args:
            c: Centre-word indices, shape (B,).
            o: Context-word indices, shape (B,).
            negs: Negative-sample indices, shape (B, K).

        Returns:
            A scalar tensor: the batch mean of
            ``-(logsigmoid(v.u) + sum_k logsigmoid(-v.n_k))``.
        """
        v = self.in_emb(c)   # B,D
        u = self.out_emb(o)  # B,D
        pos_loss = F.logsigmoid((v * u).sum(dim=1))  # B
        neg = self.out_emb(negs)  # B,K,D
        # Squeeze only the trailing singleton left by bmm. A bare squeeze()
        # would also drop the batch or negatives axis when B == 1 or K == 1,
        # making the sum over dim=1 below fail.
        neg_score = torch.bmm(neg.neg(), v.unsqueeze(2)).squeeze(2)  # B,K
        neg_loss = F.logsigmoid(neg_score).sum(dim=1)  # B
        return -(pos_loss + neg_loss).mean()

V = len(vocab)
model = SGNS(V, D=8).to(device)
opt = torch.optim.Adam(model.parameters(), lr=1e-2)

# An array makes the (centre, context) pairs shuffleable and sliceable by column.
pairs = np.array(pairs)
for epoch in range(3):
    np.random.shuffle(pairs)
    losses = []
    # Mini-batches of up to four pairs; the final batch may be shorter.
    for i in range(0, len(pairs), 4):
        batch = pairs[i:i+4]
        c = torch.LongTensor(batch[:, 0]).to(device)
        o = torch.LongTensor(batch[:, 1]).to(device)
        # Five negatives per pair, drawn uniformly over the vocabulary.
        negs = torch.randint(0, V, (len(batch), 5)).to(device)
        loss = model(c, o, negs)
        opt.zero_grad()
        loss.backward()
        opt.step()
        losses.append(loss.item())
    print('epoch', epoch+1, 'loss', np.mean(losses))


## Seq2Seq & Attention — Transformer encoder-decoder demo + beam search

In [None]:

# Token -> id table for the toy seq2seq vocabulary.
TOK = {
    '<pad>': 0,
    '<s>': 1,
    '</s>': 2,
    'a': 3,
    'b': 4,
    'c': 5,
}
# Reverse lookup: id -> token.
invTOK = {idx: tok for tok, idx in TOK.items()}
V = len(TOK)
class TinyTrans(nn.Module):
    """Minimal encoder-decoder Transformer over a shared token embedding.

    Source and target tokens go through the same embedding table, whose
    output is passed directly to ``nn.Transformer``. A linear layer maps the
    decoder output back to vocabulary logits.

    Args:
        V: Vocabulary size.
        d_model: Embedding / model dimension.
        nhead: Number of attention heads.
        nlayers: Number of encoder layers and of decoder layers.
    """

    def __init__(self, V, d_model=32, nhead=4, nlayers=1):
        super().__init__()
        self.emb = nn.Embedding(V, d_model)
        self.trans = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=nlayers,
            num_decoder_layers=nlayers,
            batch_first=True,
        )
        self.out = nn.Linear(d_model, V)

    def forward(self, src, tgt):
        """Return logits of shape (batch, tgt_len, V) for ``tgt`` given ``src``."""
        # Causal mask: each target position may only attend to earlier ones.
        tgt_len = tgt.size(1)
        causal = self.trans.generate_square_subsequent_mask(tgt_len).to(tgt.device)
        src_vecs = self.emb(src)
        tgt_vecs = self.emb(tgt)
        hidden = self.trans(src_vecs, tgt_vecs, tgt_mask=causal)
        return self.out(hidden)

def beam_search_simple(model, src, start=1, end=2, beam=3, maxlen=8):
    """Decode one source sequence with beam search.

    Hypotheses are ranked by summed token log-probability, with no length
    normalisation. A hypothesis that has produced ``end`` is carried forward
    unchanged. Decoding stops after ``maxlen`` steps, or earlier once every
    hypothesis in the beam has ended.

    Args:
        model: Module exposing ``emb``, ``trans`` (with ``encoder``,
            ``decoder`` and ``generate_square_subsequent_mask``) and ``out``.
            It is switched to eval mode and left that way.
        src: Source token ids for a single sequence.
        start: Id of the start-of-sequence token.
        end: Id of the end-of-sequence token.
        beam: Number of hypotheses kept after each step; must be >= 1.
        maxlen: Maximum number of decoding steps.

    Returns:
        The best-scoring hypothesis as a list of ints, starting with
        ``start``.

    Raises:
        ValueError: If ``beam`` is smaller than 1.
    """
    if beam < 1:
        raise ValueError("beam must be >= 1")
    model.eval()
    # Run on the model's own device rather than a module-level global, so
    # the function works wherever the model lives.
    first_param = next(model.parameters(), None)
    dev = first_param.device if first_param is not None else torch.device('cpu')
    src = torch.as_tensor(src, dtype=torch.long, device=dev).unsqueeze(0)
    with torch.no_grad():
        memory = model.trans.encoder(model.emb(src))
        beams = [(0.0, [start])]
        for _step in range(maxlen):
            # Nothing left to expand: further steps would only copy the beam.
            if all(seq[-1] == end for _score, seq in beams):
                break
            new_beams = []
            for score, seq in beams:
                if seq[-1] == end:
                    new_beams.append((score, seq))
                    continue
                tgt = torch.tensor(seq, dtype=torch.long, device=dev).unsqueeze(0)
                tgt_mask = model.trans.generate_square_subsequent_mask(tgt.size(1)).to(dev)
                dec = model.trans.decoder(model.emb(tgt), memory, tgt_mask=tgt_mask)
                # Only the last position predicts the next token.
                logits = model.out(dec[:, -1, :])
                logp = F.log_softmax(logits, dim=-1).squeeze(0).cpu().numpy()
                topk = np.argsort(logp)[-beam:][::-1]
                for k in topk:
                    new_beams.append((score + float(logp[k]), seq + [int(k)]))
            beams = sorted(new_beams, key=lambda x: x[0], reverse=True)[:beam]
        best = max(beams, key=lambda x: x[0])[1]
        return best

# Untrained model: the decoded tokens only demonstrate that beam search runs.
model = TinyTrans(V)
model = model.to(device)
print(
    'Beam demo tokens:',
    beam_search_simple(model, [3, 4, 3], beam=3),
)


## Transformer — Self-attention forward demo
Demonstrates positional encoding + nn.MultiheadAttention forward pass.

In [None]:

def positional_encoding(n,d):
    """Build a sinusoidal positional-encoding table.

    Even columns hold ``sin(pos * freq)`` and odd columns ``cos(pos * freq)``,
    with frequencies ``exp(-log(10000) * 2i / d)``.

    Args:
        n: Number of positions.
        d: Encoding dimension; may be even or odd.

    Returns:
        A float32 tensor of shape ``(1, n, d)``, ready to be added to a
        batch-first input.
    """
    pe = np.zeros((n,d))
    pos = np.arange(n)[:,None]
    div = np.exp(np.arange(0,d,2)*( - math.log(10000.0)/d))
    angles = pos*div  # (n, ceil(d/2))
    pe[:,0::2] = np.sin(angles)
    # For odd d there is one fewer cosine column than sine column, so drop
    # the surplus angle column instead of failing to broadcast.
    pe[:,1::2] = np.cos(angles[:, :d//2])
    return torch.FloatTensor(pe).unsqueeze(0)  # (1,n,d)

# Self-attention: query, key and value are all the position-encoded input.
mha = nn.MultiheadAttention(embed_dim=32, num_heads=4, batch_first=True)
x = torch.randn(2, 6, 32)
x_pe = x + positional_encoding(6, 32)
out, _ = mha(query=x_pe, key=x_pe, value=x_pe)
print('MHA output shape:', out.shape)


----
Notebook saved to /mnt/data/ML_Advanced_Topics_Playbook.ipynb
If you want additions (more detailed detection head, real dataset examples, extended training loops), tell me which part to expand.