# Lecture 7: Attention Mechanisms

In this lecture, we introduce attention mechanisms. Attention is the core building block of the Transformer, the state-of-the-art architecture for many NLP tasks. Let's implement the attention mechanism from scratch, step by step.

## Importing libraries

In [None]:
import os
from dataclasses import dataclass
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from src.utils import load_text, set_seed, configure_device

## Configuration

In [None]:
@dataclass
class GPTConfig:
    """Paths and hyperparameters shared by the cells of this notebook."""

    # Paths. root_dir is computed from the working directory at class-definition
    # time and ends with a slash, so dataset_path can be appended directly.
    root_dir: str = f"{os.getcwd()}/../../"
    dataset_path: str = "data/names.txt"
    # CPU is only the initial default for the device field.
    device: torch.device = torch.device("cpu")

    # Tokenizer
    vocab_size: int = 0  # Placeholder, meant to be filled in later

    # Model
    context_size: int = 3

    # Training
    val_size: float = 0.1
    batch_size: int = 32
    max_steps: int = 1_000
    lr: float = 1e-2
    val_interval: int = 100

    # Random seed used for reproducibility
    seed: int = 101

## Reproducibility

In [None]:
# Pass the configured seed (GPTConfig.seed, 101 by default) to the project
# helper from src.utils. Presumably this seeds the random number generators
# used below; confirm in src/utils.
set_seed(GPTConfig.seed)

## Device

In [None]:
# Replace the class-level CPU default with whatever configure_device()
# (from src.utils) returns.
# NOTE: this rebinds the attribute on the class itself. Instances created with
# GPTConfig() still receive the default captured when the dataclass was
# defined, so read the device as GPTConfig.device.
GPTConfig.device = configure_device()

## Dataset

In [None]:
# Load the file at root_dir + dataset_path and split it into one entry per line.
# load_text comes from src.utils; presumably it returns the file content as a
# single string (confirm there).
names = load_text(GPTConfig.root_dir + GPTConfig.dataset_path).splitlines()

## Tokenizer

In [None]:
class Tokenizer:
    """Character-level tokenizer whose vocabulary is built from a list of names.

    Every distinct character occurring in ``names`` becomes one token. The
    vocabulary is sorted, so a given character always maps to the same index
    regardless of interpreter run or hash seed.

    Attributes:
        names: The list of strings the vocabulary was built from.
        vocab: Sorted list of the distinct characters found in ``names``.
        vocab_size: Number of entries in ``vocab``.
        char2idx: Mapping from character to its index in ``vocab``.
        idx2char: Mapping from index back to its character.
    """

    def __init__(self, names):
        """Build the vocabulary and both lookup tables from ``names``."""
        self.names = names
        # Sort the character set: iterating a bare set of strings follows hash
        # order, which changes between runs under string hash randomization
        # and would silently reshuffle token ids.
        self.vocab = sorted(set("".join(self.names)))
        self.vocab_size = len(self.vocab)
        self.char2idx = {char: idx for idx, char in enumerate(self.vocab)}
        self.idx2char = dict(enumerate(self.vocab))

    def encode(self, name):
        """Return the list of token indices for the characters of ``name``.

        Raises:
            KeyError: If ``name`` contains a character not in the vocabulary.
        """
        return [self.char2idx[char] for char in name]

    def decode(self, tokens):
        """Return the string spelled by the token indices in ``tokens``.

        Raises:
            KeyError: If a token index is not in the vocabulary.
        """
        return "".join(self.idx2char[token] for token in tokens)

# Build the character vocabulary from every name in the dataset.
tokenizer = Tokenizer(names)

## Preprocessing

In [None]:
class NameDataset(Dataset):
    """Map-style dataset that returns one encoded name per index.

    Each item is whatever ``tokenizer.encode`` returns for the name stored at
    that position; no padding or tensor conversion is applied here.
    """

    def __init__(self, names, tokenizer):
        """Keep references to the raw names and the tokenizer used to encode them."""
        self.names, self.tokenizer = names, tokenizer

    def __len__(self):
        """Return the number of names in the dataset."""
        return len(self.names)

    def __getitem__(self, idx):
        """Return the encoded form of the name at position ``idx``."""
        return self.tokenizer.encode(self.names[idx])

# Wrap the full list of names. No train/validation split is applied at this
# point, even though GPTConfig defines val_size.
train_dataset = NameDataset(names, tokenizer)


## Model

### Attention Mechanism

### Training

### Inference