# Lecture 6: Batch Normalization and Residual Streams

In this lecture, we discuss two techniques that have proven very effective for training deep neural networks: Batch Normalization and Residual Streams. We cover each technique in detail and show how it can be used to improve the performance of deep neural networks.

### Importing libraries

In [None]:
import os
from dataclasses import dataclass
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.nn import functional as F
from src.utils import load_text, set_seed, configure_device

### Configuration

In [None]:
@dataclass
class CNNConfig:
    """Paths and hyperparameters for the character-level CNN notebook.

    The notebook reads these values as class attributes and overwrites some
    of them after definition (``CNNConfig.device``, ``CNNConfig.vocab_size``),
    so every field carries a default.
    """

    # Resolved against the working directory at class-definition time; keeps
    # its trailing slash because dataset_path is appended by string
    # concatenation.
    root_dir: str = os.getcwd() + "/../../"
    dataset_path: str = "data/names.txt"
    # CPU placeholder; the notebook later replaces it with configure_device().
    device: torch.device = torch.device('cpu')

    # Tokenizer
    vocab_size: int = 0  # Set later

    # Training
    val_size: float = 0.1  # Passed as test_size to the train/validation split
    batch_size: int = 32
    max_steps: int = 1000
    lr: float = 0.01
    val_interval: int = 100

    seed: int = 101

    # Model
    # CNN reads config.context_size as the embedding width and the number of
    # input channels of its first convolution; without this field building the
    # model raises AttributeError. Declared last so the positional order of
    # the pre-existing fields is unchanged.
    # NOTE(review): 8 is a placeholder default -- confirm the intended value.
    context_size: int = 8

### Reproducibility

In [None]:
# Pass the configured seed (CNNConfig.seed) to the project's set_seed helper
# before any data splitting or model construction, for reproducibility.
set_seed(CNNConfig.seed)

### Device

In [None]:
# Replace the CPU placeholder with whatever configure_device() selects.
# The assignment targets the class attribute, not an instance.
CNNConfig.device = configure_device()

### Dataset

In [None]:
# Read the dataset file (root_dir + dataset_path) and keep one entry per line
names = load_text(f"{CNNConfig.root_dir}{CNNConfig.dataset_path}").splitlines()

### Tokenizer

In [None]:
# Vocabulary: the "." special token at index 0, followed by the letters a-z
chars = ["."] + [chr(code) for code in range(ord("a"), ord("z") + 1)]
CNNConfig.vocab_size = len(chars)
# Lookup tables in both directions between characters and integer ids
str2idx = dict(zip(chars, range(len(chars))))
idx2str = dict(enumerate(chars))

### Preprocessing

In [None]:
# Hold out a CNNConfig.val_size fraction of the names for validation;
# random_state is pinned to the configured seed.
train_names, val_names = train_test_split(
    names,
    test_size=CNNConfig.val_size,
    random_state=CNNConfig.seed,
)

### Model

In [None]:
class CNN(nn.Module):
    """1-D convolutional classifier over sequences of token ids.

    Pipeline: embedding -> two ReLU-activated ``Conv1d`` layers (kernel size
    3, no padding) -> mean over the sequence axis -> linear projection to
    ``config.vocab_size`` outputs.
    """

    def __init__(self, config):
        """Create the layers from ``config.vocab_size`` and ``config.context_size``."""
        super().__init__()
        self.config = config

        # The embedding width doubles as the first convolution's input channels.
        embed_dim = config.context_size
        self.embedding = nn.Embedding(config.vocab_size, embed_dim)
        self.conv1 = nn.Conv1d(in_channels=embed_dim, out_channels=128, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3)
        self.fc = nn.Linear(256, config.vocab_size)

    def forward(self, x):
        """Return one row of ``config.vocab_size`` scores per input sequence.

        ``x`` is expected to be a 2-D tensor of token ids (batch, sequence):
        the 3-axis permute below requires the embedded tensor to be 3-D. Each
        unpadded kernel-3 convolution shortens the sequence by 2, so sequences
        need at least 5 positions.
        """
        embedded = self.embedding(x)
        # Conv1d convolves over the last axis, so move the embedding axis to
        # the channel position.
        channels_first = embedded.permute(0, 2, 1)
        features = F.relu(self.conv2(F.relu(self.conv1(channels_first))))
        pooled = features.mean(dim=2)  # average over the remaining positions
        return self.fc(pooled)

### Training

### Inference