In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

## Linear model

In [13]:
class LinearModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearModel, self).__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        return self.linear(x)

In [14]:
input_size = 1
output_size = 1
model = LinearModel(input_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

inputs = torch.randn(10, input_size)
targets = torch.randn(10, output_size)

for epoch in range(1, 101):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, loss: {loss.item():.4f}")

Epoch 10, loss: 1.0078
Epoch 20, loss: 0.9642
Epoch 30, loss: 0.9359
Epoch 40, loss: 0.9174
Epoch 50, loss: 0.9053
Epoch 60, loss: 0.8973
Epoch 70, loss: 0.8921
Epoch 80, loss: 0.8886
Epoch 90, loss: 0.8863
Epoch 100, loss: 0.8847


## CNN

In [15]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc = nn.Linear(32 * 14 * 14, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv(x)))
        x = x.view(-1, 32 * 14 * 14)
        x = self.fc(x)
        return x

In [16]:
model = ConvNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

inputs = torch.randn(10, 1, 28, 28)
targets = torch.randint(0, 10, (10,))

for epoch in range(1, 101):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, loss: {loss.item():.4f}")

Epoch 10, loss: 0.0576
Epoch 20, loss: 0.0009
Epoch 30, loss: 0.0003
Epoch 40, loss: 0.0002
Epoch 50, loss: 0.0001
Epoch 60, loss: 0.0001
Epoch 70, loss: 0.0001
Epoch 80, loss: 0.0001
Epoch 90, loss: 0.0001
Epoch 100, loss: 0.0001


## RNN

In [18]:
class RecurrentNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RecurrentNet, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), hidden_size)
        out, hn = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        return out

In [19]:
input_size = 10
hidden_size = 20
output_size = 1
model = RecurrentNet(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

inputs = torch.randn(10, 5, input_size)  # batch len, seq len, input size
targets = torch.randn(10, output_size)

for epoch in range(1, 101):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, loss: {loss.item():.4f}")

Epoch 10, loss: 0.7876
Epoch 20, loss: 0.5976
Epoch 30, loss: 0.4260
Epoch 40, loss: 0.2734
Epoch 50, loss: 0.1510
Epoch 60, loss: 0.0678
Epoch 70, loss: 0.0221
Epoch 80, loss: 0.0051
Epoch 90, loss: 0.0010
Epoch 100, loss: 0.0002


## Transformer

In [24]:
class TransformerNet(nn.Module):
    def __init__(self, input_dim, model_dim, num_heads, num_layers, output_dim):
        super(TransformerNet, self).__init__()
        self.embedding = nn.Embedding(input_dim, model_dim)
        self.transformer = nn.Transformer(d_model=model_dim, nhead=num_heads, num_encoder_layers=num_layers, num_decoder_layers=num_layers)
        self.fc = nn.Linear(model_dim, output_dim)

    def forward(self, src, tgt):
        src = self.embedding(src)
        tgt = self.embedding(tgt)
        src = src.permute(1, 0, 2)
        tgt = tgt.permute(1, 0, 2)
        out = self.transformer(src, tgt)
        out = self.fc(out)
        return out

In [25]:
input_dim = 1000
model_dim = 512
num_heads = 8
num_layers = 6
output_dim = 1000
model = TransformerNet(input_dim, model_dim, num_heads, num_layers, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

src = torch.randint(0, input_dim, (10, 20))
tgt = torch.randint(0, output_dim, (10, 20))

for epoch in range(1, 101):
    optimizer.zero_grad()
    outputs = model(src, tgt)
    loss = criterion(outputs.view(-1, output_dim), tgt.view(-1))
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, loss: {loss.item():.4f}")

Epoch 10, loss: 4.7856
Epoch 20, loss: 4.2999
Epoch 30, loss: 4.2458
Epoch 40, loss: 4.2265
Epoch 50, loss: 5.2620
Epoch 60, loss: 5.5897
Epoch 70, loss: 5.2382
Epoch 80, loss: 4.9521
Epoch 90, loss: 4.9104
Epoch 100, loss: 4.8787
