In [1]:
import re
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
import time
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
from torch.optim import lr_scheduler

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [3]:
def generate_data(num_sequences, sequence_length, generator=None):
    """Build random digit sequences and their first-digit-shifted targets.

    Every input row holds integers drawn uniformly from 0..9. The target at
    position ``i`` is ``(x[i] + x[0]) % 10``, so each target depends on the
    first element of its own row (position 0 maps to ``2 * x[0] % 10``).

    Args:
        num_sequences: Number of rows to generate.
        sequence_length: Number of digits per row.
        generator: Optional ``torch.Generator`` used for sampling. Pass a
            seeded generator for reproducible data; ``None`` (the default)
            samples from the global RNG.

    Returns:
        Tuple ``(x_data, y_data)`` of integer tensors, each of shape
        ``(num_sequences, sequence_length)``.
    """
    x_data = torch.randint(
        0, 10, (num_sequences, sequence_length), generator=generator
    )
    # Broadcast the first column across each row instead of looping over columns.
    y_data = (x_data + x_data[:, :1]) % 10
    return x_data, y_data

In [4]:
# Hyperparameters for the LSTM run on the length-25 dataset.
# input_size and output_size both equal the sequence length (25).
# NOTE(review): hidden_size is 256 here but 128 in the RNN/GRU runs, so the
# final comparison table is not like-for-like — confirm this is intended.
input_size = output_size = 25
hidden_size = 256
num_epochs = 10_000
batch_size = 256


In [5]:
#data = [generate_data(25) for _ in range(10000)]

In [6]:
#tensor_data = [(torch.unsqueeze(x, 0), torch.unsqueeze(y, 0)) for x, y in data]
#X_train = torch.cat([x for x, _ in tensor_data], dim=0)
#y_train = torch.cat([y for _, y in tensor_data], dim=0)

#train_dataset = TensorDataset(X_train, y_train)
##train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [7]:
class LSTMModel(nn.Module):
    """Single-layer LSTM followed by a linear layer applied to every time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        output_size: Number of output features per time step.

    Note:
        ``x`` is passed straight to ``nn.LSTM``. With ``batch_first=True`` a
        3-D input is read as ``(batch, seq_len, input_size)``; a 2-D input is
        read as a single unbatched sequence ``(seq_len, input_size)``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names are kept unchanged: they determine the state_dict keys.
        self.lstm = nn.LSTM(
            input_size=input_size, hidden_size=hidden_size, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the per-step projection of the LSTM hidden states for ``x``."""
        # nn.LSTM returns (outputs, (h_n, c_n)); only the outputs are used.
        hidden_states = self.lstm(x)[0]
        return self.fc(hidden_states)


In [30]:
# Seed the global RNG so the three datasets (and any model initialisation that
# follows) are the same on every Restart & Run All.
SEED = 42
NUM_SEQUENCES = 1000
torch.manual_seed(SEED)

# One dataset per sequence length under study; every tensor has shape
# (NUM_SEQUENCES, sequence_length).
x_data_25, y_data_25 = generate_data(NUM_SEQUENCES, 25)
x_data_75, y_data_75 = generate_data(NUM_SEQUENCES, 75)
x_data_125, y_data_125 = generate_data(NUM_SEQUENCES, 125)

In [9]:
# Build the model on the target device *before* creating the optimizer, so the
# optimizer is constructed over the parameters that will actually be trained.
model = LSTMModel(input_size, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)
# The learning rate is multiplied by 0.1 every 1000 scheduler steps.
# NOTE(review): with one step per epoch the rate is 1e-6 after 3000 epochs, so
# most of a 10000-epoch run happens at a near-zero learning rate — confirm intended.
scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)

In [10]:
# Move the model's parameters to the selected device.
model = model.to(device)


In [11]:
# Full-batch training of the LSTM on the length-25 dataset.
# Cast to float and move to the device once; this work is loop-invariant.
# Separate names are used so the source dataset tensors are not rebound.
x_train_25 = x_data_25.float().to(device)
y_train_25 = y_data_25.float().to(device)
# NOTE(review): the input is 2-D (num_sequences, 25), which the recurrent layer
# reads as one unbatched sequence of 25-feature steps — confirm this is intended.

model.train()  # nothing switches the mode inside the loop, so set it once
losses = []
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(x_train_25)
    loss = criterion(output, y_train_25)

    loss.backward()
    optimizer.step()
    scheduler.step()

    loss_value = loss.item()  # read the scalar once and reuse it below
    losses.append(loss_value)

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_value}')

Epoch [100/10000], Loss: 16.43618392944336
Epoch [200/10000], Loss: 15.285454750061035
Epoch [300/10000], Loss: 15.175142288208008
Epoch [400/10000], Loss: 15.164161682128906
Epoch [500/10000], Loss: 15.163091659545898
Epoch [600/10000], Loss: 15.163037300109863
Epoch [700/10000], Loss: 15.163034439086914
Epoch [800/10000], Loss: 15.163034439086914
Epoch [900/10000], Loss: 15.163034439086914
Epoch [1000/10000], Loss: 15.163034439086914
Epoch [1100/10000], Loss: 15.163034439086914
Epoch [1200/10000], Loss: 15.163034439086914
Epoch [1300/10000], Loss: 15.163034439086914
Epoch [1400/10000], Loss: 15.163034439086914
Epoch [1500/10000], Loss: 15.163034439086914
Epoch [1600/10000], Loss: 15.163034439086914
Epoch [1700/10000], Loss: 15.163034439086914
Epoch [1800/10000], Loss: 15.163034439086914
Epoch [1900/10000], Loss: 15.163034439086914
Epoch [2000/10000], Loss: 15.163034439086914
Epoch [2100/10000], Loss: 15.163034439086914
Epoch [2200/10000], Loss: 15.163034439086914
Epoch [2300/10000], 

In [19]:
# Start the LSTM summary table with the lowest training loss seen in this run.
results_df = pd.DataFrame([{'Model': 'LSTM_25', 'Loss': min(losses)}])


results_df

Unnamed: 0,Model,Loss
0,LSTM_25,15.163034


Теперь увеличим длину последовательности до 75

In [20]:
# Hyperparameters for the LSTM run on the length-75 dataset.
# input_size and output_size both equal the sequence length (75).
# NOTE(review): hidden_size is 256 here but 128 in the RNN/GRU runs, so the
# final comparison table is not like-for-like — confirm this is intended.
input_size = output_size = 75
hidden_size = 256
num_epochs = 10_000
batch_size = 128

In [21]:
# Build the model on the target device *before* creating the optimizer, so the
# optimizer is constructed over the parameters that will actually be trained.
model = LSTMModel(input_size, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)
# The learning rate is multiplied by 0.1 every 1000 scheduler steps.
# NOTE(review): with one step per epoch the rate is 1e-6 after 3000 epochs, so
# most of a 10000-epoch run happens at a near-zero learning rate — confirm intended.
scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)

In [22]:
# Move the model's parameters to the selected device.
model = model.to(device)

In [23]:
# Full-batch training of the LSTM on the length-75 dataset.
# Cast to float and move to the device once; this work is loop-invariant.
# Separate names are used so the source dataset tensors are not rebound.
x_train_75 = x_data_75.float().to(device)
y_train_75 = y_data_75.float().to(device)
# NOTE(review): the input is 2-D (num_sequences, 75), which the recurrent layer
# reads as one unbatched sequence of 75-feature steps — confirm this is intended.

model.train()  # nothing switches the mode inside the loop, so set it once
losses = []
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(x_train_75)
    loss = criterion(output, y_train_75)

    loss.backward()
    optimizer.step()
    scheduler.step()

    loss_value = loss.item()  # read the scalar once and reuse it below
    losses.append(loss_value)

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_value}')

Epoch [100/10000], Loss: 22.295482635498047
Epoch [200/10000], Loss: 15.882929801940918
Epoch [300/10000], Loss: 11.706459999084473
Epoch [400/10000], Loss: 9.657875061035156
Epoch [500/10000], Loss: 8.791481971740723
Epoch [600/10000], Loss: 8.446561813354492
Epoch [700/10000], Loss: 8.312638282775879
Epoch [800/10000], Loss: 8.261210441589355
Epoch [900/10000], Loss: 8.24148178100586
Epoch [1000/10000], Loss: 8.233824729919434
Epoch [1100/10000], Loss: 8.233343124389648
Epoch [1200/10000], Loss: 8.232941627502441
Epoch [1300/10000], Loss: 8.232573509216309
Epoch [1400/10000], Loss: 8.23223876953125
Epoch [1500/10000], Loss: 8.231928825378418
Epoch [1600/10000], Loss: 8.231647491455078
Epoch [1700/10000], Loss: 8.231388092041016
Epoch [1800/10000], Loss: 8.231151580810547
Epoch [1900/10000], Loss: 8.23093318939209
Epoch [2000/10000], Loss: 8.230731964111328
Epoch [2100/10000], Loss: 8.230711936950684
Epoch [2200/10000], Loss: 8.230692863464355
Epoch [2300/10000], Loss: 8.2306728363037

In [24]:
# One-row summary of the length-75 LSTM run: lowest training loss observed.
df_75 = pd.DataFrame([{'Model': 'LSTM_75', 'Loss': min(losses)}])

In [25]:
# Append the length-75 result. Dropping any earlier row with the same model name
# keeps this cell idempotent: re-running it does not duplicate 'LSTM_75'.
results_df = (
    pd.concat([results_df, df_75], ignore_index=True)
    .drop_duplicates(subset='Model', keep='last', ignore_index=True)
)
results_df

Unnamed: 0,Model,Loss
0,LSTM_25,15.163034
1,LSTM_75,8.230538


Увеличим длину последовательности до 125

In [26]:
# Hyperparameters for the LSTM run on the length-125 dataset.
# input_size and output_size both equal the sequence length (125).
# NOTE(review): hidden_size is 128 here, whereas the two shorter LSTM runs
# use 256 — confirm the difference is intended.
input_size = output_size = 125
hidden_size = 128
num_epochs = 10_000
batch_size = 256

In [27]:
# Build the model on the target device *before* creating the optimizer, so the
# optimizer is constructed over the parameters that will actually be trained.
model = LSTMModel(input_size, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)
# The learning rate is multiplied by 0.1 every 1000 scheduler steps.
# NOTE(review): with one step per epoch the rate is 1e-6 after 3000 epochs, so
# most of a 10000-epoch run happens at a near-zero learning rate — confirm intended.
scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)

In [28]:
# Move the model's parameters to the selected device.
model = model.to(device)

In [None]:
# Full-batch training of the LSTM on the length-125 dataset.
# Cast to float and move to the device once; this work is loop-invariant.
# Separate names are used so the source dataset tensors are not rebound.
x_train_125 = x_data_125.float().to(device)
y_train_125 = y_data_125.float().to(device)
# NOTE(review): the input is 2-D (num_sequences, 125), which the recurrent layer
# reads as one unbatched sequence of 125-feature steps — confirm this is intended.

model.train()  # nothing switches the mode inside the loop, so set it once
losses = []
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(x_train_125)
    loss = criterion(output, y_train_125)

    loss.backward()
    optimizer.step()
    scheduler.step()

    loss_value = loss.item()  # read the scalar once and reuse it below
    losses.append(loss_value)

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_value}')

In [32]:
# One-row summary of the length-125 LSTM run: lowest training loss observed.
df_125 = pd.DataFrame([{'Model': 'LSTM_125', 'Loss': min(losses)}])

In [33]:
# Append the length-125 result. Dropping any earlier row with the same model name
# keeps this cell idempotent: re-running it does not duplicate 'LSTM_125'.
results_df = (
    pd.concat([results_df, df_125], ignore_index=True)
    .drop_duplicates(subset='Model', keep='last', ignore_index=True)
)
results_df

Unnamed: 0,Model,Loss
0,LSTM_25,15.163034
1,LSTM_75,8.230538
2,LSTM_125,9.992659


# Теперь попробуем с RNN

In [34]:
class RNNModel(nn.Module):
    """Single-layer Elman RNN followed by a linear layer applied to every step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the RNN hidden state.
        output_size: Number of output features per time step.

    Note:
        ``x`` is passed straight to ``nn.RNN``. With ``batch_first=True`` a
        3-D input is read as ``(batch, seq_len, input_size)``; a 2-D input is
        read as a single unbatched sequence ``(seq_len, input_size)``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names are kept unchanged: they determine the state_dict keys.
        self.rnn = nn.RNN(
            input_size=input_size, hidden_size=hidden_size, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the per-step projection of the RNN hidden states for ``x``."""
        # nn.RNN returns (outputs, h_n); only the outputs are used.
        hidden_states = self.rnn(x)[0]
        return self.fc(hidden_states)

In [35]:
# Hyperparameters for the RNN run on the length-25 dataset.
# input_size and output_size both equal the sequence length (25).
input_size = output_size = 25
hidden_size = 128
num_epochs = 10_000
batch_size = 256

In [36]:
# Build the model on the target device *before* creating the optimizer, so the
# optimizer is constructed over the parameters that will actually be trained.
model = RNNModel(input_size, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)
# The learning rate is multiplied by 0.1 every 1000 scheduler steps.
# NOTE(review): with one step per epoch the rate is 1e-6 after 3000 epochs, so
# most of a 10000-epoch run happens at a near-zero learning rate — confirm intended.
scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)

In [None]:
# Full-batch training of the RNN on the length-25 dataset.
model = model.to(device)

# Cast to float and move to the device once; this work is loop-invariant.
# Separate names are used so the source dataset tensors are not rebound.
x_train_25 = x_data_25.float().to(device)
y_train_25 = y_data_25.float().to(device)
# NOTE(review): the input is 2-D (num_sequences, 25), which the recurrent layer
# reads as one unbatched sequence of 25-feature steps — confirm this is intended.

model.train()  # nothing switches the mode inside the loop, so set it once
losses = []
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(x_train_25)
    loss = criterion(output, y_train_25)

    loss.backward()
    optimizer.step()
    scheduler.step()

    loss_value = loss.item()  # read the scalar once and reuse it below
    losses.append(loss_value)

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_value}')

In [38]:
# One-row summary of the length-25 RNN run: lowest training loss observed.
df_r25 = pd.DataFrame([{'Model': 'RNN_25', 'Loss': min(losses)}])
df_r25

Unnamed: 0,Model,Loss
0,RNN_25,8.193528


Для 75

In [39]:
# Hyperparameters for the RNN run on the length-75 dataset.
# input_size and output_size both equal the sequence length (75).
input_size = output_size = 75
hidden_size = 128
num_epochs = 10_000
batch_size = 256

In [40]:
# Build the model on the target device *before* creating the optimizer, so the
# optimizer is constructed over the parameters that will actually be trained.
model = RNNModel(input_size, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)
# The learning rate is multiplied by 0.1 every 1000 scheduler steps.
# NOTE(review): with one step per epoch the rate is 1e-6 after 3000 epochs, so
# most of a 10000-epoch run happens at a near-zero learning rate — confirm intended.
scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)

In [None]:
# Full-batch training of the RNN on the length-75 dataset.
model = model.to(device)

# Cast to float and move to the device once; this work is loop-invariant.
# Separate names are used so the source dataset tensors are not rebound.
x_train_75 = x_data_75.float().to(device)
y_train_75 = y_data_75.float().to(device)
# NOTE(review): the input is 2-D (num_sequences, 75), which the recurrent layer
# reads as one unbatched sequence of 75-feature steps — confirm this is intended.

model.train()  # nothing switches the mode inside the loop, so set it once
losses = []
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(x_train_75)
    loss = criterion(output, y_train_75)

    loss.backward()
    optimizer.step()
    scheduler.step()

    loss_value = loss.item()  # read the scalar once and reuse it below
    losses.append(loss_value)

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_value}')

In [43]:
# One-row summary of the length-75 RNN run: lowest training loss observed.
df_r75 = pd.DataFrame([{'Model': 'RNN_75', 'Loss': min(losses)}])

df_r75

Unnamed: 0,Model,Loss
0,RNN_75,8.225663


In [44]:
# Stack the RNN results collected so far into one table with a fresh 0..n-1 index.
results_df_rnn = pd.concat(objs=[df_r25, df_r75], axis=0, ignore_index=True)
results_df_rnn

Unnamed: 0,Model,Loss
0,RNN_25,8.193528
1,RNN_75,8.225663


Для 125

In [45]:
# Hyperparameters for the RNN run on the length-125 dataset.
# input_size and output_size both equal the sequence length (125).
input_size = output_size = 125
hidden_size = 128
num_epochs = 10_000
batch_size = 256

In [46]:
# Build the model on the target device *before* creating the optimizer, so the
# optimizer is constructed over the parameters that will actually be trained.
model = RNNModel(input_size, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)
# The learning rate is multiplied by 0.1 every 1000 scheduler steps.
# NOTE(review): with one step per epoch the rate is 1e-6 after 3000 epochs, so
# most of a 10000-epoch run happens at a near-zero learning rate — confirm intended.
scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)

In [None]:
# Full-batch training of the RNN on the length-125 dataset.
model = model.to(device)

# Cast to float and move to the device once; this work is loop-invariant.
# Separate names are used so the source dataset tensors are not rebound.
x_train_125 = x_data_125.float().to(device)
y_train_125 = y_data_125.float().to(device)
# NOTE(review): the input is 2-D (num_sequences, 125), which the recurrent layer
# reads as one unbatched sequence of 125-feature steps — confirm this is intended.

model.train()  # nothing switches the mode inside the loop, so set it once
losses = []
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(x_train_125)
    loss = criterion(output, y_train_125)

    loss.backward()
    optimizer.step()
    scheduler.step()

    loss_value = loss.item()  # read the scalar once and reuse it below
    losses.append(loss_value)

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_value}')

In [48]:
# One-row summary of the length-125 RNN run: lowest training loss observed.
df_r125 = pd.DataFrame([{'Model': 'RNN_125', 'Loss': min(losses)}])


In [49]:
# Rebuild the RNN table from all three per-length frames with a fresh index.
results_df_rnn = pd.concat(objs=[df_r25, df_r75, df_r125], axis=0, ignore_index=True)
results_df_rnn

Unnamed: 0,Model,Loss
0,RNN_25,8.193528
1,RNN_75,8.225663
2,RNN_125,8.449386


# Теперь всё то же самое, но с GRU

In [50]:
# Hyperparameters for the GRU run on the length-25 dataset.
# input_size and output_size both equal the sequence length (25).
input_size = output_size = 25
hidden_size = 128
num_epochs = 10_000
batch_size = 256

In [51]:
class GRUModel(nn.Module):
    """Single-layer GRU followed by a linear layer applied to every time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the GRU hidden state.
        output_size: Number of output features per time step.

    Note:
        ``x`` is passed straight to ``nn.GRU``. With ``batch_first=True`` a
        3-D input is read as ``(batch, seq_len, input_size)``; a 2-D input is
        read as a single unbatched sequence ``(seq_len, input_size)``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # The GRU is stored under ``rnn``; the attribute names are kept
        # unchanged because they determine the state_dict keys.
        self.rnn = nn.GRU(
            input_size=input_size, hidden_size=hidden_size, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the per-step projection of the GRU hidden states for ``x``."""
        # nn.GRU returns (outputs, h_n); only the outputs are used.
        hidden_states = self.rnn(x)[0]
        return self.fc(hidden_states)

In [52]:
# Build the model on the target device *before* creating the optimizer, so the
# optimizer is constructed over the parameters that will actually be trained.
model = GRUModel(input_size, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)
# The learning rate is multiplied by 0.1 every 1000 scheduler steps.
# NOTE(review): with one step per epoch the rate is 1e-6 after 3000 epochs, so
# most of a 10000-epoch run happens at a near-zero learning rate — confirm intended.
scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)

In [None]:
# Full-batch training of the GRU on the length-25 dataset.
model = model.to(device)

# Cast to float and move to the device once; this work is loop-invariant.
# Separate names are used so the source dataset tensors are not rebound.
x_train_25 = x_data_25.float().to(device)
y_train_25 = y_data_25.float().to(device)
# NOTE(review): the input is 2-D (num_sequences, 25), which the recurrent layer
# reads as one unbatched sequence of 25-feature steps — confirm this is intended.

model.train()  # nothing switches the mode inside the loop, so set it once
losses = []
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(x_train_25)
    loss = criterion(output, y_train_25)

    loss.backward()
    optimizer.step()
    scheduler.step()

    loss_value = loss.item()  # read the scalar once and reuse it below
    losses.append(loss_value)

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_value}')

In [54]:
# One-row summary of the length-25 GRU run: lowest training loss observed.
df_g25 = pd.DataFrame([{'Model': 'GRU_25', 'Loss': min(losses)}])


In [55]:
# Display the GRU result for sequence length 25.
df_g25

Unnamed: 0,Model,Loss
0,GRU_25,8.199738


Теперь 75

In [59]:
# Hyperparameters for the GRU run on the length-75 dataset.
# input_size and output_size both equal the sequence length (75).
input_size = output_size = 75
hidden_size = 128
num_epochs = 10_000
batch_size = 256

In [60]:
# Build the model on the target device *before* creating the optimizer, so the
# optimizer is constructed over the parameters that will actually be trained.
model = GRUModel(input_size, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)
# The learning rate is multiplied by 0.1 every 1000 scheduler steps.
# NOTE(review): with one step per epoch the rate is 1e-6 after 3000 epochs, so
# most of a 10000-epoch run happens at a near-zero learning rate — confirm intended.
scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)

In [None]:
# Full-batch training of the GRU on the length-75 dataset.
model = model.to(device)

# Cast to float and move to the device once; this work is loop-invariant.
# Separate names are used so the source dataset tensors are not rebound.
x_train_75 = x_data_75.float().to(device)
y_train_75 = y_data_75.float().to(device)
# NOTE(review): the input is 2-D (num_sequences, 75), which the recurrent layer
# reads as one unbatched sequence of 75-feature steps — confirm this is intended.

model.train()  # nothing switches the mode inside the loop, so set it once
losses = []
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(x_train_75)
    loss = criterion(output, y_train_75)

    loss.backward()
    optimizer.step()
    scheduler.step()

    loss_value = loss.item()  # read the scalar once and reuse it below
    losses.append(loss_value)

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_value}')

In [62]:
# One-row summary of the length-75 GRU run: lowest training loss observed.
df_g75 = pd.DataFrame([{'Model': 'GRU_75', 'Loss': min(losses)}])


In [63]:
# Stack the GRU results collected so far into one table with a fresh 0..n-1 index.
results_df_gru = pd.concat(objs=[df_g25, df_g75], axis=0, ignore_index=True)
results_df_gru

Unnamed: 0,Model,Loss
0,GRU_25,8.199738
1,GRU_75,8.232573


И 125

In [64]:
# Hyperparameters for the GRU run on the length-125 dataset.
# input_size and output_size both equal the sequence length (125).
input_size = output_size = 125
hidden_size = 128
num_epochs = 10_000
batch_size = 256

In [65]:
# Build the model on the target device *before* creating the optimizer, so the
# optimizer is constructed over the parameters that will actually be trained.
model = GRUModel(input_size, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)
# The learning rate is multiplied by 0.1 every 1000 scheduler steps.
# NOTE(review): with one step per epoch the rate is 1e-6 after 3000 epochs, so
# most of a 10000-epoch run happens at a near-zero learning rate — confirm intended.
scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)

In [None]:
# Full-batch training of the GRU on the length-125 dataset.
model = model.to(device)

# Cast to float and move to the device once; this work is loop-invariant.
# Separate names are used so the source dataset tensors are not rebound.
x_train_125 = x_data_125.float().to(device)
y_train_125 = y_data_125.float().to(device)
# NOTE(review): the input is 2-D (num_sequences, 125), which the recurrent layer
# reads as one unbatched sequence of 125-feature steps — confirm this is intended.

model.train()  # nothing switches the mode inside the loop, so set it once
losses = []
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(x_train_125)
    loss = criterion(output, y_train_125)

    loss.backward()
    optimizer.step()
    scheduler.step()

    loss_value = loss.item()  # read the scalar once and reuse it below
    losses.append(loss_value)

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_value}')

In [67]:
# One-row summary of the length-125 GRU run: lowest training loss observed.
df_g125 = pd.DataFrame([{'Model': 'GRU_125', 'Loss': min(losses)}])


In [68]:
# Rebuild the GRU table from all three per-length frames with a fresh index.
results_df_gru = pd.concat(objs=[df_g25, df_g75, df_g125], axis=0, ignore_index=True)
results_df_gru

Unnamed: 0,Model,Loss
0,GRU_25,8.199738
1,GRU_75,8.232573
2,GRU_125,8.459264


In [69]:
# Combine the LSTM, RNN and GRU tables (in that order) into one comparison table.
final = pd.concat(
    objs=[results_df, results_df_rnn, results_df_gru],
    axis=0,
    ignore_index=True,
)
final

Unnamed: 0,Model,Loss
0,LSTM_25,15.163034
1,LSTM_75,8.230538
2,LSTM_125,9.992659
3,RNN_25,8.193528
4,RNN_75,8.225663
5,RNN_125,8.449386
6,GRU_25,8.199738
7,GRU_75,8.232573
8,GRU_125,8.459264


Как мы видим, с увеличением длины последовательности модели обучаются хуже.
