# Custom Dataset with LSTM

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from collections import Counter #word frequency calculations
from itertools import product #grid search combination
import torchvision

import matplotlib.pyplot as plt
import numpy as np

In [5]:
# Training corpus: a short Turkish product review; the word list and vocabulary
# below are derived from this single string.
# NOTE(review): "ulaitı" looks like a typo for "ulaştı" — confirm before changing,
# because editing the string alters the vocabulary the model is trained on.
text = """Bu ürün beklentimi fazlasıyla karşıladı. Malzeme kalitesi gerçekten çok iyi. Kargo hızlı ve sorunsuz bir şekilde elime ulaitı. Fiyatına göre performansı harika. Kesinlikle tavsiye ederim!"""

In [7]:
# Tokenise: drop "." and "!" in one pass, lowercase, then split on whitespace.
words = text.translate(str.maketrans("", "", ".!")).lower().split()

In [9]:
# Count how often each word occurs, then build the vocabulary ordered from the
# most frequent word to the least frequent (ties keep first-seen order).
word_counts = Counter(words)
vocab = [word for word, _count in word_counts.most_common()]

# Lookup tables in both directions: word -> index and index -> word.
word_to_ix = dict(zip(vocab, range(len(vocab))))
ix_to_word = dict(enumerate(vocab))

In [10]:
# Training pairs: every word is paired with the word that immediately follows it.
data = list(zip(words, words[1:]))

Define LSTM model

In [11]:
class LSTM(nn.Module):
  """Next-token model: embedding -> LSTM -> linear projection to vocabulary logits.

  Args:
    vocab_size: number of distinct token indices (size of the embedding table
      and of the output logits).
    embedding_dim: size of each token embedding vector.
    hidden_dim: size of the LSTM hidden state.
  """

  def __init__(self, vocab_size, embedding_dim, hidden_dim):
    super().__init__()
    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim)
    self.fc = nn.Linear(hidden_dim, vocab_size)

  def forward(self, x):
    """Return one row of vocabulary logits per input token.

    Args:
      x: integer tensor of token indices. It is flattened and treated as a
        single sequence (batch size 1), so a one-token input behaves exactly
        as before while longer sequences are now accepted as well.

    Returns:
      Tensor of shape (sequence_length, vocab_size) holding unnormalised scores.
    """
    tokens = x.reshape(-1)  # accept 0-d, 1-d or (1, n) index tensors alike
    seq_len = tokens.shape[0]
    embedded = self.embedding(tokens)  # (seq_len, embedding_dim)
    # nn.LSTM defaults to (seq_len, batch, features); batch is fixed to 1 here.
    lstm_out, _ = self.lstm(embedded.view(seq_len, 1, -1))
    output = self.fc(lstm_out.view(seq_len, -1))
    return output

# Instantiate a model sized to the vocabulary with embedding_dim=8 and hidden_dim=32.
# NOTE(review): `model` is rebound inside the grid-search loop below, so this
# instance appears to be unused afterwards — confirm before removing.
model = LSTM(len(vocab), embedding_dim=8, hidden_dim=32)

Hyperparameter Tuning

In [12]:
def prep_seq(seq, to_ix):
  """Map each word in `seq` to its index via `to_ix` and return a long tensor.

  Raises KeyError if a word is missing from `to_ix`.
  """
  indices = []
  for token in seq:
    indices.append(to_ix[token])
  return torch.tensor(indices, dtype=torch.long)

# Grid-search space: every combination of these values is tried.
embedding_sizes, hidden_sizes = [8, 16], [32, 64]
learning_rates = [0.01, 0.005]

# Best result seen so far; starts empty with an infinite loss so that any
# finite score replaces it.
best_params = dict()
best_loss = float("inf")

print("Hyperparameter tuning")

Hyperparameter tuning


In [14]:
# Grid search: train a fresh model for every (embedding, hidden, learning-rate)
# combination and keep the combination with the lowest last-epoch training loss.
for emb_size, hidden_size, lr in product(embedding_sizes, hidden_sizes, learning_rates):
  print(f"Test: Embedding: {emb_size}, Hidden: {hidden_size}, Learning Rate: {lr}")
  model = LSTM(len(vocab), emb_size, hidden_size)
  loss_function = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=lr)

  epochs = 50
  # Score of this combination. It was previously initialised to 0 and never
  # updated, so only the first combination could ever be selected.
  total_loss = float("inf")
  for epoch in range(epochs):
    epoch_loss = 0
    for word, next_word in data:
      model.zero_grad()
      input_tensor = prep_seq([word], word_to_ix)
      target_tensor = prep_seq([next_word], word_to_ix)
      output = model(input_tensor)
      loss = loss_function(output, target_tensor)
      loss.backward()
      optimizer.step()
      epoch_loss += loss.item()

    if epoch % 10 == 0:
      print(f"Epoch: {epoch}, Loss: {epoch_loss:.5f}")

    # Remember the most recent epoch's summed loss; after the loop this is the
    # loss of the fully trained model for this combination.
    total_loss = epoch_loss

  if total_loss < best_loss:
    best_loss = total_loss
    best_params = {"embedding_size": emb_size, "hidden_size": hidden_size, "learning_rate": lr}
  print()

print(f"Best parameters: {best_params}")

Test: Embedding: 8, Hidden: 32, Learning Rate: 0.01
Epoch: 0, Loss: 79.38929
Epoch: 10, Loss: 1.95750
Epoch: 20, Loss: 0.43242
Epoch: 30, Loss: 0.20188
Epoch: 40, Loss: 0.11909

Test: Embedding: 8, Hidden: 32, Learning Rate: 0.005
Epoch: 0, Loss: 78.15204
Epoch: 10, Loss: 13.32028
Epoch: 20, Loss: 2.08681
Epoch: 30, Loss: 0.83645
Epoch: 40, Loss: 0.46268

Test: Embedding: 8, Hidden: 64, Learning Rate: 0.01
Epoch: 0, Loss: 79.27559
Epoch: 10, Loss: 0.54903
Epoch: 20, Loss: 0.14177
Epoch: 30, Loss: 0.06746
Epoch: 40, Loss: 0.03981

Test: Embedding: 8, Hidden: 64, Learning Rate: 0.005
Epoch: 0, Loss: 78.37150
Epoch: 10, Loss: 3.84254
Epoch: 20, Loss: 0.55081
Epoch: 30, Loss: 0.23443
Epoch: 40, Loss: 0.13192

Test: Embedding: 16, Hidden: 32, Learning Rate: 0.01
Epoch: 0, Loss: 78.67019
Epoch: 10, Loss: 1.10942
Epoch: 20, Loss: 0.28933
Epoch: 30, Loss: 0.13987
Epoch: 40, Loss: 0.08370

Test: Embedding: 16, Hidden: 32, Learning Rate: 0.005
Epoch: 0, Loss: 78.06287
Epoch: 10, Loss: 5.96963
Ep

LSTM training

In [15]:
# Retrain a fresh model using the configuration chosen by the grid search.
loss_function = nn.CrossEntropyLoss()
final_model = LSTM(
    len(vocab),
    best_params["embedding_size"],
    best_params["hidden_size"],
)
optimizer = optim.Adam(
    final_model.parameters(),
    lr=best_params["learning_rate"],
)

epochs = 100
for epoch in range(epochs):
  epoch_loss = 0
  for word, next_word in data:
    # Encode the (current word, next word) pair as index tensors.
    input_tensor = prep_seq([word], word_to_ix)
    target_tensor = prep_seq([next_word], word_to_ix)

    # One optimisation step per training pair.
    final_model.zero_grad()
    output = final_model(input_tensor)
    loss = loss_function(output, target_tensor)
    loss.backward()
    optimizer.step()
    epoch_loss += loss.item()

  # Report the summed loss of every tenth epoch.
  if epoch % 10 == 0:
    print(f"Final model: Epoch: {epoch}, Loss: {epoch_loss:.5f}")

Final model: Epoch: 0, Loss: 79.29201
Final model: Epoch: 10, Loss: 2.38800
Final model: Epoch: 20, Loss: 0.47853
Final model: Epoch: 30, Loss: 0.22010
Final model: Epoch: 40, Loss: 0.12934
Final model: Epoch: 50, Loss: 0.08565
Final model: Epoch: 60, Loss: 0.06087
Final model: Epoch: 70, Loss: 0.04532
Final model: Epoch: 80, Loss: 0.03486
Final model: Epoch: 90, Loss: 0.02748


Testing and Evaluation

In [19]:
# Greedy text generation: start from one word and repeatedly append the model's
# top-scoring next word.
def pred_seq(start_word, num_words):
  """Return `[start_word]` followed by `num_words` greedily predicted words.

  Each prediction feeds the previously produced word to `final_model` and picks
  the index with the highest score; gradients are disabled throughout.
  """
  generated = [start_word]

  with torch.no_grad():
    for _ in range(num_words):
      token = prep_seq([generated[-1]], word_to_ix)
      scores = final_model(token)
      best_ix = torch.argmax(scores).item()  # index of the highest-scoring word
      generated.append(ix_to_word[best_ix])

  return generated

# Demo: generate ten words after the seed word and print them as one sentence.
start_word, num_predictions = "ürün", 10
predicted_sequence = pred_seq(start_word=start_word, num_words=num_predictions)
print(" ".join(predicted_sequence))

ürün beklentimi fazlasıyla karşıladı malzeme kalitesi gerçekten çok iyi kargo hızlı
