In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences

2024-07-30 20:35:01.206094: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-30 20:35:01.336505: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-30 20:35:01.386788: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-30 20:35:01.401801: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-30 20:35:01.485065: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# Hyperparameters
max_len = 100
max_features = 10000
batch_size = 16
embedding_dim = 200
hidden_dim = 200
num_epochs = 10

In [3]:
# Load IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), "Train data")
print(len(x_test), "Test data")

# Padding data
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)
print("train shape:", x_train.shape)
print("test shape:", x_test.shape)

# Convert to PyTorch tensors
x_train_tensor = torch.tensor(x_train, dtype=torch.long)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
x_test_tensor = torch.tensor(x_test, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

25000 Train data
25000 Test data
train shape: (25000, 100)
test shape: (25000, 100)


In [4]:
# Create DataLoader
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [5]:
class RNNModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(RNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, dropout=0.2, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.rnn(x)
        x = x[:, -1, :]  # Get the last time step
        x = self.fc(x)
        return self.sigmoid(x)

In [6]:
model = RNNModel(max_features, embedding_dim, hidden_dim)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

# Training loop
for epoch in range(num_epochs):
    model.train()
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_x).squeeze()
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")



Epoch 1/10, Loss: 0.925551176071167



KeyboardInterrupt



In [None]:
# Evaluation
model.eval()
with torch.no_grad():
    test_outputs = model(x_test_tensor).squeeze()
    test_predictions = (test_outputs > 0.5).float()
    test_accuracy = (test_predictions == y_test_tensor).float().mean()
    print("Test accuracy:", test_accuracy.item())

In [None]:
# Save the model
torch.save(model.state_dict(), 'saved_model.pth')