In [1]:
pip install torch

--- Logging error ---
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_internal/utils/logging.py", line 177, in emit
    self.console.print(renderable, overflow="ignore", crop=False, style=style)
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_vendor/rich/console.py", line 1673, in print
    extend(render(renderable, render_options))
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_vendor/rich/console.py", line 1305, in render
    for render_output in iter_render:
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_internal/utils/logging.py", line 134, in __rich_console__
    for line in lines:
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_vendor/rich/segment.py", line 249, in split_lines
    for segment in segments:
  File "/Library/

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader, Dataset

In [3]:
class MovieIDPredictor(nn.Module):
    def __init__(self, num_years, num_genres, num_movie_ids, year_embedding_dim=8, genre_embedding_dim=8, transformer_heads=8, transformer_layers=1, transformer_dim=64):
        super(MovieIDPredictor, self).__init__()
        # Define dimensions as instance attributes
        self.year_embedding_dim = year_embedding_dim
        self.genre_embedding_dim = genre_embedding_dim
        self.transformer_dim = transformer_dim

        self.year_embedding = nn.Embedding(num_years, self.year_embedding_dim)
        self.genre_embedding = nn.Embedding(num_genres, self.genre_embedding_dim)

        # TransformerEncoder expects the input dimension (d_model) to match the transformer_dim
        self.transformer = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=self.transformer_dim, nhead=transformer_heads), num_layers=transformer_layers)
        
        # Adjust the rating linear layer to properly match the input and concatenated dimensions
        self.rating_linear = nn.Linear(1, self.transformer_dim - self.year_embedding_dim - self.genre_embedding_dim)
        
        # Output layer to classify movie IDs
        self.fc_out = nn.Linear(self.transformer_dim, num_movie_ids)
        
    def forward(self, year, genre, rating):
        year_emb = self.year_embedding(year).view(-1, 1, self.year_embedding_dim)
        genre_emb = self.genre_embedding(genre).view(-1, 1, self.genre_embedding_dim)
        
        # Fix the view dimensions to match the transformer input size
        rating_emb = self.rating_linear(rating.unsqueeze(-1)).view(-1, 1, self.transformer_dim - self.year_embedding_dim - self.genre_embedding_dim)
        
        x = torch.cat((year_emb, genre_emb, rating_emb), dim=-1)
        x = self.transformer(x)
        x = x.view(-1, self.transformer_dim)  # Reshape for the output layer
        output = self.fc_out(x)
        return output

In [4]:
class MovieDataset(Dataset):
    def __init__(self, years, genres, ratings, movie_ids):
        self.years = years
        self.genres = genres
        self.ratings = ratings
        self.movie_ids = movie_ids
        
    def __len__(self):
        return len(self.years)
        
    def __getitem__(self, idx):
        year = torch.tensor(self.years[idx] - 1888)  # Assuming year range starts from 1888
        genre = torch.tensor(self.genres[idx])
        rating = torch.tensor([self.ratings[idx]])
        movie_id = torch.tensor(self.movie_ids[idx], dtype=torch.long)
        return year, genre, rating, movie_id

# Example data
years = [1994, 2000, 1999, 2005, 2010]
genres = [4, 1, 3, 2, 5]
ratings = [8.5, 7.2, 9.0, 6.5, 7.8]
movie_ids = [0, 1, 2, 3, 4]  # Example class indices for movie IDs

dataset = MovieDataset(years=years, genres=genres, ratings=ratings, movie_ids=movie_ids)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)


In [9]:
# Parameters and model initialization
num_years = 2024 - 1888 + 1
num_genres = 20
num_movie_ids = 5  # Number of unique movie IDs in the sample dataset
model = MovieIDPredictor(num_years=num_years, num_genres=num_genres, num_movie_ids=num_movie_ids)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    for year, genre, rating, movie_id in dataloader:
        optimizer.zero_grad()
        outputs = model(year, genre, rating)
        loss = criterion(outputs, movie_id)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')


Epoch 1, Loss: 2.6201059818267822
Epoch 2, Loss: 1.7572002410888672
Epoch 3, Loss: 2.0883803367614746
Epoch 4, Loss: 2.004603862762451
Epoch 5, Loss: 1.6926970481872559
Epoch 6, Loss: 1.3197959661483765
Epoch 7, Loss: 2.1050262451171875
Epoch 8, Loss: 1.8066092729568481
Epoch 9, Loss: 1.0749341249465942
Epoch 10, Loss: 1.7641313076019287
Epoch 11, Loss: 1.196860671043396
Epoch 12, Loss: 1.215781807899475
Epoch 13, Loss: 0.991085946559906
Epoch 14, Loss: 1.2268166542053223
Epoch 15, Loss: 0.8460947871208191
Epoch 16, Loss: 1.5335986614227295
Epoch 17, Loss: 1.0021827220916748
Epoch 18, Loss: 0.477432519197464
Epoch 19, Loss: 0.9383726119995117
Epoch 20, Loss: 0.31748390197753906


In [10]:
torch.save(model.state_dict(), 'movie_predictor_model.pth')

In [11]:
loaded_model = MovieIDPredictor(num_years=num_years, num_genres=num_genres, num_movie_ids=num_movie_ids)

# Load the state dictionary
loaded_model.load_state_dict(torch.load('movie_predictor_model.pth'))

# Set the model to evaluation mode
loaded_model.eval()

MovieIDPredictor(
  (year_embedding): Embedding(137, 8)
  (genre_embedding): Embedding(20, 8)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (rating_linear): Linear(in_features=1, out_features=48, bias=True)
  (fc_out): Linear(in_features=64, out_features=5, bias=True)
)

In [14]:
correct = 0
total = 0
with torch.no_grad():  # No need to track gradients during evaluation
    for year, genre, rating, movie_id in dataloader:  # Assuming you have a dataloader for your evaluation dataset
        outputs = loaded_model(year, genre, rating)
        _, predicted = torch.max(outputs.data, 1)  # Get the index of the max log-probability
        total += movie_id.size(0)
        correct += (predicted == movie_id).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the model on the evaluation dataset: {accuracy:.2f}%')


tensor([[ 1.1401,  2.8168, -0.6450, -0.2670, -1.9150],
        [ 0.6575,  0.6791,  0.0517,  2.0141, -1.0081]])
tensor([[ 0.8974, -1.4229,  2.8176, -0.0712, -0.8153],
        [ 0.7505, -1.7163,  0.3867,  0.2974,  3.0293]])
tensor([[ 2.1260,  0.9700,  0.2053, -0.4353, -1.2806]])
Accuracy of the model on the evaluation dataset: 100.00%
