In [None]:
!pip install torch numpy pandas matplotlib



# Data preprocessing

Load the MovieLens data files (`users.dat`, `ratings.dat`, `movies.dat`) from Google Drive into the working directory before you run this section.

In [None]:
import numpy as np
import pandas as pd
import torch.utils.data
from torch.utils.data import DataLoader, Dataset

import os
def get_index_range(df, column_name):
    """Print the max, min and number of distinct values of one column of ``df``."""
    column = df[column_name]
    largest = column.max()
    smallest = column.min()
    print("column_name: {} max: {} min: {}".format(column_name, largest, smallest))
    print("unique values: ", column.nunique())

def get_dataset_info(df):
    """Print the ``get_index_range`` summary for every column of ``df``, in column order."""
    for name in df.columns:
        get_index_range(df, name)


# Rebuild the cached CSV splits only when one of them is missing on disk.
if not os.path.exists("train_data.csv") or not os.path.exists("test_data.csv"):
    # "::" is a multi-character separator, which only pandas' Python parser
    # engine supports. Requesting that engine explicitly avoids the
    # ParserWarning pandas emits when it silently falls back from the C engine.
    users = pd.read_csv(
        "users.dat",
        sep="::",
        names=["user_id", "sex", "age_group", "occupation", "zip_code"],
        engine="python",
    )

    ratings = pd.read_csv(
        "ratings.dat",
        sep="::",
        names=["user_id", "movie_id", "rating", "unix_timestamp"],
        engine="python",
    )

    movies = pd.read_csv(
        "movies.dat",
        sep="::",
        names=["movie_id", "title", "genres"],
        encoding='latin-1',
        engine="python",
    )
    # Text labels for the occupation codes; only needed when the commented-out
    # line further down is enabled to store occupation as text.
    occupation = {
        0: "other",
        1: "academic/educator",
        2: "artist",
        3: "clerical/admin",
        4: "college/grad student",
        5: "customer service",
        6: "doctor/health care",
        7: "executive/managerial",
        8: "farmer",
        9: "homemaker",
        10: "K-12 student",
        11: "lawyer",
        12: "programmer",
        13: "retired",
        14: "sales/marketing",
        15: "scientist",
        16: "self-employed",
        17: "technician/engineer",
        18: "tradesman/craftsman",
        19: "unemployed",
        20: "writer",
    }
    sex_dict = {'F': 0, 'M': 1}

    # Element-wise casts of the id / categorical columns to int and of the
    # rating column to float.
    # if you want occupation as text
    # users["occupation"] = users["occupation"].apply(lambda x: occupation[int(x)])
    for frame, column, caster in (
        (users, "user_id", int),
        (users, "age_group", int),
        (users, "occupation", int),
        (movies, "movie_id", int),
        (ratings, "movie_id", int),
        (ratings, "user_id", int),
        (ratings, "rating", float),
    ):
        frame[column] = frame[column].apply(caster)

    # 'F' / 'M' -> 0 / 1; an unexpected value raises KeyError.
    users["sex"] = users["sex"].apply(sex_dict.__getitem__)

    # Genre vocabulary; a genre's position in this list is its bit index.
    genres = [
        "Action", "Adventure", "Animation", "Children's", "Comedy", "Crime",
        "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "Musical",
        "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
    ]

    genre_ids = {genre: bit for bit, genre in enumerate(genres)}
    # bitmask of a genre combination -> dense id, assigned in first-seen order
    genre_comb_map = {}

    def genre_to_index(genre_str):
        """Map a "|"-separated genre string to the dense id of its genre combination."""
        bitmask = 0
        for name in genre_str.split("|"):
            bitmask |= 1 << genre_ids[name]
        # A combination seen for the first time receives the next free id.
        return genre_comb_map.setdefault(bitmask, len(genre_comb_map))

    movies.genres = movies.genres.apply(genre_to_index)

    # Print dataset info after transformations
    for frame in (movies, ratings, users):
        get_dataset_info(frame)

    # Attach each movie's title and genre-combination id to its ratings.
    ratings = ratings.join(movies.set_index("movie_id"), on="movie_id")
    # Sort by timestamp before grouping so the per-user lists below follow that order.
    ratings_group = ratings.sort_values(by=["unix_timestamp"]).groupby("user_id")

    # One row per user; every other column holds that user's history as a list.
    history_columns = {"user_id": list(ratings_group.groups.keys())}
    for target, source in (
        ("movie_ids", "movie_id"),
        ("ratings", "rating"),
        ("genres", "genres"),
        ("timestamps", "unix_timestamp"),
    ):
        history_columns[target] = list(ratings_group[source].apply(list))
    ratings_data = pd.DataFrame(data=history_columns)

    # Window length and stride used to cut each history into training sequences.
    sequence_length = 10
    step_size = 2


    def create_sequences(values, window_size, step_size):
        """Cut ``values`` into windows of exactly ``window_size`` items.

        Windows start every ``step_size`` items. If the stride does not land
        exactly on the end of ``values``, one extra window covering the last
        ``window_size`` items is appended so the tail is not lost. When the
        stride does land on the end, no extra window is added (the previous
        implementation appended the final window a second time in that case).

        Args:
            values: sliceable sequence (e.g. a list) to split.
            window_size: number of items per window; must be positive.
            step_size: distance between consecutive window starts; must be positive.

        Returns:
            List of windows in start order; empty when ``values`` is shorter
            than ``window_size``.

        Raises:
            ValueError: if ``window_size`` or ``step_size`` is not positive
                (the previous implementation looped forever on such input).
        """
        if window_size <= 0 or step_size <= 0:
            raise ValueError("window_size and step_size must be positive")

        last_start = len(values) - window_size
        if last_start < 0:
            return []

        starts = list(range(0, last_start + 1, step_size))
        # Add the tail-aligned window only when the regular stride missed it.
        if starts[-1] != last_start:
            starts.append(last_start)
        return [values[start:start + window_size] for start in starts]

    # Replace each user's flat history with its list of fixed-length windows.
    for column in ("movie_ids", "ratings", "genres"):
        ratings_data[column] = ratings_data[column].apply(
            lambda history: create_sequences(history, sequence_length, step_size)
        )

    # Timestamps were only needed for ordering.
    del ratings_data["timestamps"]

    # Explode every list-of-windows column into one row per window. The three
    # frames are exploded separately and then placed side by side by position.
    ratings_data_movies = ratings_data[["user_id", "movie_ids"]].explode(
        "movie_ids", ignore_index=True
    )
    ratings_data_rating = ratings_data[["ratings"]].explode("ratings", ignore_index=True)
    ratings_data_genres = ratings_data[["genres"]].explode("genres", ignore_index=True)

    # Attach the user's demographic columns to every window.
    ratings_data_transformed = pd.concat(
        [ratings_data_movies, ratings_data_rating, ratings_data_genres], axis=1
    ).join(users.set_index("user_id"), on="user_id")

    def _join_window(window):
        """Serialise one window as a comma-separated string."""
        return ",".join(str(item) for item in window)

    for column in ("movie_ids", "ratings", "genres"):
        ratings_data_transformed[column] = ratings_data_transformed[column].apply(_join_window)

    del ratings_data_transformed["zip_code"]

    ratings_data_transformed = ratings_data_transformed.rename(
        columns={"movie_ids": "sequence_movie_ids", "ratings": "sequence_ratings"}
    )

    # Reproducible ~85/15 row-level split. A dedicated seeded generator is used
    # because the notebook seeds NumPy's global RNG only in a later cell, so an
    # unseeded draw here produced a different split on every fresh run.
    # NOTE(review): windows of one user overlap (stride < window length), so a
    # row-level split puts overlapping windows in both train and test; a
    # per-user or time-based split would avoid that - confirm intent.
    split_rng = np.random.default_rng(0)
    random_selection = split_rng.random(len(ratings_data_transformed.index)) <= 0.85
    train_data = ratings_data_transformed[random_selection]
    test_data = ratings_data_transformed[~random_selection]

    # Files are written without a header row and with "|" as the separator.
    train_data.to_csv("train_data.csv", index=False, sep="|", header=False)
    test_data.to_csv("test_data.csv", index=False, sep="|", header=False)
    ratings_data_transformed.to_csv("all_data.csv", index=False, sep="|", header=False)


  return func(*args, **kwargs)


column_name: movie_id max: 3952 min: 1
unique values:  3883
column_name: title max: eXistenZ (1999) min: $1,000,000 Duck (1971)
unique values:  3883
column_name: genres max: 300 min: 0
unique values:  301
column_name: user_id max: 6040 min: 1
unique values:  6040
column_name: movie_id max: 3952 min: 1
unique values:  3706
column_name: rating max: 5.0 min: 1.0
unique values:  5
column_name: unix_timestamp max: 1046454590 min: 956703932
unique values:  458455
column_name: user_id max: 6040 min: 1
unique values:  6040
column_name: sex max: 1 min: 0
unique values:  2
column_name: age_group max: 56 min: 1
unique values:  7
column_name: occupation max: 20 min: 0
unique values:  21
column_name: zip_code max: 99945 min: 00231
unique values:  3439


# Dataset & Dataloader

In [None]:
class MovieLensDataset(Dataset):
    """Windows of user/movie interactions read from a "|"-separated file.

    Columns are addressed by position: 0 user id, 1 movie ids, 2 ratings,
    3 genre ids, 4 sex, 5 age group, 6 occupation. Columns 1-3 hold
    comma-separated values, one per step of the window.
    """

    def __init__(self, file_name):
        # The preprocessing cell writes these files with header=False, so the
        # first line is data. Without header=None pandas would consume it as
        # column names and silently drop one sample.
        self.df = pd.read_csv(file_name, delimiter='|', header=None)

    def __len__(self):
        """Number of windows (rows) in the file."""
        return len(self.df)

    def expand_to_list(self, value, seq_len):
        """Repeat a scalar ``seq_len`` times so it lines up with the per-step features."""
        return np.array([int(value)] * seq_len)

    def __getitem__(self, idx, is_training=True):
        """Return one window as a dict of equally long NumPy arrays.

        ``is_training`` is accepted for interface compatibility and is unused.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        row = self.df.iloc[idx]
        movies = row.iloc[1].split(',')
        ratings = row.iloc[2].split(',')
        genres = row.iloc[3].split(',')
        seq_len = len(movies)
        ret = {
            'user_id': self.expand_to_list(row.iloc[0], seq_len),
            'movie_id': np.array(movies).astype(int),
            'rating': np.array(ratings).astype(float),
            'genres': np.array(genres).astype(int),
            'sex': self.expand_to_list(row.iloc[4], seq_len),
            'age': self.expand_to_list(row.iloc[5], seq_len),
            'occupation': self.expand_to_list(row.iloc[6], seq_len),
        }
        return ret

# Wrap the two CSV splits written by the preprocessing cell.
test_dataset = MovieLensDataset('test_data.csv')
train_dataset = MovieLensDataset('train_data.csv')

In [None]:
# Both loaders use batches of 10 windows, shuffling and two worker processes.
loader_options = dict(batch_size=10, shuffle=True, num_workers=2)
train_dataloader = DataLoader(train_dataset, **loader_options)
val_dataloader = DataLoader(test_dataset, **loader_options)

# Sanity check: print the first four validation batches.
for i_batch, sample in zip(range(4), val_dataloader):
    print(i_batch, sample)


0 {'user_id': tensor([[1607, 1607, 1607, 1607, 1607, 1607, 1607, 1607, 1607, 1607],
        [1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243, 1243],
        [1140, 1140, 1140, 1140, 1140, 1140, 1140, 1140, 1140, 1140],
        [3558, 3558, 3558, 3558, 3558, 3558, 3558, 3558, 3558, 3558],
        [1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340, 1340],
        [4746, 4746, 4746, 4746, 4746, 4746, 4746, 4746, 4746, 4746],
        [ 608,  608,  608,  608,  608,  608,  608,  608,  608,  608],
        [1790, 1790, 1790, 1790, 1790, 1790, 1790, 1790, 1790, 1790],
        [ 509,  509,  509,  509,  509,  509,  509,  509,  509,  509],
        [2383, 2383, 2383, 2383, 2383, 2383, 2383, 2383, 2383, 2383]]), 'movie_id': tensor([[1365, 2248, 2067, 2791,  594,  523,  978, 2437, 2345, 3949],
        [ 597, 2003, 2144, 1380, 3712, 2145, 2746, 2150, 1297, 1441],
        [2905, 1304,   36,  318, 2366, 1465, 2700, 3653, 2202, 1957],
        [ 539,  344, 2372, 2416,  788, 3688, 1895,  520, 1135,

In [None]:
# Number of batches per training epoch.
print(len(train_dataloader))

40842


# Evaluation mode (set to `False` to train)

In [None]:
# When True, the cells guarded by `if not evaluation_mode` (training and loss plots) are skipped.
evaluation_mode = True

# Transformer

In [None]:
import torch
import torch.nn as nn

import random
import math
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Seed PyTorch's and NumPy's global random number generators.
torch.manual_seed(0)
np.random.seed(0)

In [None]:
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding followed by dropout.

    The encoding table is stored as a buffer of shape (max_len, 1, model_dim)
    and is sliced along dimension 0 of the input, so inputs must be laid out
    sequence-first: (seq_len, batch, model_dim).

    Args:
        model_dim: embedding width; odd values are supported.
        dropout_p: dropout probability applied to the sum.
        max_len: number of positions precomputed; inputs longer than this
            cannot be encoded.
    """

    def __init__(self, model_dim, dropout_p, max_len):
        super().__init__()
        # Modified version from: https://pytorch.org/tutorials/beginner/transformer_tutorial.html

        self.dropout = nn.Dropout(dropout_p)

        pos_encoding = torch.zeros(max_len, model_dim)
        positions_list = torch.arange(0, max_len, dtype=torch.float).view(-1, 1)  # 0, 1, 2, ...
        # 1 / 10000^(2i / model_dim) for every even channel index 2i
        division_term = torch.exp(
            torch.arange(0, model_dim, 2).float() * (-math.log(10000.0)) / model_dim
        )
        angles = positions_list * division_term

        # PE(pos, 2i) = sin(pos / 10000^(2i / model_dim))
        pos_encoding[:, 0::2] = torch.sin(angles)

        # PE(pos, 2i + 1) = cos(pos / 10000^(2i / model_dim)). With an odd
        # model_dim there is one fewer odd channel than even channels, so the
        # surplus frequency column is dropped instead of raising a shape error.
        pos_encoding[:, 1::2] = torch.cos(angles[:, : model_dim // 2])

        # Saving buffer (same as parameter without gradients needed)
        pos_encoding = pos_encoding.unsqueeze(0).transpose(0, 1)  # (max_len, 1, model_dim)
        self.register_buffer("pos_encoding", pos_encoding)

    def forward(self, token_embedding: torch.Tensor) -> torch.Tensor:
        """Add the encoding of positions 0..size(0)-1 to ``token_embedding`` and apply dropout."""
        return self.dropout(token_embedding + self.pos_encoding[:token_embedding.size(0), :])

In [None]:
class Transformer(nn.Module):
    """
    Rating regressor over interaction sequences, built on ``nn.Transformer``.

    Model from "A detailed guide to Pytorch's nn.Transformer() module.", by Daniel Melchor: https://medium.com/p/c80afbc9ffb1/

    Six categorical features are embedded, scaled by sqrt(model_dim) and summed
    per step; the same sequence is used as both source and target of the
    transformer, and a linear head emits one value per step.
    """
    # Constructor
    def __init__(
        self,
        model_dim,
        num_heads,
        num_encoder_layers,
        num_decoder_layers,
        dropout_p,
        num_users=6041,
        num_movies=3953,
        num_genres=301,
        num_sex=2,
        num_age=60,
        num_occupation=21,
    ):
        super().__init__()
        self.model_dim = model_dim
        self.positional_encoder = PositionalEncoding(
            model_dim=model_dim, dropout_p=dropout_p, max_len=5000
        )
        # One embedding table per categorical feature; the num_* arguments are
        # the table sizes, so every id must be strictly below its num_* value.
        self.user_embedding = nn.Embedding(num_users, model_dim)
        self.movie_embedding = nn.Embedding(num_movies, model_dim)
        self.genre_embedding = nn.Embedding(num_genres, model_dim)
        self.sex_embedding = nn.Embedding(num_sex, model_dim)
        self.age_embedding = nn.Embedding(num_age, model_dim)
        self.occupation_embedding = nn.Embedding(num_occupation, model_dim)
        self.transformer = nn.Transformer(
            d_model=model_dim,
            nhead=num_heads,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dropout=dropout_p,
        )

        # Regression
        self.out = nn.Linear(model_dim, 1)

    def forward(self, user, movie, genre, sex, age, occupation, rating=None, mask=None):
        """Predict one value per sequence step.

        Args:
            user, movie, genre, sex, age, occupation: integer id tensors of
                identical shape (batch, seq_len).
            rating: unused; kept (now optional) so existing positional calls
                keep working. Note that a 7th positional argument binds here,
                not to ``mask`` - pass the mask by keyword.
            mask: optional (seq_len, seq_len) additive attention mask such as
                the one from ``get_tgt_mask``. It is applied to encoder
                self-attention, decoder self-attention and decoder-to-encoder
                attention, so a causal mask keeps step t from seeing later steps.

        Returns:
            Tensor of shape (batch, seq_len, 1).
        """
        movie = self.movie_embedding(movie) * math.sqrt(self.model_dim)
        user = self.user_embedding(user) * math.sqrt(self.model_dim)
        genre = self.genre_embedding(genre) * math.sqrt(self.model_dim)
        sex = self.sex_embedding(sex) * math.sqrt(self.model_dim)
        age = self.age_embedding(age) * math.sqrt(self.model_dim)
        occupation = self.occupation_embedding(occupation) * math.sqrt(self.model_dim)

        feature_embedding = movie + user + genre + sex + age + occupation

        # (batch, seq, dim) -> (seq, batch, dim) BEFORE adding positions: the
        # positional encoder indexes dimension 0 as the position. Encoding the
        # batch-first tensor would give every step of a sample the same
        # encoding, chosen by the sample's index within the batch.
        features = feature_embedding.permute(1, 0, 2)
        features = self.positional_encoder(features)

        transformer_out = self.transformer(
            features,
            features,
            src_mask=mask,
            tgt_mask=mask,
            memory_mask=mask,
        )
        transformer_out = transformer_out.permute(1, 0, 2)
        out = self.out(transformer_out)

        return out

    def get_tgt_mask(self, size) -> torch.Tensor:
        """Return a (size, size) causal mask: 0 on and below the diagonal, -inf above."""
        # EX for size=5:
        # [[0., -inf, -inf, -inf, -inf],
        #  [0.,   0., -inf, -inf, -inf],
        #  [0.,   0.,   0., -inf, -inf],
        #  [0.,   0.,   0.,   0., -inf],
        #  [0.,   0.,   0.,   0.,   0.]]
        return torch.triu(torch.full((size, size), float('-inf')), diagonal=1)

    def create_pad_mask(self, matrix: torch.Tensor, pad_token: int) -> torch.Tensor:
        """Return a boolean tensor that is True wherever ``matrix`` equals ``pad_token``."""
        # If matrix = [1,2,3,0,0,0] where pad_token=0, the result mask is
        # [False, False, False, True, True, True]
        return (matrix == pad_token)

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Transformer(
    model_dim=16, num_heads=2, num_encoder_layers=3, num_decoder_layers=3, dropout_p=0.1
).to(device)
# Resume from an earlier checkpoint when one exists. map_location lets a
# checkpoint that was saved on a GPU machine load on a CPU-only one.
if os.path.exists('trained_trans_10_epochs.pth'):
    model.load_state_dict(torch.load('trained_trans_10_epochs.pth', map_location=device))
model.train()
opt = torch.optim.SGD(model.parameters(), lr=0.01)
loss_fn = nn.MSELoss()

In [None]:
def train_loop(model, opt, loss_fn, dataloader):
    """Train ``model`` for one pass over ``dataloader``.

    Each batch is a dict with keys 'user_id', 'movie_id', 'genres', 'sex',
    'age', 'occupation' and 'rating'. Tensors are moved to the module-level
    ``device``. The batch index is printed every 100 batches.

    Returns:
        Mean of the per-batch losses.
    """
    model.train()
    total_loss = 0

    for i, batch in enumerate(dataloader):
        if i % 100 == 0:
            print(i)
        users = batch['user_id'].long().to(device)
        movies = batch['movie_id'].long().to(device)
        genres = batch['genres'].long().to(device)
        sex = batch['sex'].long().to(device)
        age = batch['age'].long().to(device)
        occupation = batch['occupation'].long().to(device)
        # as_tensor reuses an existing tensor; torch.tensor(tensor) would
        # copy-construct and emit a UserWarning on every batch.
        ratings = torch.as_tensor(batch['rating']).float().to(device)

        sequence_length = ratings.size(1)
        mask = model.get_tgt_mask(sequence_length).to(device)
        # Pass the mask by keyword: the model's 7th positional parameter is
        # `rating`, so a positional mask was bound to it and never applied.
        pred = model(users, movies, genres, sex, age, occupation, rating=ratings, mask=mask)
        loss = loss_fn(pred.squeeze(dim=-1), ratings)

        opt.zero_grad()
        loss.backward()
        opt.step()

        total_loss += loss.detach().item()

    return total_loss / len(dataloader)

In [None]:
def validation_loop(model, loss_fn, dataloader):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Batches have the same dict layout as in ``train_loop``; tensors are moved
    to the module-level ``device``.

    Returns:
        Mean of the per-batch losses.
    """
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in dataloader:
            users = batch['user_id'].long().to(device)
            movies = batch['movie_id'].long().to(device)
            genres = batch['genres'].long().to(device)
            sex = batch['sex'].long().to(device)
            age = batch['age'].long().to(device)
            occupation = batch['occupation'].long().to(device)
            # as_tensor reuses an existing tensor; torch.tensor(tensor) would
            # copy-construct and emit a UserWarning on every batch.
            ratings = torch.as_tensor(batch['rating']).float().to(device)

            sequence_length = ratings.size(1)
            mask = model.get_tgt_mask(sequence_length).to(device)
            # Pass the mask by keyword: the model's 7th positional parameter
            # is `rating`, so a positional mask was bound to it and never applied.
            pred = model(users, movies, genres, sex, age, occupation, rating=ratings, mask=mask)
            loss = loss_fn(pred.squeeze(dim=-1), ratings)

            total_loss += loss.detach().item()

    return total_loss / len(dataloader)

In [None]:
def fit(model, opt, loss_fn, train_dataloader, val_dataloader, epochs):
    """Run ``epochs`` rounds of ``train_loop`` followed by ``validation_loop``.

    Prints a banner and both losses for every epoch.

    Returns:
        (train_loss_list, validation_loss_list) with one entry per epoch.
    """
    # Per-epoch history, used for plotting later on
    train_loss_list = []
    validation_loss_list = []

    print("Training and validating model")
    rule = "-"*25
    for epoch in range(epochs):
        print(rule, f"Epoch {epoch + 1}", rule)

        train_loss = train_loop(model, opt, loss_fn, train_dataloader)
        train_loss_list.append(train_loss)

        validation_loss = validation_loop(model, loss_fn, val_dataloader)
        validation_loss_list.append(validation_loss)

        print(f"Training loss: {train_loss: .4f}")
        print(f"Validation loss: {validation_loss: .4f}")

    return train_loss_list, validation_loss_list

In [None]:
# Train for 10 epochs only when evaluation_mode is off.
if not evaluation_mode:
    train_loss_list, validation_loss_list = fit(model, opt, loss_fn, train_dataloader, val_dataloader, 10)

In [None]:
# The loss curves only exist after a training run.
if not evaluation_mode:
    fig, ax = plt.subplots()
    ax.plot(train_loss_list, label="Train loss")
    ax.plot(validation_loss_list, label="Validation loss")
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title('Loss vs Epoch')
    ax.legend()
    plt.show()

In [None]:
# Persist weights only after an actual training run. Saving unconditionally
# would, in evaluation mode with no checkpoint on disk, write randomly
# initialised weights under the "trained" file name.
if not evaluation_mode:
    torch.save(model.state_dict(), 'trained_trans_10_epochs.pth')

# LSTM


In [None]:
class LSTM_Model(nn.Module):
    """
    LSTM baseline that predicts one value per step of an interaction sequence.

    Uses the same six summed feature embeddings as the transformer model,
    followed by a 3-layer batch-first LSTM with hidden size ``2 * model_dim``
    and a linear head.
    """
    # Constructor
    def __init__(
        self,
        model_dim,
        dropout_p,
        num_users=6041,
        num_movies=3953,
        num_genres=301,
        num_ratings=5,
        num_sex=2,
        num_age=60,
        num_occupation=21,
    ):
        super().__init__()

        # INFO
        # (previously mislabelled "Transformer" by copy-paste)
        self.model_type = "LSTM"
        self.model_dim = model_dim
        # num_ratings is accepted for interface compatibility but unused:
        # ratings are the regression target, not an embedded input.
        self.user_embedding = nn.Embedding(num_users, model_dim)
        self.movie_embedding = nn.Embedding(num_movies, model_dim)
        self.genre_embedding = nn.Embedding(num_genres, model_dim)
        self.sex_embedding = nn.Embedding(num_sex, model_dim)
        self.age_embedding = nn.Embedding(num_age, model_dim)
        self.occupation_embedding = nn.Embedding(num_occupation, model_dim)
        self.lstm = nn.LSTM(model_dim, model_dim * 2, 3, dropout=dropout_p, batch_first=True)
        # Regression
        self.out = nn.Linear(model_dim * 2, 1)

    def forward(self, user, movie, genre, sex, age, occupation):
        """Return predictions of shape (batch, seq_len, 1) for id tensors of shape (batch, seq_len)."""
        movie = self.movie_embedding(movie) * math.sqrt(self.model_dim)
        user = self.user_embedding(user) * math.sqrt(self.model_dim)
        genre = self.genre_embedding(genre) * math.sqrt(self.model_dim)
        sex = self.sex_embedding(sex) * math.sqrt(self.model_dim)
        age = self.age_embedding(age) * math.sqrt(self.model_dim)
        occupation = self.occupation_embedding(occupation) * math.sqrt(self.model_dim)

        feature_embedding = movie + user + genre + sex + age + occupation

        # The LSTM is batch_first, so no permutation is needed here.
        output, _ = self.lstm(feature_embedding)
        return self.out(output)
    

In [None]:
def lstm_train_loop(model, opt, loss_fn, dataloader):
    """Train the LSTM ``model`` for one pass over ``dataloader``.

    Each batch is a dict with keys 'user_id', 'movie_id', 'genres', 'sex',
    'age', 'occupation' and 'rating'. Tensors are moved to the module-level
    ``device``.

    Returns:
        Mean of the per-batch losses.
    """
    model.train()
    total_loss = 0
    id_keys = ('user_id', 'movie_id', 'genres', 'sex', 'age', 'occupation')
    for batch in dataloader:
        # as_tensor reuses tensors that are already tensors; torch.tensor(tensor)
        # would copy-construct and emit a UserWarning for every field.
        users, movies, genres, sex, age, occupation = (
            torch.as_tensor(batch[key]).long().to(device) for key in id_keys
        )
        ratings = torch.as_tensor(batch['rating']).float().to(device)

        pred = model(users, movies, genres, sex, age, occupation)
        loss = loss_fn(pred.squeeze(dim=-1), ratings)

        opt.zero_grad()
        loss.backward()
        opt.step()

        total_loss += loss.detach().item()

    return total_loss / len(dataloader)

In [None]:
def lstm_eval_loop(model, loss_fn, dataloader):
    """Evaluate the LSTM ``model`` on ``dataloader`` without gradient tracking.

    Batches have the same dict layout as in ``lstm_train_loop``; tensors are
    moved to the module-level ``device``.

    Returns:
        Mean of the per-batch losses.
    """
    model.eval()
    total_loss = 0
    id_keys = ('user_id', 'movie_id', 'genres', 'sex', 'age', 'occupation')
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and matches validation_loop.
    with torch.no_grad():
        for batch in dataloader:
            # as_tensor reuses existing tensors instead of copy-constructing
            # them (torch.tensor(tensor) emits a UserWarning).
            users, movies, genres, sex, age, occupation = (
                torch.as_tensor(batch[key]).long().to(device) for key in id_keys
            )
            ratings = torch.as_tensor(batch['rating']).float().to(device)

            pred = model(users, movies, genres, sex, age, occupation)
            loss = loss_fn(pred.squeeze(dim=-1), ratings)
            total_loss += loss.detach().item()

    return total_loss / len(dataloader)

In [None]:
lstm_model = LSTM_Model(
    model_dim=16, dropout_p=0.1
).to(device)
# Resume from an earlier checkpoint when one exists. map_location lets a
# checkpoint that was saved on a GPU machine load on a CPU-only one.
if os.path.exists('trained_lstm_10_epochs.pth'):
    lstm_model.load_state_dict(torch.load('trained_lstm_10_epochs.pth', map_location=device))
lstm_model.train()
lstm_opt = torch.optim.SGD(lstm_model.parameters(), lr=0.01)
lstm_loss_fn = torch.nn.MSELoss()

In [None]:
def fit_lstm(model, opt, loss_fn, train_dataloader, val_dataloader, epochs):
    """Run ``epochs`` rounds of ``lstm_train_loop`` followed by ``lstm_eval_loop``.

    Prints a banner and both MSE losses for every epoch.

    Returns:
        (train_loss_list, validation_loss_list, val_accs); the first two hold
        one entry per epoch, ``val_accs`` is never filled and stays empty.
    """
    # Per-epoch history, used for plotting later on
    train_loss_list = []
    validation_loss_list = []
    val_accs = []

    print("Training and validating model")
    rule = "-"*25
    for epoch in range(epochs):
        print(rule, f"Epoch {epoch + 1}", rule)

        train_loss = lstm_train_loop(model, opt, loss_fn, train_dataloader)
        train_loss_list.append(train_loss)

        validation_loss = lstm_eval_loop(model, loss_fn, val_dataloader)
        validation_loss_list.append(validation_loss)

        print(f"Training MSE loss: {train_loss: .4f}")
        print(f"Validation MSE loss: {validation_loss: .4f}")

    return train_loss_list, validation_loss_list, val_accs

In [None]:
# Train the LSTM for 10 epochs only when evaluation_mode is off.
if not evaluation_mode:
    lstm_train_loss_list, lstm_validation_loss_list, lstm_v_accs = fit_lstm(lstm_model, lstm_opt, lstm_loss_fn, train_dataloader, val_dataloader, 10)

In [None]:
# The loss curves only exist after a training run.
if not evaluation_mode:
    fig, ax = plt.subplots()
    ax.plot(lstm_train_loss_list, label="Train loss")
    ax.plot(lstm_validation_loss_list, label="Validation loss")
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title('Loss vs Epoch')
    ax.legend()
    plt.show()

In [None]:
# Persist weights only after an actual training run. Saving unconditionally
# would, in evaluation mode with no checkpoint on disk, write randomly
# initialised weights under the "trained" file name.
if not evaluation_mode:
    torch.save(lstm_model.state_dict(), 'trained_lstm_10_epochs.pth')

# Evaluation

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Transformer(
    model_dim=16, num_heads=2, num_encoder_layers=3, num_decoder_layers=3, dropout_p=0.1
).to(device)
# Evaluate the trained weights when a checkpoint is available. With the load
# commented out, this cell scored a freshly initialised (untrained) network.
if os.path.exists('trained_trans_10_epochs.pth'):
    model.load_state_dict(torch.load('trained_trans_10_epochs.pth', map_location=device))
model.eval()

Transformer(
  (positional_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (user_embedding): Embedding(6041, 16)
  (movie_embedding): Embedding(3953, 16)
  (genre_embedding): Embedding(301, 16)
  (sex_embedding): Embedding(2, 16)
  (age_embedding): Embedding(60, 16)
  (occupation_embedding): Embedding(21, 16)
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=16, out_features=16, bias=True)
          )
          (linear1): Linear(in_features=16, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=16, bias=True)
          (norm1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropo

In [None]:
lstm_model = LSTM_Model(
    model_dim=16, dropout_p=0.1
).to(device)
# Evaluate the trained weights when a checkpoint is available. With the load
# commented out, this cell scored a freshly initialised (untrained) network.
if os.path.exists('trained_lstm_10_epochs.pth'):
    lstm_model.load_state_dict(torch.load('trained_lstm_10_epochs.pth', map_location=device))
lstm_model.eval()

LSTM_Model(
  (user_embedding): Embedding(6041, 16)
  (movie_embedding): Embedding(3953, 16)
  (genre_embedding): Embedding(301, 16)
  (sex_embedding): Embedding(2, 16)
  (age_embedding): Embedding(60, 16)
  (occupation_embedding): Embedding(21, 16)
  (lstm): LSTM(16, 32, num_layers=3, batch_first=True, dropout=0.1)
  (out): Linear(in_features=32, out_features=1, bias=True)
)

In [None]:
# Mean per-batch loss of the transformer on the validation loader.
val_loss = validation_loop(model, loss_fn, val_dataloader)

  


In [None]:
# Mean per-batch loss of the LSTM on the validation loader.
l_val_loss = lstm_eval_loop(lstm_model, lstm_loss_fn, val_dataloader)

  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':
  del sys.path[0]


In [None]:
# Both values are mean MSE losses (lower is better), already averaged per
# element by MSELoss and per batch by the eval loops. They are not accuracies,
# and dividing them by 10 again would under-report the error.
print("Validation MSE LSTM: {} TRANS: {}".format(l_val_loss, val_loss))

Accuracy LSTM: 1.354762974015584 TRANS: 1.2122569652482762
