In [1]:
import sqlite3
import pickle
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from glob import glob
from tqdm import tqdm
from IPython.core.debugger import set_trace
from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split

In [2]:
# Show which accelerator is active. Guarded so the cell also runs on CPU-only
# machines, matching the CPU fallback used when `device` is chosen below.
torch.cuda.get_device_name(torch.cuda.current_device()) if torch.cuda.is_available() else 'cpu (CUDA not available)'

'GeForce GTX 960M'

In [3]:
# Seed every RNG the notebook relies on: NumPy drives the per-record sampling in
# the dataset, while torch drives random_split, DataLoader shuffling, weight
# initialisation and dropout.
np.random.seed(42)
torch.manual_seed(42)

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# Vocabulary sizes and the mini-batch size used by the data loaders
# and as the default batch size of the network.
num_users = 108709
num_anime = 6668
batch_size = 1024

In [8]:
# Disabled alternative input:
#   glob('datasets/user_grouped_ratings/user_grouped_ratings_processed*.pkl')

# Collect the SQLite shards in lexicographic order.
user_grouped_rating_files = sorted(
    glob('datasets/user_grouped_ratings/augmented_10_user_grouped_ratings_processed_*.db')
)

In [9]:
class AnimeRatingsDataset(Dataset):
    """Dataset that serves randomly sampled rating histories from a SQLite file.

    Rows are read from the ``augmented_data`` table; the second column of each
    row is a pickled record exposing ``anime_id`` and ``my_score`` sequences.
    Every access draws a fresh random sample from the record, so the same
    index can yield different ``(X, y)`` pairs.
    """

    def __init__(self, sqlite_file, transform=None):
        """Open ``sqlite_file`` and cache the number of stored records.

        ``transform`` is accepted for interface compatibility but is not applied.
        """
        self.db = sqlite3.connect(sqlite_file)
        self.cursor = self.db.cursor()
        self.length = self.cursor.execute('SELECT count(blob) from augmented_data;').fetchone()[0]

    def extract_required_format(self, record):
        """Sample a history plus one held-out title from ``record``.

        Returns:
            X: 1-D array laid out as ``[n_history, history ids...,
               history scores..., target id]``.
            y: 1-element array holding the target's score.

        Raises:
            ValueError: if the record has fewer than three entries.

        Records with more than five entries always yield five history items.
        Shorter records yield between one and ``len(record) - 2`` history
        items, so ``X`` is shorter for them.
        """
        record_df = pd.DataFrame({'anime_id': record['anime_id'], 'my_score': record['my_score']})
        if len(record_df) < 3:
            # np.random.randint(2, n) needs n > 2; fail with a clearer message.
            raise ValueError(
                'record needs at least 3 rated titles, got {}'.format(len(record_df))
            )
        if len(record_df) > 5:
            num_of_seq = 6
        else:
            num_of_seq = np.random.randint(2, len(record_df))
        # Sample without replacement: with replacement the held-out target could
        # also appear in the history (leaking its score) and history entries
        # could repeat.
        indexes = np.random.choice(record_df.index, size=num_of_seq, replace=False)
        train = record_df.iloc[indexes[:-1]]
        predict = record_df.iloc[indexes[-1:]]
        X = np.concatenate([
            [num_of_seq - 1],
            train['anime_id'].values,
            train['my_score'].values,
            predict['anime_id'].values
        ])
        y = predict['my_score'].values
        return X, y

    def __getitem__(self, index):
        """Load the record stored at SQLite rowid ``index + 1`` and sample it.

        Raises:
            IndexError: if no row exists for ``index``.
        """
        if isinstance(index, torch.Tensor):
            index = int(index)
        row = self.cursor.execute('SELECT * from augmented_data where rowid=?', (index + 1, )).fetchone()
        if row is None:
            raise IndexError('no record stored for index {}'.format(index))
        # NOTE: pickle.loads can execute arbitrary code; only open trusted databases.
        return self.extract_required_format(pickle.loads(row[1]))

    def __len__(self):
        """Number of rows with a non-NULL ``blob`` value, counted at construction."""
        return self.length

In [10]:
# Merge every SQLite shard into a single dataset.
total_dataset = ConcatDataset(
    [AnimeRatingsDataset(shard_path) for shard_path in user_grouped_rating_files]
)

# 80/20 split; whatever is lost to integer truncation is added to the training part
# so that the two sizes add up to the dataset length.
train_size = int(0.8 * len(total_dataset))
test_size = int(0.2 * len(total_dataset))
total = train_size + test_size
diff = len(total_dataset) - total
train_dataset, test_dataset = random_split(
    total_dataset,
    (train_size + diff, test_size),
)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

In [11]:
# Number of mini-batches per epoch for (training, validation).
tuple(map(len, (train_dataloader, test_dataloader)))

(756, 189)

In [5]:
class Net(nn.Module):
    """Predict the score of a candidate anime from a fixed-length rating history.

    Each history item is an anime embedding with its rating appended. The
    sequence goes through a bidirectional LSTM; the final hidden and cell states
    of both directions are projected down to the embedding size, concatenated
    with the candidate's embedding and mapped to a single score.
    """

    def __init__(self, anime_embedding_vocab, anime_embedding_dim, lstm_hidden_dim,
                 num_past_animes=5, batch_size=batch_size):
        super(Net, self).__init__()

        # Store all the constants.
        self.anime_embedding_vocab = anime_embedding_vocab
        self.anime_embedding_dim = anime_embedding_dim
        self.lstm_hidden_dim = lstm_hidden_dim
        self.num_past_animes = num_past_animes
        self.batch_size = batch_size

        # One embedding table shared by history items and the candidate.
        self.past_anime_embedding = nn.Embedding(anime_embedding_vocab, anime_embedding_dim)
        self.embedding_drop = nn.Dropout(0.2)
        # Per time step the LSTM sees one embedding plus one rating,
        # hence anime_embedding_dim + 1 input features.
        self.lstm = nn.LSTM(anime_embedding_dim + 1, lstm_hidden_dim, bidirectional=True)

        # The summary vector is (hidden state + cell state) x (2 directions),
        # i.e. 4 * lstm_hidden_dim features.
        self.drop1 = nn.Dropout(0.2)
        self.ln1 = nn.LayerNorm((2 * lstm_hidden_dim * 2))
        self.fc1 = nn.Linear(lstm_hidden_dim * 2 * 2, self.anime_embedding_dim)
        self.ln2 = nn.LayerNorm((self.anime_embedding_dim))
        # Candidate embedding + projected history summary.
        self.drop2 = nn.Dropout(0.6)
        self.fc2 = nn.Linear(self.anime_embedding_dim + self.anime_embedding_dim, 1)
        self.init_hidden(batch_size)

    def init_hidden(self, minibatch_size):
        """Reset the LSTM state to zeros for a batch of ``minibatch_size`` rows.

        Must be called before ``forward`` whenever the batch size changes.
        The leading dimension is 2 because the single-layer LSTM is
        bidirectional. The tensors are placed on the notebook-level ``device``.
        """
        self.hidden = (
            torch.zeros(2, minibatch_size, self.lstm_hidden_dim).to(device),
            torch.zeros(2, minibatch_size, self.lstm_hidden_dim).to(device)
        )

    def forward(self, x):
        """Score one candidate per row of ``x``.

        Args:
            x: 2-D tensor with ``2 * num_past_animes + 2`` columns laid out as
               ``[header, history ids..., history ratings..., candidate id]``.
               The header column is not read.

        Returns:
            Tensor of shape ``(batch, 1)`` with the predicted scores.

        Raises:
            ValueError: if ``x`` does not have the expected layout.
        """
        # Honour the configured history length instead of a hard-coded 5.
        num_past_records = self.num_past_animes
        expected_width = 2 * num_past_records + 2
        if x.dim() != 2 or x.shape[1] != expected_width:
            raise ValueError(
                'expected input of shape (batch, {}), got {}'.format(expected_width, tuple(x.shape))
            )

        # Ids are cast to long so a record that was promoted to float
        # (e.g. by fractional ratings) can still be embedded.
        past_anime_historical_ids = x[:, 1: num_past_records + 1].long()
        past_anime_ratings = x[:, num_past_records + 1:-1].float()
        future_anime_id = x[:, -1:].long()

        history_embeddings = self.past_anime_embedding(past_anime_historical_ids)
        drop_history_embeddings = self.embedding_drop(history_embeddings)
        future_embeddings = self.past_anime_embedding(future_anime_id)

        # (batch, seq, 1 + emb) with the rating as the first feature, then
        # reordered to the (seq, batch, feature) layout the LSTM expects.
        lstm_input = torch.cat([
            past_anime_ratings.view(-1, num_past_records, 1),
            drop_history_embeddings
        ], dim=2).permute(1, 0, 2)

        lstm_out, self.hidden = self.lstm(
            lstm_input,
            self.hidden
        )

        # Per row: [h_dir0, c_dir0, h_dir1, c_dir1] -> 4 * lstm_hidden_dim features.
        final_hidden_concat_state = torch.cat(
            [self.hidden[0], self.hidden[1]], dim=2
        ).permute(1, 0, 2).contiguous().view(-1, self.lstm_hidden_dim * 2 * 2)

        dropout1 = self.drop1(final_hidden_concat_state)
        ln1 = self.ln1(dropout1)
        historical_state = F.relu(self.fc1(ln1))
        ln2_historical_state = self.ln2(historical_state)
        recommendation_input = torch.cat([
            future_embeddings.view(-1, self.anime_embedding_dim),
            ln2_historical_state
        ], dim=1)

        dropout2 = self.drop2(recommendation_input)
        return self.fc2(dropout2)


# Build the network and move its parameters to the selected device.
model = Net(
    anime_embedding_vocab=num_anime,
    anime_embedding_dim=50,
    lstm_hidden_dim=256,
)
model = model.to(device)
print(model)

Net(
  (past_anime_embedding): Embedding(6668, 50)
  (embedding_drop): Dropout(p=0.2)
  (lstm): LSTM(51, 256, bidirectional=True)
  (drop1): Dropout(p=0.2)
  (ln1): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
  (fc1): Linear(in_features=1024, out_features=50, bias=True)
  (ln2): LayerNorm(torch.Size([50]), eps=1e-05, elementwise_affine=True)
  (drop2): Dropout(p=0.6)
  (fc2): Linear(in_features=100, out_features=1, bias=True)
)


In [6]:
# Training configuration: epoch count, squared-error loss and Adam optimiser.
num_epochs = 10
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [16]:
def _run_epoch(dataloader, training, log_every=200):
    """Run one pass over ``dataloader`` and return the list of per-batch losses.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``device``. When ``training`` is false no gradients are tracked and the
    parameters are left untouched.
    """
    phase = 'Training' if training else 'Validation'
    batch_losses = []
    with torch.set_grad_enabled(training):
        for idx, (X, y) in enumerate(dataloader):
            X = X.to(device)
            y = y.to(device).float()

            # Start every batch from a zero LSTM state sized to the batch
            # (the last batch of an epoch can be smaller than the others).
            model.init_hidden(minibatch_size=X.shape[0])

            if training:
                # PyTorch accumulates gradients, so clear them before each step.
                model.zero_grad()

            prediction = model(X)
            loss = criterion(prediction, y)

            if training:
                loss.backward()
                optimizer.step()

            batch_losses.append(float(loss))
            if idx % log_every == 0:
                print('Batch {} - {} loss: {}'.format(idx + 1, phase, batch_losses[-1]))
    return batch_losses


train_loss = []
validation_loss = []
for epoch in range(num_epochs):
    print('Running epoch {}'.format(epoch + 1))

    # Model training. Gradients are re-enabled explicitly in case a previous
    # cell froze the parameters for inference.
    model = model.train()
    for param in model.parameters():
        param.requires_grad = True
    train_epoch_loss = _run_epoch(train_dataloader, training=True)

    # Validation: eval mode switches dropout off; gradient tracking is disabled
    # inside the helper, so parameters are not frozen after the loop ends.
    model = model.eval()
    validation_epoch_loss = _run_epoch(test_dataloader, training=False)
    model = model.train()

    train_loss.append(np.mean(train_epoch_loss))
    validation_loss.append(np.mean(validation_epoch_loss))
    print('Epoch {}: Mean training loss: {} Mean validation loss: {}'.format(epoch + 1, train_loss[-1], validation_loss[-1]))

Running epoch 1
Batch 1 - Training loss: 10.55636978149414
Batch 201 - Training loss: 10.310909271240234
Batch 401 - Training loss: 10.577067375183105
Batch 601 - Training loss: 10.77558708190918
Batch 1 - Validation loss: 9.91697883605957
Epoch 1: Mean training loss: 10.391021227710462 Mean validation loss: 9.992473693121047
Running epoch 2
Batch 1 - Training loss: 9.915519714355469
Batch 201 - Training loss: 9.878466606140137
Batch 401 - Training loss: 11.039697647094727
Batch 601 - Training loss: 10.688434600830078
Batch 1 - Validation loss: 8.883062362670898
Epoch 2: Mean training loss: 10.39965880484808 Mean validation loss: 9.985361921724188
Running epoch 3
Batch 1 - Training loss: 10.542410850524902
Batch 201 - Training loss: 10.662090301513672
Batch 401 - Training loss: 10.596071243286133
Batch 601 - Training loss: 10.600316047668457
Batch 1 - Validation loss: 9.77141284942627
Epoch 3: Mean training loss: 10.388995049491761 Mean validation loss: 9.974109770759704
Running epoch 

In [18]:
# Persist the whole model object; the file name records the epoch count and
# the last mean training / validation losses.
torch.save(
    obj=model,
    f='augmented_files_{}_epochs_{}-{}.pt'.format(
        num_epochs,
        float(train_loss[-1]),
        float(validation_loss[-1]),
    ),
)

  "type " + obj.__name__ + ". It won't be checked "


In [8]:
# model = torch.load('augmented_files_10_epochs_10.383869513002022-10.021028261336069.pt')
# torch.save(model.state_dict(), 'augmented_files_10_epochs_10.383869513002022-10.021028261336069_state_dict.pt')
# model.to(device)

Net(
  (past_anime_embedding): Embedding(6668, 50)
  (embedding_drop): Dropout(p=0.2)
  (lstm): LSTM(51, 256, bidirectional=True)
  (drop1): Dropout(p=0.2)
  (ln1): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
  (fc1): Linear(in_features=1024, out_features=50, bias=True)
  (ln2): LayerNorm(torch.Size([50]), eps=1e-05, elementwise_affine=True)
  (drop2): Dropout(p=0.6)
  (fc2): Linear(in_features=100, out_features=1, bias=True)
)

In [15]:
# model = torch.load('augmented_files_10_epochs_10.43818097013645-9.978318759373256.pt')
# model.to(device)

Net(
  (past_anime_embedding): Embedding(6668, 50)
  (embedding_drop): Dropout(p=0.2)
  (lstm): LSTM(51, 256, bidirectional=True)
  (drop1): Dropout(p=0.2)
  (ln1): LayerNorm(torch.Size([1024]), eps=1e-05, elementwise_affine=True)
  (fc1): Linear(in_features=1024, out_features=50, bias=True)
  (ln2): LayerNorm(torch.Size([50]), eps=1e-05, elementwise_affine=True)
  (drop2): Dropout(p=0.6)
  (fc2): Linear(in_features=100, out_features=1, bias=True)
)

In [19]:
import json

# Load the id/title lookup tables. The context manager closes the file handle,
# which the bare open() call previously left open.
with open('datasets/processed_ratings/anime_user_rating_mapping.json') as mapping_file:
    mapping = json.load(mapping_file)
mapping.keys()

dict_keys(['idx2anime', 'user2idx', 'anime_titles', 'usernames2userid', 'idx2user', 'anime2idx'])

In [20]:
# Example viewing history: titles, their ids in `anime_id`, and the ratings given.
titles = ['Naruto', 'One Piece', 'Cowboy Bebop', 'Fullmetal Alchemist', 'Hunter x Hunter']
anime_id = [20, 21, 1, 121, 136]
ratings = [7.5, 7.2, 8, 8.2, 8.5]
predicted = [9] # bleach

# Translate the ids through the 'anime2idx' table (keyed by the id as a string).
idx = [mapping['anime2idx'][str(raw_id)] for raw_id in anime_id]

In [23]:
# Switch to evaluation mode (disables dropout) and freeze every parameter.
model.eval()
prediction_model = model
for param in prediction_model.parameters():
    param.requires_grad = False

def build_record(history_anime_id, history_ratings, new_anime):
    """Assemble one flat input record for the network.

    Layout: ``[5, first five history ids, first five history ratings,
    candidate id]``. Longer histories are truncated to five items.
    """
    header = [5]
    recent_ids = history_anime_id[:5]
    recent_ratings = history_ratings[:5]
    candidate = [new_anime]
    return np.concatenate([header, recent_ids, recent_ratings, candidate])

def anime_recommendations(history_anime_id, history_ratings, new_anime):
    """Return the model's prediction for ``new_anime`` given one viewing history.

    The record is wrapped into a single-row batch, moved to ``device`` and fed
    to ``prediction_model`` without gradient tracking.
    """
    record = build_record(history_anime_id, history_ratings, new_anime)
    X = torch.from_numpy(np.array(record).reshape(1, -1)).to(device)
    with torch.no_grad():
        prediction_model.zero_grad()
        # The LSTM state has to match the (single-row) batch.
        prediction_model.init_hidden(X.shape[0])
        return prediction_model(X)

def obtain_top_n(history_anime_id, history_ratings, topn=10):
    """Score every anime index not in the history and return the best ``topn``.

    Each result is an ``(anime index, title, predicted rating)`` tuple,
    ordered from highest to lowest predicted rating.
    """
    watched = set(history_anime_id)

    # One forward pass per unseen candidate, with a progress bar over all indices.
    anime_ratings = [
        (
            candidate_idx,
            float(anime_recommendations(history_anime_id, history_ratings, candidate_idx)[0][0]),
        )
        for candidate_idx in tqdm(range(num_anime))
        if candidate_idx not in watched
    ]
    anime_ratings.sort(key=lambda pair: pair[1], reverse=True)

    top_anime_ratings = []
    for anime_idx, rating in anime_ratings[:topn]:
        title = mapping['anime_titles'][str(mapping['idx2anime'][str(anime_idx)])]
        top_anime_ratings.append((anime_idx, title, rating))
    return top_anime_ratings
    

# anime_recommendations(
#     history_anime_id=[56, 0, 53, 65, 708],
#     history_ratings=[7, 7, 8, 8, 8],
#     new_anime=162
# )

# Top-10 recommendations for a sample five-title history.
obtain_top_n([56, 0, 53, 65, 708], [7, 7, 8, 8, 8], topn=10)

100%|██████████| 6668/6668 [00:08<00:00, 788.91it/s]


[(73, 'Sen to Chihiro no Kamikakushi', 8.858256340026855),
 (105, 'Death Note', 8.656755447387695),
 (93, 'Howl no Ugoku Shiro', 8.480950355529785),
 (171, 'Shingeki no Kyojin', 8.381499290466309),
 (70, 'Mononoke Hime', 8.329761505126953),
 (196, 'One Punch Man', 8.30617618560791),
 (122, 'Code Geass: Hangyaku no Lelouch R2', 8.272485733032227),
 (202, 'Kimi no Na wa.', 8.199079513549805),
 (912, 'Tonari no Totoro', 8.078115463256836),
 (248, 'Final Fantasy VII: Advent Children', 8.040654182434082)]

In [17]:
# anime_list = pd.read_csv('datasets/anime_cleaned.csv')[['anime_id' ,'title', 'title_english']]
# anime_list['id'] = anime_list['anime_id'].map(lambda x: mapping['anime2idx'][str(x)])

# diff_names = anime_list[anime_list['title'] != anime_list['title_english']].dropna(subset=['title_english']).copy()
# diff_names['title'] = diff_names['title_english']
# result = pd.concat([anime_list, diff_names])

# result.to_json('anime_list.json', orient='records')

In [None]:
# for X, y in train_dataloader:
#     # Step 1. Remember that Pytorch accumulates gradients.
#     # We need to clear them out before each instance
#     model.zero_grad()

#     X = X.to(device)
#     y = y.to(device)
#     # Also, we need to clear out the hidden state of the LSTM,
#     # detaching it from its history on the last instance.
#     model.hidden = model.init_hidden()

#     # Step 2. Get our inputs ready for the network, that is, turn them into
#     # Tensors of anime indices.
#     #record = torch.from_numpy(np.array([ 3, 23, 43, 53,  5,  4,  3, 67], dtype=np.int64)).to(device)

#     prediction = model(X)

#     # Step 4. Compute the loss, gradients, and update the parameters by
#     #  calling optimizer.step()
#     loss = criterion(prediction, y.to(device).float())
#     loss.backward()
#     optimizer.step()
#     break

In [None]:
# class Net(nn.Module):

#     def __init__(self, anime_embedding_vocab, anime_embedding_dim, lstm_hidden_dim,
#                  num_past_animes=5, batch_size=batch_size):
#         super(Net, self).__init__()
        
#         # Store all the constants.
#         self.anime_embedding_vocab = anime_embedding_vocab
#         self.anime_embedding_dim = anime_embedding_dim
#         self.lstm_hidden_dim = lstm_hidden_dim
#         self.num_past_animes = num_past_animes
#         self.batch_size = batch_size

#         self.past_anime_embedding = nn.Embedding(anime_embedding_vocab, anime_embedding_dim)
#         self.embedding_drop = nn.Dropout(0.2)
#         # LSTM is fed the concatenated output of past anime ratings with their respective embeddings.
#         # It outputs the hidden state of size lstm_hidden_dim.
#         # anime embedding_size + 1 would suffice as anime_embedding_size is already * number of past records
#         self.lstm = nn.LSTM(anime_embedding_dim + 1, lstm_hidden_dim, bidirectional=False)

#         # Take the LSTM hidden state for the past anime watched with the future anime embedding
#         # as input to provide recommendation for the future anime.
#         # Final Hidden cells state, hidden state hence * 2
#         self.drop1 = nn.Dropout(0.2)
#         self.ln1 = nn.LayerNorm((2 * lstm_hidden_dim))
#         self.fc1 = nn.Linear(lstm_hidden_dim * 2, self.anime_embedding_dim)
#         self.ln2 = nn.LayerNorm((self.anime_embedding_dim))
#         # Historical embeddings + lstm past state
#         self.drop2 = nn.Dropout(0.6)
#         self.fc2 = nn.Linear(self.anime_embedding_dim + self.anime_embedding_dim, 1)
#         self.init_hidden(batch_size)

#     def init_hidden(self, minibatch_size):
#         # Before we've done anything, we dont have any hidden state.
#         # Refer to the Pytorch documentation to see exactly
#         # why they have this dimensionality.
#         # The axes semantics are (num_layers, minibatch_size, hidden_dim)
#         self.hidden = (
#             torch.zeros(1, minibatch_size, self.lstm_hidden_dim).to(device),
#             torch.zeros(1, minibatch_size, self.lstm_hidden_dim).to(device)
#         )

#     def forward(self, x):
#         num_past_records = 5
#         past_anime_historical_ids = x[:, 1: num_past_records + 1]
#         past_anime_ratings = x[:, num_past_records + 1:-1]
#         future_anime_id = x[:, -1:]

#         history_embeddings = self.past_anime_embedding(past_anime_historical_ids)
#         drop_history_embeddings = self.embedding_drop(history_embeddings)
#         future_embeddings = self.past_anime_embedding(future_anime_id)

#         lstm_input = torch.cat([
#             past_anime_ratings.view(-1, num_past_records, 1).permute(2, 1, 0).float(),
#             drop_history_embeddings.permute(2, 1, 0)
#         ]).permute(1, 2, 0)

#         lstm_out, self.hidden = self.lstm(
#             lstm_input,
#             self.hidden
#         )

#         final_hidden_concat_state = torch.cat([
#             self.hidden[0].view(-1, self.lstm_hidden_dim).permute(1, 0),
#             self.hidden[1].view(-1, self.lstm_hidden_dim).permute(1, 0)
#         ]).permute(1, 0)

#         dropout1 = self.drop1(final_hidden_concat_state)
#         ln1 = self.ln1(dropout1)
#         historical_state = F.relu(self.fc1(ln1))
#         ln2_historical_state = self.ln2(historical_state)        
#         recommendation_input = torch.cat([
#             future_embeddings.view(-1, self.anime_embedding_dim).permute(1, 0),
#             ln2_historical_state.permute(1, 0)
#         ]).permute(1, 0)

#         dropout2 = self.drop2(recommendation_input)
#         return self.fc2(dropout2)


# model = Net(anime_embedding_dim=50, anime_embedding_vocab=num_anime, lstm_hidden_dim=256)
# model.to(device)
# print(model)

In [None]:
# class Net(nn.Module):

#     def __init__(self, anime_embedding_vocab, anime_embedding_dim, lstm_hidden_dim, num_past_animes=5):
#         super(Net, self).__init__()
        
#         # Store all the constants.
#         self.anime_embedding_vocab = anime_embedding_vocab
#         self.anime_embedding_dim = anime_embedding_dim
#         self.lstm_hidden_dim = lstm_hidden_dim
#         self.num_past_animes = num_past_animes

#         # Take (num_past_animes) embedding inputs to learn the relation on how to predict
#         # Anime, given past history.
# #         self.past_anime_embeddings = []
# #         for _ in range(num_past_animes):
# #             self.past_anime_embeddings.append(nn.Embedding(anime_vocab, anime_embedding_dim))
#         self.past_anime_embedding = nn.Embedding(anime_embedding_vocab, anime_embedding_dim)

# #         # Embedding for the future anime which needs the rating to be predicted.
# #         self.anime_future_embedding = nn.Embedding(anime_embedding_vocab, anime_embedding_dim)

#         # Total embedding size for anime history.
# #         total_anime_embedding_size = num_past_animes * anime_embedding_dim
#         total_anime_embedding_size = anime_embedding_dim

#         # Total size for storing the ratings for past anime watched.
#         total_anime_rating_size = 1

#         # LSTM is fed the concatenated output of past anime ratings with their respective embeddings.
#         # It outputs the hidden state of size lstm_hidden_dim.
#         self.lstm = nn.LSTM(total_anime_embedding_size + total_anime_rating_size, lstm_hidden_dim)

#         # Take the LSTM hidden state for the past anime watched with the future anime embedding
#         # as input to provide recommendation for the future anime.
#         # Final Hidden cells state, hidden state hence * 2
#         self.fc1 = nn.Linear(lstm_hidden_dim * 2, self.anime_embedding_dim)
#         # Historical embeddings + lstm past state
#         self.fc2 = nn.Linear(self.anime_embedding_dim + self.anime_embedding_dim, 1)

#     def init_hidden(self):
#         # Before we've done anything, we dont have any hidden state.
#         # Refer to the Pytorch documentation to see exactly
#         # why they have this dimensionality.
#         # The axes semantics are (num_layers, minibatch_size, hidden_dim)
#         return (torch.zeros(1, 1, self.lstm_hidden_dim).to(device),
#                 torch.zeros(1, 1, self.lstm_hidden_dim).to(device))

#     def forward(self, x):
#         num_past_records = int(x[0])
#         past_anime_historical_ids = x[1: num_past_records + 1]
#         past_anime_ratings = x[num_past_records + 1:-1]
#         future_anime_id = x[-1:]
# #         embedding_outputs = []
# #         for i in range(self.num_past_animes):
# #             if num_past_records < i:
# #                 out = past_anime_embeddings[i](past_anime_historical_ids[i])
# #             else:
# #                 # The current record does not have as much as history
# #                 out = torch.zeros(1, self.anime_embedding_dim)
# #             embedding_outputs.append(out)
# #         total_past_anime_embeddings = torch.cat(embedding_outputs)

#         history_embeddings = self.past_anime_embedding(past_anime_historical_ids)
#         future_embeddings = self.past_anime_embedding(future_anime_id)

#         lstm_input = torch.cat([
#             past_anime_ratings.view(1, num_past_records).float(),
#             history_embeddings.permute(1, 0)
#         ]).permute(1, 0)

#         lstm_out, self.hidden = self.lstm(
#             lstm_input.view(num_past_records, 1, -1),
#             self.hidden
#         )
        
#         final_hidden_concat_state = torch.cat([
#             self.hidden[0].view(self.lstm_hidden_dim),
#             self.hidden[1].view(self.lstm_hidden_dim)
#         ])

#         historical_state = F.relu(self.fc1(final_hidden_concat_state))

# #         set_trace()
        
#         recommendation_input = torch.cat([
#             future_embeddings.view(self.anime_embedding_dim),
#             historical_state
#         ])

#         return self.fc2(recommendation_input)


# model = Net(anime_embedding_dim=50, anime_embedding_vocab=num_anime, lstm_hidden_dim=256)
# criterion = nn.MSELoss()
# optimizer = optim.Adam(model.parameters(), lr=1e-3)
# model.to(device)
# print(model)

In [None]:
# class Net(nn.Module):

#     def __init__(self, anime_embedding_vocab, anime_embedding_dim, lstm_hidden_dim, num_past_animes=5):
#         super(Net, self).__init__()
        
#         # Store all the constants.
#         self.anime_embedding_vocab = anime_embedding_vocab
#         self.anime_embedding_dim = anime_embedding_dim
#         self.lstm_hidden_dim = lstm_hidden_dim
#         self.num_past_animes = num_past_animes

#         self.past_anime_embedding = nn.Embedding(anime_embedding_vocab, anime_embedding_dim)

#         # LSTM is fed the concatenated output of past anime ratings with their respective embeddings.
#         # It outputs the hidden state of size lstm_hidden_dim.
#         # anime embedding_size + 1 would suffice as anime_embedding_size is already * number of past records
#         self.lstm = nn.LSTM(anime_embedding_dim + 1, lstm_hidden_dim)

#         # Take the LSTM hidden state for the past anime watched with the future anime embedding
#         # as input to provide recommendation for the future anime.
#         # Final Hidden cells state, hidden state hence * 2
#         self.fc1 = nn.Linear(lstm_hidden_dim * 2, self.anime_embedding_dim)

#         # Historical embeddings + lstm past state
#         self.fc2 = nn.Linear(self.anime_embedding_dim + self.anime_embedding_dim, 1)

#     def init_hidden(self):
#         # Before we've done anything, we dont have any hidden state.
#         # Refer to the Pytorch documentation to see exactly
#         # why they have this dimensionality.
#         # The axes semantics are (num_layers, minibatch_size, hidden_dim)
#         return (torch.zeros(1, 1, self.lstm_hidden_dim).to(device),
#                 torch.zeros(1, 1, self.lstm_hidden_dim).to(device))

#     def forward(self, x):
#         num_past_records = int(x[0])
#         past_anime_historical_ids = x[1: num_past_records + 1]
#         past_anime_ratings = x[num_past_records + 1:-1]
#         future_anime_id = x[-1:]

#         history_embeddings = self.past_anime_embedding(past_anime_historical_ids)
#         future_embeddings = self.past_anime_embedding(future_anime_id)

#         lstm_input = torch.cat([
#             past_anime_ratings.view(1, num_past_records).float(),
#             history_embeddings.permute(1, 0)
#         ]).permute(1, 0)

#         lstm_out, self.hidden = self.lstm(
#             lstm_input.view(num_past_records, 1, -1),
#             self.hidden
#         )
        
#         final_hidden_concat_state = torch.cat([
#             self.hidden[0].view(self.lstm_hidden_dim),
#             self.hidden[1].view(self.lstm_hidden_dim)
#         ])

#         historical_state = F.relu(self.fc1(final_hidden_concat_state))
        
#         recommendation_input = torch.cat([
#             future_embeddings.view(self.anime_embedding_dim),
#             historical_state
#         ])

#         return self.fc2(recommendation_input)


# model = Net(anime_embedding_dim=50, anime_embedding_vocab=num_anime, lstm_hidden_dim=256)
# model.to(device)
# print(model)