In [1]:
import os
import pickle

import numpy as np
import pandas as pd
import torch

# The project star imports deliberately stay ahead of the explicit imports below,
# so the explicitly imported names keep taking precedence over anything src.* exports.
from src.train import *
from src.processing import *
from src.models import *
from src.inference import *

from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel

# Seed both RNGs used in this notebook (PyTorch and NumPy's legacy global RNG)
# with the same value so reruns start from the same random state.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Ask PyTorch to release any cached CUDA memory before the heavy cells run.
torch.cuda.empty_cache()

Using device: cuda


In [2]:
# Training hyper-parameters: batch_size feeds the DataLoaders below, while
# epochs and lr are passed to train_clip in a later cell.
epochs = 2
batch_size = 24
lr = 0.0001

# Item vocabulary (filled further down) and the pretrained BERT tokenizer/model.
bert_checkpoint = 'bert-base-uncased'
vocab = Vocabulary()
tokenizer = BertTokenizer.from_pretrained(bert_checkpoint)
bert_model = BertModel.from_pretrained(bert_checkpoint)

# Load ratings, movie descriptions and metadata from the three archive CSV files.
archive_files = {
    'links_path': 'CLIP4Rec/archive/links.csv',
    'movies_metadata_path': 'CLIP4Rec/archive/movies_metadata.csv',
    'ratings_path': 'CLIP4Rec/archive/ratings.csv',
}
ratings_df, movie_descriptions, movies_metadata = create_ratings_df(
    number_of_movies=7500,
    frac=0.01,
    **archive_files,
)

# The vocabulary is built from every sequence, i.e. before the train/val split.
sequences = get_sequences(ratings_df)
vocab.build_vocab(sequences)

# 80/20 split with a fixed random_state so the partition is repeatable.
train_sentences, val_sentences = train_test_split(
    sequences,
    test_size=0.2,
    random_state=42,
)

# With encode_descriptions=True the call also yields the encoded film
# descriptions; the validation call reuses those instead of producing its own.
train_data, film_descriptions_encoded = prepare_dataset(
    train_sentences,
    movie_descriptions,
    tokenizer,
    vocab,
    encode_descriptions=True,
)
val_data = prepare_dataset(val_sentences, movie_descriptions, tokenizer, vocab)

train_dataset = FilmRecommendationDataset(train_data, film_descriptions_encoded)
val_dataset = FilmRecommendationDataset(val_data, film_descriptions_encoded)

# Only the training loader shuffles; validation batches keep dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)



  movies_metadata = pd.read_csv(movies_metadata_path)
100%|██████████| 8846/8846 [00:00<00:00, 471536.04it/s]
100%|██████████| 4012/4012 [00:04<00:00, 977.49it/s] 
100%|██████████| 2212/2212 [00:00<00:00, 452046.41it/s]


In [3]:
# Both encoders are configured with the same output width (384).
# NOTE(review): seq_len is not assigned in any visible cell of this notebook —
# presumably it is exported by one of the `from src.* import *` lines; confirm,
# otherwise this cell raises NameError on a fresh kernel.
embed_dim = 384
item_count = len(vocab.word_to_index)

film_encoder = SASFilmEncoder(
    item_num=item_count,
    seq_len=seq_len,
    embed_dim=embed_dim,
    device=device,
)
text_encoder = TextEncoder(bert_model, output_dim=embed_dim)

In [4]:
# Train the film and text encoders on the loaders built earlier, using the
# epochs / lr hyper-parameters set alongside them. With iter_verbose=50 the
# recorded output shows a metrics report every 50 batches.
train_clip(
    film_encoder,
    text_encoder,
    train_loader,
    val_loader,
    epochs=epochs,
    lr=lr,
    device=device,
    iter_verbose=50,
)

 14%|█▎        | 50/369 [00:30<03:12,  1.66it/s]

Epoch 1, Batch 50
Accuracy: 0.0067
Agreggated loss: 3.0922
Classification loss: 8.2004
Contrastive loss: 1.3959



 27%|██▋       | 100/369 [01:01<02:43,  1.65it/s]

Epoch 1, Batch 100
Accuracy: 0.0083
Agreggated loss: 3.0266
Classification loss: 7.9687
Contrastive loss: 1.3042



 41%|████      | 150/369 [01:31<02:13,  1.64it/s]

Epoch 1, Batch 150
Accuracy: 0.0058
Agreggated loss: 2.9864
Classification loss: 7.8433
Contrastive loss: 1.2462



 54%|█████▍    | 200/369 [02:01<01:42,  1.64it/s]

Epoch 1, Batch 200
Accuracy: 0.0092
Agreggated loss: 2.9589
Classification loss: 7.7503
Contrastive loss: 1.2089



 68%|██████▊   | 250/369 [02:32<01:12,  1.65it/s]

Epoch 1, Batch 250
Accuracy: 0.0058
Agreggated loss: 2.9383
Classification loss: 7.6888
Contrastive loss: 1.1794



 81%|████████▏ | 300/369 [03:02<00:42,  1.64it/s]

Epoch 1, Batch 300
Accuracy: 0.0033
Agreggated loss: 2.9227
Classification loss: 7.6505
Contrastive loss: 1.1555



 95%|█████████▍| 350/369 [03:33<00:11,  1.64it/s]

Epoch 1, Batch 350
Accuracy: 0.0083
Agreggated loss: 2.9112
Classification loss: 7.6335
Contrastive loss: 1.1351



100%|██████████| 369/369 [03:44<00:00,  1.64it/s]


Epoch 1, Batch 369
Accuracy: 0.0022
Agreggated loss: 2.9064
Classification loss: 7.6210
Contrastive loss: 1.1278



100%|██████████| 93/93 [00:19<00:00,  4.74it/s]


Epoch 1: Val Loss: 2.8091, Val Accuracy: 0.0059
Val Classification loss: 7.4839
Val Contrastive loss: 0.9580


 14%|█▎        | 50/369 [00:30<03:14,  1.64it/s]

Epoch 2, Batch 50
Accuracy: 0.0083
Agreggated loss: 2.7671
Classification loss: 7.0671
Contrastive loss: 0.9740



 27%|██▋       | 100/369 [01:00<02:44,  1.64it/s]

Epoch 2, Batch 100
Accuracy: 0.0050
Agreggated loss: 2.7710
Classification loss: 7.1230
Contrastive loss: 0.9682



 41%|████      | 150/369 [01:31<02:13,  1.64it/s]

Epoch 2, Batch 150
Accuracy: 0.0050
Agreggated loss: 2.7686
Classification loss: 7.1165
Contrastive loss: 0.9649



 54%|█████▍    | 200/369 [02:01<01:42,  1.64it/s]

Epoch 2, Batch 200
Accuracy: 0.0117
Agreggated loss: 2.7636
Classification loss: 7.0959
Contrastive loss: 0.9600



 68%|██████▊   | 250/369 [02:32<01:12,  1.64it/s]

Epoch 2, Batch 250
Accuracy: 0.0125
Agreggated loss: 2.7603
Classification loss: 7.0882
Contrastive loss: 0.9556



 81%|████████▏ | 300/369 [03:02<00:41,  1.64it/s]

Epoch 2, Batch 300
Accuracy: 0.0042
Agreggated loss: 2.7575
Classification loss: 7.0796
Contrastive loss: 0.9521



 95%|█████████▍| 350/369 [03:33<00:11,  1.64it/s]

Epoch 2, Batch 350
Accuracy: 0.0108
Agreggated loss: 2.7564
Classification loss: 7.0833
Contrastive loss: 0.9490



100%|██████████| 369/369 [03:44<00:00,  1.64it/s]


Epoch 2, Batch 369
Accuracy: 0.0067
Agreggated loss: 2.7554
Classification loss: 7.0799
Contrastive loss: 0.9480



100%|██████████| 93/93 [00:19<00:00,  4.72it/s]

Epoch 2: Val Loss: 2.7858, Val Accuracy: 0.0081
Val Classification loss: 7.5476
Val Contrastive loss: 0.8997





In [6]:
# Persist the trained weights, the datasets and every intermediate object that
# the inference cells reload from disk.
ARTIFACTS_DIR = 'CLIP4Rec/artifacts'

# Create the output directory up front: torch.save / open(..., 'wb') raise
# FileNotFoundError when it is missing, which would otherwise only surface after
# the multi-minute training run has finished.
os.makedirs(ARTIFACTS_DIR, exist_ok=True)

# Encoder weights are stored as state_dicts; the datasets are stored whole.
torch.save(film_encoder.state_dict(), f'{ARTIFACTS_DIR}/film_encoder_weights_test.pth')
torch.save(text_encoder.state_dict(), f'{ARTIFACTS_DIR}/text_encoder_weights_test.pth')

torch.save(train_dataset, f'{ARTIFACTS_DIR}/train_dataset.pt')
torch.save(val_dataset, f'{ARTIFACTS_DIR}/val_dataset.pt')

# Each entry is written to '<ARTIFACTS_DIR>/<key>.pickle', in this order.
# NOTE: pickle files must only be loaded back from trusted locations, since
# unpickling can execute arbitrary code.
pickled_artifacts = {
    'ratings_df': ratings_df,
    'movie_descriptions': movie_descriptions,
    'sequences': sequences,
    'vocab': vocab,
    'film_descriptions_encoded': film_descriptions_encoded,
    'movies_metadata': movies_metadata,
}
for artifact_name, artifact in pickled_artifacts.items():
    with open(f'{ARTIFACTS_DIR}/{artifact_name}.pickle', 'wb') as f:
        pickle.dump(artifact, f)

In [5]:
# Four example movie titles kept as a plain list of strings.
list_movies = [
    "Only Lovers Left Alive",
    "The Twilight Saga: Eclipse",
    "Me Before You",
    "(500) Days of Summer",
]

In [1]:
# Reload the pickled artifacts written by the save cell.
# NOTE: only unpickle files from a trusted location — unpickling can run code.
vocab, movies_metadata, film_descriptions_encoded = map(
    pd.read_pickle,
    (
        'CLIP4Rec/artifacts/vocab.pickle',
        'CLIP4Rec/artifacts/movies_metadata.pickle',
        'CLIP4Rec/artifacts/film_descriptions_encoded.pickle',
    ),
)

# Fresh pretrained BERT model and tokenizer for the inference wrapper.
bert_checkpoint = 'bert-base-uncased'
bert_model = BertModel.from_pretrained(bert_checkpoint)
bert_tokenizer = BertTokenizer.from_pretrained(bert_checkpoint)

# NOTE(review): seq_len and device are not defined in this cell — device comes
# from the setup cell, and seq_len is presumably exported by a src.* star
# import; confirm both exist before running this on a fresh kernel.
inference = Inference(
    film_encoder_path='CLIP4Rec/artifacts/film_encoder_weights_test.pth',
    text_encoder_path='CLIP4Rec/artifacts/text_encoder_weights_test.pth',
    vocab=vocab,
    dim=384,
    movies_metadata=movies_metadata,
    seq_len=seq_len,
    device=device,
    bert_model=bert_model,
    bert_tokenizer=bert_tokenizer,
)

In [2]:
# Build the Annoy model on the inference object from the encoded film
# descriptions, using 10 trees.
inference.build_annoy_model(
    film_descriptions_encoded,
    num_trees=10,
)