In [1]:
import torch
import pickle
import pandas as pd
import numpy as np
from src.train import *
from src.processing import *
from src.models import *
from src.inference import *
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertModel
from torch.utils.data import Dataset, DataLoader

# Fix the NumPy and PyTorch RNG seeds so the run is repeatable.
np.random.seed(42)
torch.manual_seed(42)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# Release cached GPU memory that a previous run in this kernel may still hold.
torch.cuda.empty_cache()

Using device: cuda


In [2]:
# --- Hyperparameters ---------------------------------------------------------
# Defined before the DataLoaders so that `batch_size` is the single source of
# truth; previously the loaders hard-coded 128 and `batch_size` was assigned
# afterwards, so changing it had no effect on the loaders used for training.
epochs = 25
batch_size = 128
lr = 0.0001

# --- Data --------------------------------------------------------------------
# Build the ratings table, movie descriptions and metadata from the CSV dumps
# (the helper returns three objects, unpacked here).
vocab = Vocabulary()
ratings_df, movie_descriptions, movies_metadata = create_ratings_df(
    number_of_movies=7500,
    links_path='CLIP4Rec/archive/links.csv',
    movies_metadata_path='CLIP4Rec/archive/movies_metadata.csv',
    ratings_path='CLIP4Rec/archive/ratings.csv'
    )

# Turn the ratings into sequences and build the vocabulary from them.
sequences = get_sequences(ratings_df)
vocab.build_vocab(sequences)

# 80/20 train/validation split with a fixed seed for reproducibility.
train_sentences, val_sentences = train_test_split(sequences, test_size=0.2, random_state=42)

train_dataset = MoviesDataset(train_sentences, vocab)
val_dataset = MoviesDataset(val_sentences, vocab)

# Batch collation uses the index of the "<PAD>" token.
# NOTE(review): `vocab.word_to_idx` is called as a method here, while the
# encoder cell reads `vocab.word_to_index` as a sized attribute - confirm that
# both exist on Vocabulary.
collate_fn = CollateFunction(pad_idx=vocab.word_to_idx("<PAD>"))

# --- DataLoaders -------------------------------------------------------------
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

# Backward-compatible aliases. These names used to be a second pair of loaders
# built WITHOUT `collate_fn`, which silently skipped the padding collation;
# they now point at the collate-aware loaders above.
# NOTE(review): confirm nothing downstream relied on the default collation.
train_loader = train_dataloader
val_loader = val_dataloader

  movies_metadata = pd.read_csv(movies_metadata_path)


In [3]:
# Instantiate the film encoder with one item slot per vocabulary entry.
# NOTE(review): `seq_len` is not assigned in any visible cell; presumably it
# comes from one of the `from src.* import *` star imports - confirm, or
# define it explicitly next to the other hyperparameters.
film_encoder = SASFilmEncoder(
    item_num=len(vocab.word_to_index),
    seq_len=seq_len,
    embed_dim=384,
    device=device,
)

In [4]:
# Train the encoder on the collate-aware loaders defined earlier.
# In the recorded run below, validation loss is lowest at epoch 4 (6.0999) and
# climbs to 7.9901 by epoch 25 while train loss keeps falling, i.e. the model
# overfits well before the last epoch - consider fewer epochs or early stopping.
train_recommender(
    film_encoder,
    train_dataloader,
    val_dataloader,
    epochs=epochs,
    lr=lr,
    device=device,
)

Epoch 1



100%|██████████| 6759/6759 [01:19<00:00, 84.60it/s]


Train Loss: 6.6708
Train Accuracy: 0.0235

Val Loss: 6.3416
Val Accuracy: 0.0319

Epoch 2



100%|██████████| 6759/6759 [01:19<00:00, 84.73it/s]


Train Loss: 6.2196
Train Accuracy: 0.0368

Val Loss: 6.1855
Val Accuracy: 0.0385

Epoch 3



100%|██████████| 6759/6759 [01:20<00:00, 84.20it/s]


Train Loss: 6.0263
Train Accuracy: 0.0442

Val Loss: 6.1166
Val Accuracy: 0.0420

Epoch 4



100%|██████████| 6759/6759 [01:20<00:00, 83.51it/s]


Train Loss: 5.8654
Train Accuracy: 0.0508

Val Loss: 6.0999
Val Accuracy: 0.0443

Epoch 5



100%|██████████| 6759/6759 [01:20<00:00, 83.80it/s]


Train Loss: 5.7107
Train Accuracy: 0.0575

Val Loss: 6.1048
Val Accuracy: 0.0457

Epoch 6



100%|██████████| 6759/6759 [01:20<00:00, 83.70it/s]


Train Loss: 5.5565
Train Accuracy: 0.0655

Val Loss: 6.1408
Val Accuracy: 0.0459

Epoch 7



100%|██████████| 6759/6759 [01:20<00:00, 83.74it/s]


Train Loss: 5.4003
Train Accuracy: 0.0745

Val Loss: 6.1927
Val Accuracy: 0.0454

Epoch 8



100%|██████████| 6759/6759 [01:20<00:00, 83.90it/s]


Train Loss: 5.2425
Train Accuracy: 0.0845

Val Loss: 6.2658
Val Accuracy: 0.0447

Epoch 9



100%|██████████| 6759/6759 [01:20<00:00, 84.07it/s]


Train Loss: 5.0829
Train Accuracy: 0.0960

Val Loss: 6.3445
Val Accuracy: 0.0442

Epoch 10



100%|██████████| 6759/6759 [01:20<00:00, 83.82it/s]


Train Loss: 4.9248
Train Accuracy: 0.1091

Val Loss: 6.4370
Val Accuracy: 0.0433

Epoch 11



100%|██████████| 6759/6759 [01:20<00:00, 83.81it/s]


Train Loss: 4.7709
Train Accuracy: 0.1231

Val Loss: 6.5491
Val Accuracy: 0.0414

Epoch 12



100%|██████████| 6759/6759 [01:20<00:00, 83.64it/s]


Train Loss: 4.6241
Train Accuracy: 0.1371

Val Loss: 6.6668
Val Accuracy: 0.0418

Epoch 13



100%|██████████| 6759/6759 [01:20<00:00, 84.16it/s]


Train Loss: 4.4822
Train Accuracy: 0.1517

Val Loss: 6.7783
Val Accuracy: 0.0404

Epoch 14



100%|██████████| 6759/6759 [01:20<00:00, 83.96it/s]


Train Loss: 4.3488
Train Accuracy: 0.1666

Val Loss: 6.8822
Val Accuracy: 0.0398

Epoch 15



100%|██████████| 6759/6759 [01:20<00:00, 83.82it/s]


Train Loss: 4.2240
Train Accuracy: 0.1804

Val Loss: 7.0050
Val Accuracy: 0.0389

Epoch 16



100%|██████████| 6759/6759 [01:20<00:00, 83.89it/s]


Train Loss: 4.1070
Train Accuracy: 0.1943

Val Loss: 7.1194
Val Accuracy: 0.0381

Epoch 17



100%|██████████| 6759/6759 [01:20<00:00, 83.91it/s]


Train Loss: 3.9959
Train Accuracy: 0.2074

Val Loss: 7.2326
Val Accuracy: 0.0373

Epoch 18



100%|██████████| 6759/6759 [01:20<00:00, 83.88it/s]


Train Loss: 3.8950
Train Accuracy: 0.2199

Val Loss: 7.3318
Val Accuracy: 0.0365

Epoch 19



100%|██████████| 6759/6759 [01:20<00:00, 83.81it/s]


Train Loss: 3.7972
Train Accuracy: 0.2323

Val Loss: 7.4392
Val Accuracy: 0.0364

Epoch 20



100%|██████████| 6759/6759 [01:20<00:00, 83.70it/s]


Train Loss: 3.7105
Train Accuracy: 0.2425

Val Loss: 7.5325
Val Accuracy: 0.0357

Epoch 21



100%|██████████| 6759/6759 [01:20<00:00, 83.85it/s]


Train Loss: 3.6252
Train Accuracy: 0.2537

Val Loss: 7.6324
Val Accuracy: 0.0359

Epoch 22



100%|██████████| 6759/6759 [01:20<00:00, 83.78it/s]


Train Loss: 3.5487
Train Accuracy: 0.2641

Val Loss: 7.7439
Val Accuracy: 0.0356

Epoch 23



100%|██████████| 6759/6759 [01:20<00:00, 83.88it/s]


Train Loss: 3.4801
Train Accuracy: 0.2728

Val Loss: 7.8124
Val Accuracy: 0.0349

Epoch 24



100%|██████████| 6759/6759 [01:20<00:00, 83.91it/s]


Train Loss: 3.4126
Train Accuracy: 0.2822

Val Loss: 7.9112
Val Accuracy: 0.0343

Epoch 25



100%|██████████| 6759/6759 [01:20<00:00, 84.04it/s]


Train Loss: 3.3499
Train Accuracy: 0.2901

Val Loss: 7.9901
Val Accuracy: 0.0344



In [7]:
# NOTE(review): looks like a leftover "is the kernel alive?" check; it has no
# effect on the analysis and can presumably be deleted.
print(1)

1


In [6]:
# Artifact persistence (currently disabled). Uncomment the relevant lines to
# write the model weights, datasets and intermediate objects to
# CLIP4Rec/artifacts/.
# NOTE(review): `text_encoder` and `film_descriptions_encoded` are not defined
# in any visible cell, so those lines would fail if enabled as-is - confirm
# where they are created before uncommenting.

# torch.save(film_encoder.state_dict(), 'CLIP4Rec/artifacts/film_encoder_weights_test.pth')
# torch.save(text_encoder.state_dict(), 'CLIP4Rec/artifacts/text_encoder_weights_test.pth')

# torch.save(train_dataset, 'CLIP4Rec/artifacts/train_dataset.pt')
# torch.save(val_dataset, 'CLIP4Rec/artifacts/val_dataset.pt')

# with open('CLIP4Rec/artifacts/ratings_df.pickle', 'wb') as f:
#   pickle.dump(ratings_df, f)

# with open('CLIP4Rec/artifacts/movie_descriptions.pickle', 'wb') as f:
#   pickle.dump(movie_descriptions, f)

# with open('CLIP4Rec/artifacts/sequences.pickle', 'wb') as f:
#   pickle.dump(sequences, f)

# with open('CLIP4Rec/artifacts/vocab.pickle', 'wb') as f:
#   pickle.dump(vocab, f)

# with open('CLIP4Rec/artifacts/film_descriptions_encoded.pickle', 'wb') as f:
#   pickle.dump(film_descriptions_encoded, f)

# with open('CLIP4Rec/artifacts/movies_metadata.pickle', 'wb') as f:
#   pickle.dump(movies_metadata, f)

In [5]:
# Movie titles defined as a list for use later in the notebook.
list_movies = [
    "Only Lovers Left Alive",
    "The Twilight Saga: Eclipse",
    "Me Before You",
    "(500) Days of Summer",
]