In [1]:
from src import pipeline, autoencoder, training_autoencoder, data_containers
import torch
import time
import numpy as np

# Number of DataFrame rows sliced off per mini-batch in both the training and evaluation loops.
BATCH_SIZE = 4096

class EarlyStopper:
    """Signals when training should stop because the loss has stopped improving.

    The stopper remembers the lowest loss it has been shown. Each call to
    `should_stop` that does not beat that loss uses up one unit of patience;
    a call that does beat it restores the full patience budget.
    """

    def __init__(self, patience: int):
        """Create a stopper.

        Args:
            patience: Number of consecutive non-improving calls to
                `should_stop` tolerated before it returns True.
        """
        # Remaining non-improving calls before a stop is signalled.
        self.patience_left = patience
        # Full budget that `patience_left` is reset to after an improvement.
        self.patience = patience
        # Lowest loss observed so far; starts at +inf so the first loss always improves.
        self.best_loss = float("inf")

    def should_stop(self, loss: float) -> bool:
        """Record `loss` and report whether the patience budget is exhausted.

        Args:
            loss: The latest loss value to compare against the best seen so far.

        Returns:
            True once `patience` consecutive calls have failed to improve on
            the best loss, otherwise False.
        """
        # Check for improvement *before* spending patience: an improving loss must
        # never trigger a stop, even when it arrives on the last unit of patience.
        if loss < self.best_loss:
            self.best_loss = loss
            self.patience_left = self.patience
            return False

        self.patience_left -= 1
        # `<=` rather than `==` so the stopper keeps reporting True if it is
        # called again after the budget has already run out.
        return self.patience_left <= 0

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the cleaned data and convert the user frame to pandas; the training
# loop relies on pandas' `.sample` and `.iloc` to shuffle and slice it.
user_data, movies = pipeline.clean_data()
formatted_user_data = user_data.to_pandas()
# NOTE(review): 0.1 is presumably the held-out fraction — confirm against
# training_autoencoder.train_test_split.
train, test = training_autoencoder.train_test_split(formatted_user_data, 0.1)

# The autoencoder is sized by the number of movies and moved to the chosen device.
model = autoencoder.AutoEncoder(n_movies=len(movies)).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

stopper = EarlyStopper(patience=5)

# Per-epoch loss history, appended to once per epoch by the training loop.
train_losses = []
test_losses = []

# Upper bound on the number of epochs; early stopping normally ends training sooner.
MAX_EPOCHS = 1000


def _iter_batches(frame, n_movies, device):
    """Yield model-ready batches built from consecutive BATCH_SIZE-row slices of `frame`."""
    for start in range(0, len(frame), BATCH_SIZE):
        rows = frame.iloc[start:start + BATCH_SIZE]
        yield training_autoencoder.batch_from_user_ratings(rows, n_movies, device)


def _masked_squared_error(model, batch):
    """Return the summed squared difference between the batch's rating vectors
    and the model output multiplied element-wise by the relevancy vectors."""
    model_out = model(batch.rating_vectors)
    return ((batch.rating_vectors - model_out * batch.relevancy_vectors) ** 2).sum()


for epoch in range(MAX_EPOCHS):

    # Reshuffle the training rows so batch composition differs between epochs.
    train = train.sample(frac=1)

    total_train_loss = 0
    total_test_loss = 0
    epoch_start = time.time()

    # Training pass: one optimiser step per batch.
    for batch in _iter_batches(train, len(movies), device):
        loss = _masked_squared_error(model, batch)
        total_train_loss += loss.item()
        loss.backward()
        optimizer.step()
        # Clear gradients right after the step so the next batch starts from zero.
        optimizer.zero_grad()

    # Evaluation pass: eval mode and no gradient tracking.
    model.eval()
    for batch in _iter_batches(test, len(movies), device):
        with torch.no_grad():
            loss = _masked_squared_error(model, batch)
        total_test_loss += loss.item()
    model.train()

    # Convert the summed losses into per-row averages.
    total_train_loss = round(total_train_loss / len(train), 6)
    total_test_loss = round(total_test_loss / len(test), 6)

    train_losses.append(total_train_loss)
    test_losses.append(total_test_loss)

    # Checkpoint only when the test loss beats the best seen so far. This must
    # run before `should_stop`, which updates `stopper.best_loss`.
    if total_test_loss < stopper.best_loss:
        torch.save(model, "model_autoencoder.pt")

    # Report the epoch before the stop check so the final epoch's metrics are
    # not lost when early stopping triggers.
    epoch_time = int(time.time() - epoch_start)
    print(f"[EPOCH {epoch}] Total Training Loss: {total_train_loss:.04f} | Total Test Loss: {total_test_loss:.04f} | Time: {epoch_time}s")

    if stopper.should_stop(total_test_loss):
        print("Early Stopping")
        break


Cleaning data...
Data cleaned!
[EPOCH 0] Total Training Loss: 172.3602 | Total Test Loss: 159.0798 | Time: 128s
[EPOCH 1] Total Training Loss: 153.7741 | Total Test Loss: 151.8943 | Time: 131s
[EPOCH 2] Total Training Loss: 148.2539 | Total Test Loss: 148.6947 | Time: 160s


In [5]:
# NOTE(review): this indexes `batch` directly as a 2-D tensor, whereas the
# training cell's `batch` is accessed through `.rating_vectors` /
# `.relevancy_vectors`. Presumably relies on a `batch` left over from an
# earlier kernel session — confirm or remove this scratch cell.
batch[2,-1]

tensor(0.9537, device='cuda:0')

In [6]:
# NOTE(review): passes `batch` straight to the model, whereas the training
# cell calls `model(batch.rating_vectors)`. Presumably relies on a tensor
# `batch` from an earlier kernel session — confirm or remove this scratch cell.
model(batch)[2,-1]

tensor(0.2312, device='cuda:0', grad_fn=<SelectBackward0>)

In [3]:

# NOTE(review): `optim` is not defined in any visible cell (the training cell
# names its optimiser `optimizer`), and this cell's execution count is out of
# order. It will likely fail on Restart & Run All — confirm or remove.
optim.step()
optim.zero_grad()

Unnamed: 0,user,film,relative_rating
197184,2598093,"[3, 12, 23, 57, 67, 80, 82, 98, 101, 123, 128,...","[0.533950617283951, 0.533950617283951, 0.53395..."
334707,881975,"[2, 4, 11, 16, 17, 19, 21, 22, 26, 30, 37, 39,...","[0.43285714285714283, 0.43285714285714283, 0.4..."
176033,775436,"[2, 3, 9, 16, 31, 53, 56, 79, 82, 97, 115, 119...","[-0.6041666666666665, 0.3958333333333335, -2.6..."
228888,1371375,"[9, 17, 35, 42, 46, 53, 54, 61, 66, 82, 107, 1...","[-0.16470588235294148, -0.16470588235294148, 0..."
161627,1772011,"[3, 17, 25, 31, 34, 41, 42, 43, 53, 56, 68, 82...","[1.1020408163265305, 0.1020408163265305, -0.89..."
...,...,...,...
128436,1287688,"[2, 3, 12, 13, 16, 19, 25, 30, 31, 34, 35, 36,...","[0.9285714285714288, 0.9285714285714288, -0.07..."
189157,2405485,"[3, 13, 16, 35, 36, 38, 48, 66, 85, 111, 119, ...","[-1.1602787456445993, 0.8397212543554007, 0.83..."
193082,704493,"[3, 16, 17, 30, 42, 53, 54, 56, 57, 82, 87, 96...","[0.4501845018450181, 0.4501845018450181, 0.450..."
92806,1663782,"[2, 3, 5, 17, 21, 23, 31, 39, 57, 63, 64, 66, ...","[0.9134020618556704, -0.08659793814432959, -1...."


Unnamed: 0,user,film,relative_rating
0,218880,"[52, 120, 122, 299, 457, 586, 953, 1229, 1508,...","[-1.7000000000000002, 1.2999999999999998, 0.29..."
1,1746912,"[3, 12, 20, 26, 35, 36, 37, 57, 66, 70, 77, 81...","[0.24124513618677046, -0.7587548638132295, 0.2..."
2,810432,"[3, 16, 17, 20, 39, 43, 62, 104, 129, 231, 244...","[-1.0087719298245617, -0.008771929824561653, -..."
3,2340288,"[10, 55, 66, 123, 157, 165, 180, 201, 219, 226...","[-1.7173913043478262, 0.28260869565217384, -0...."
4,731392,"[261, 827, 832, 870, 950, 987, 1066, 1253, 140...","[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, -2.0,..."
...,...,...,...
465602,2170591,"[123, 145, 278, 327, 468, 501, 539, 543, 550, ...","[0.9534883720930232, 0.9534883720930232, 0.953..."
465603,2090815,"[8, 16, 97, 120, 129, 136, 149, 201, 202, 252,...","[0.3958333333333335, -0.6041666666666665, -0.6..."
465604,318239,"[108, 129, 195, 252, 262, 434, 513, 579, 896, ...","[-0.3888888888888893, 0.6111111111111107, 0.61..."
465605,355007,"[22, 34, 55, 66, 72, 82, 87, 119, 123, 149, 19...","[0.6440677966101696, 1.6440677966101696, 0.644..."
