In [1]:
import os
import torch
import pickle
import warnings
import numpy as np
import pandas as pd

from torch import nn
from tqdm.notebook import tqdm
from transformers import pipeline, get_scheduler
from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from learningLoop import learning_loop
from dataset import MovieDataset, collate_fn
from model import movieModel, create_model_and_optimizer, load_glove_weights

In [3]:
# Read data.csv (relative to the working directory) into the DataFrame used by the cells below.
dt = pd.read_csv(filepath_or_buffer="data.csv")

In [4]:
# Restore the pickled `tok_to_ind` object (presumably a token -> index vocabulary,
# judging by its name -- confirm against the code that wrote the file).
# NOTE(review): unpickling can execute arbitrary code; only load this file if it
# comes from a trusted source.
with open('tok_to_ind.pkl', mode='rb') as vocab_file:
    tok_to_ind = pickle.load(vocab_file)

In [5]:
# Fix NumPy's global RNG seed so the same flags are drawn on every run.
np.random.seed(777)

# Probability that a row is flagged 1 (train); the remainder is flagged 0.
proportion = 0.75
flag_probabilities = [proportion, 1 - proportion]

# One integer flag (1 or 0) per row of `dt`.
for_train = np.random.choice([1, 0], size=dt.shape[0], p=flag_probabilities)

In [6]:
# `for_train` holds integer 0/1 flags, one per row of `dt`. Handing an integer
# array to `.iloc` selects rows BY POSITION, so the raw flags would only ever
# pick rows 0 and 1 (repeated len(dt) times) instead of splitting the frame.
# Convert the flags to a boolean mask so each row lands in exactly one subset.
train_mask = for_train.astype(bool)

dt_for_train = dt.iloc[train_mask]   # rows flagged 1
dt_for_test = dt.iloc[~train_mask]   # rows flagged 0

In [7]:
def _build_movie_dataset(frame):
    """Wrap `frame` in a MovieDataset.

    The dataset receives the frame itself, the sorted unique values of its
    `userId` column, the shared `tok_to_ind` object, and the fixed settings
    size=16 / count_with_rating=4 (their meaning is defined by MovieDataset,
    which is not shown here).
    """
    user_ids = sorted(frame["userId"].unique())
    return MovieDataset(
        frame,
        user_ids,
        tok_to_ind,
        size=16,
        count_with_rating=4,
    )


# Same construction for both subsets; only the source frame differs.
ds_train = _build_movie_dataset(dt_for_train)
ds_val = _build_movie_dataset(dt_for_test)

In [8]:
batch_size = 8

# Settings shared by the training and validation loaders.
_loader_common = {"batch_size": batch_size, "collate_fn": collate_fn}

# Training loader: shuffled, and a trailing incomplete batch is dropped.
dataloader_train = DataLoader(ds_train, shuffle=True, drop_last=True, **_loader_common)

# Validation loader: fixed order, every sample kept (last batch may be smaller).
dataloader_val = DataLoader(ds_val, shuffle=False, drop_last=False, **_loader_common)

In [9]:
# Location of the GloVe vectors file, relative to the working directory.
glove_path = "./glove.840B.300d.txt"

# load_glove_weights is a project helper (not shown); it returns two values.
# NOTE(review): "[PAD]" is presumably the padding token in `tok_to_ind` -- confirm.
glove_weights, mask_found = load_glove_weights(
    glove_path,
    tok_to_ind,
    "[PAD]",
)

Loading Glove Weights


  0%|          | 0/2196018 [00:00<?, ?it/s]

7515 words from vocab of size 7912 loaded!


In [10]:
# Everything handed to the model constructor, grouped in one dictionary.
# NOTE(review): 300 presumably matches the "300d" GloVe vectors, movie_count=16
# the datasets' size=16, and user_count=8 the batch_size -- confirm in model.py.
params = {
    "user_enc_info": {
        "embedding_word_size": 300,
        "output_embedding_size": 100,
        "token_count": 18,
        "movie_count": 16,
        "num_layers": 3,
    },
    "movie_enc_info": {
        "input_embedding_size": 100,
        "output_embedding_size": 100,
        "user_count": 8,
        "num_layers": 3,
    },
    "tok_to_ind": tok_to_ind,
    "glove_weights": glove_weights,
}

# Keep the per-encoder settings reachable under their own names; these are the
# very same dict objects stored inside `params`.
user_encoder_info = params["user_enc_info"]
movie_encoder_info = params["movie_enc_info"]

In [None]:
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Project helper (not shown) that builds the model and its optimizer.
model, optimizer = create_model_and_optimizer(
    model_class=movieModel,
    model_params=params,
    device=device,
)

# Scale the learning rate by 0.25 when the monitored value (mode='min') stops
# improving; which value is monitored is decided inside learning_loop.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.25,
    patience=4,
    threshold=0.001,
    verbose=True,
)

# Train for up to 100 epochs, validating and redrawing plots after every epoch.
# The loop returns the model and optimizer together with the recorded losses
# and metrics.
model, optimizer, losses, metrics = learning_loop(
    model=model,
    optimizer=optimizer,
    train_loader=dataloader_train,
    val_loader=dataloader_val,
    criterion=None,
    device=device,
    scheduler=scheduler,
    epochs=100,
    min_lr=1e-7,
    val_every=1,
    draw_every=1,
    separate_show=False,
    metric_names={
        "accuracy": {"plot_id": 1},
    },
    chkp_folder="./chkp",
    model_name="movelV1",
)

#1/100:


  0%|          | 0/1889058 [00:00<?, ?it/s]

In [13]:
# Small float32 sanity-check tensor. It is moved to the GPU only when CUDA is
# actually available, so this cell also runs on CPU-only machines instead of
# raising from an unconditional `.cuda()` call.
a = torch.tensor([0, 1, 2, 3, 4], dtype=torch.float32)
if torch.cuda.is_available():
    a = a.cuda()

In [14]:
# Echo the tensor so its values and device show up in the cell output.
a

tensor([0., 1., 2., 3., 4.], device='cuda:0')