# Imports, paths, etc.

In [11]:
import matplotlib.pyplot as plt
import torch
from torch.utils import data as torch_data
from torch import nn
import pandas as pd

from google.colab import drive
import importlib
from sys import path as modules_search_path
from sklearn import metrics

In [12]:
# Mount Google Drive inside the Colab runtime; everything below lives under it.
drive_path = "/content/drive"
drive.mount(drive_path)

# Project locations on the mounted drive.
base_path = drive_path + "/MyDrive/recsys2/"
data_path = base_path + "data/"
src_path = base_path + "ml_sources/"

# Make the project sources importable. The membership check keeps repeated
# runs of this cell from stacking duplicate entries onto the module search path.
if src_path not in modules_search_path:
    modules_search_path.append(src_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
import importlib

# Ensure the project sources are on the module search path. The guard keeps
# the cell idempotent: re-running it never appends the same entry twice.
if src_path not in modules_search_path:
    modules_search_path.append(src_path)

from recsys_pipeline.data import datasets, users_datasets_retriever
from recsys_pipeline.data_transform import data_preprocessor, id_idx_converter
from recsys_pipeline.managers import train_eval_managers, trainers, validators
from recsys_pipeline.models import mf_with_bias

# Every project module used by this notebook, in the order they are reloaded.
my_modules = [
    datasets,
    users_datasets_retriever,
    data_preprocessor,
    id_idx_converter,
    train_eval_managers,
    trainers,
    validators,
    mf_with_bias,
]

# Re-execute each module so the running kernel uses the source currently on
# disk instead of a copy cached by an earlier import.
for module in my_modules:
    importlib.reload(module)

In [14]:
# Number of samples per mini-batch (presumably consumed by the data loaders
# built later in the notebook -- confirm against the training cells).
BATCH_SIZE = 256

# Compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Run unit tests

In [66]:
%%bash

# Run the project's unit tests from the sources directory.
# The subshell confines the `cd` to this command group; `|| exit 1` aborts
# instead of listing and discovering tests in the wrong directory.
(cd /content/drive/MyDrive/recsys2/ml_sources || exit 1
 ls
 # Quote the pattern so unittest -- not the shell -- does the matching;
 # unquoted, the shell would expand it against files in the current directory.
 python -m unittest discover -v -p '*test_*.py'
 )

prod
recsys_pipeline
tests
one to many:  tensor([343]) tensor([123, 123, 123, 123, 123, 123, 123, 123])
torch.Size([1]) torch.Size([8])


test_dataset_creation (tests.test_data.test_interact_dataset.TestInteractsDatasetCreation) ... ok
test_empty_dataset_creation (tests.test_data.test_interact_dataset.TestInteractsDatasetCreation) ... ok
test_dataset_iteration (tests.test_data.test_interact_dataset.TestInteractsDatasetIteration)
Test that we can iterate over dataset, and that every sample ... ok
test_empty_dataset_iteration (tests.test_data.test_interact_dataset.TestInteractsDatasetIteration) ... ok
test_init_and_call (tests.test_data.test_loader_build.TestStandardLoaderBuilder) ... ok
test_create_retriever (tests.test_data.test_users_datasets_retriever.TestUsersDatasetsRetriever) ... ok
test_retrieved_sample_shape_and_dtypes (tests.test_data.test_users_datasets_retriever.TestUsersDatasetsRetriever) ... ok
test_retrieved_samples_are_different (tests.test_data.test_users_datasets_retriever.TestUsersDatasetsRetriever) ... ok
test_transform_batch_shapes (tests.test_data_transform.test_data_preprocessor.TestDataPreprocessor)

# Data

In [None]:
# Read the two raw CSV tables from the project's data directory.
anime_info = pd.read_csv(f"{data_path}anime.csv")
ratings = pd.read_csv(f"{data_path}rating.csv")

In [None]:
# Drop rows whose rating equals -1 (presumably a "no rating given" sentinel --
# confirm against the dataset description), then renumber the index from 0.
ratings = (
    ratings
    .loc[ratings["rating"].ne(-1)]
    .reset_index(drop=True)
)

In [None]:
def _reindex_ids(frame, column):
    """Build an id->index converter for `column` and rewrite the column in place.

    The converter is constructed from the column's unique values, then the
    whole column is replaced by whatever `get_idxs` returns for its ids.
    Returns the converter so it can be reused later.
    """
    converter = id_idx_converter.IdIdxConverter(*frame[column].unique())
    frame[column] = converter.get_idxs(*frame[column])
    return converter


# Users first, then items -- same order as the two columns are processed.
user_id_idx_conv = _reindex_ids(ratings, "user_id")
item_id_idx_conv = _reindex_ids(ratings, "anime_id")

In [None]:
# Preview the first five rows after filtering and id re-indexing.
ratings.head(n=5)

Unnamed: 0,user_id,anime_id,rating
0,0,0,10
1,0,1,10
2,0,2,10
3,0,3,10
4,1,4,10


# Interactive scratch (throwaway experiments)

In [None]:
# Two single-row frames, each built from one record and labelled with index 0.
a = pd.DataFrame([{"a": 1, "b": 11}], index=[0])
b = pd.DataFrame([{"a": 2, "b": 22}], index=[0])

# Stack them vertically; ignore_index=True discards the clashing 0 labels and
# numbers the result 0..n-1.
pd.concat((a, b), ignore_index=True)

Unnamed: 0,a,b
0,1,11
1,2,22
