In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _, files in os.walk('/kaggle/input'):
    for file_name in files:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install tez

In [None]:
# Read the training CSV into a DataFrame for the exploratory cells below.
df = pd.read_csv("/kaggle/input/predict-movie-ratings/train.csv")

In [None]:
# Preview only the first rows instead of rendering the whole frame.
df.head()

In [None]:
# Number of distinct values in the "user" column.
df["user"].nunique()

In [None]:
# Number of distinct values in the "movie" column.
df["movie"].nunique()

In [None]:
# How many rows carry each distinct rating value.
df["rating"].value_counts()

In [None]:
import tez
import pandas as pd
from sklearn import model_selection
import torch
import torch.nn as nn
from sklearn import metrics
import numpy as np
from sklearn import preprocessing

class MovieDataset:
    """Map-style dataset over three parallel columns: users, movies, ratings.

    Indexing with an integer position returns one sample as a dict of
    tensors keyed by ``"users"``, ``"movies"`` and ``"ratings"``.
    """

    def __init__(self, users, movies, ratings):
        """Store the three aligned columns.

        Args:
            users: Array-like of user ids (converted to ``torch.long`` per sample).
            movies: Array-like of movie ids (converted to ``torch.long`` per sample).
            ratings: Array-like of ratings (converted to ``torch.float`` per sample).

        Raises:
            ValueError: If the three inputs do not have the same length.
        """
        # Coerce to NumPy so ``self.users[item]`` is always *positional*.
        # A pandas Series would otherwise be indexed by label, which raises
        # KeyError (or picks the wrong row) once the index has been shuffled
        # or subset, e.g. after a train/validation split.
        self.users = np.asarray(users)
        self.movies = np.asarray(movies)
        self.ratings = np.asarray(ratings)

        # Fail fast on misaligned columns instead of erroring mid-epoch.
        if not (len(self.users) == len(self.movies) == len(self.ratings)):
            raise ValueError(
                "users, movies and ratings must have the same length, got "
                f"{len(self.users)}, {len(self.movies)} and {len(self.ratings)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.users)

    def __getitem__(self, item):
        """Return the sample at position ``item`` as a dict of tensors."""
        user = self.users[item]
        movie = self.movies[item]
        rating = self.ratings[item]

        return {
            "users": torch.tensor(user, dtype=torch.long),
            "movies": torch.tensor(movie, dtype=torch.long),
            "ratings": torch.tensor(rating, dtype=torch.float),
        }
    

    
class RecSysModel(tez.Model):
    """Embedding-based rating regressor built on ``tez.Model``.

    A user embedding and a movie embedding are looked up, concatenated along
    dim 1 and passed through one linear layer that emits a single value per
    sample.
    """

    def __init__(self, num_users, num_movies, embed_dim=32):
        """Create the embedding tables and the linear head.

        Args:
            num_users: Number of rows in the user embedding table.
            num_movies: Number of rows in the movie embedding table.
            embed_dim: Width of each embedding vector. Defaults to 32, the
                value that was previously hard-coded.
        """
        super().__init__()
        self.user_embed = nn.Embedding(num_users, embed_dim)
        self.movie_embed = nn.Embedding(num_movies, embed_dim)
        # The head consumes both embeddings concatenated, hence 2 * embed_dim.
        self.out = nn.Linear(2 * embed_dim, 1)
        # NOTE(review): presumably read by tez to decide when to step the
        # scheduler (once per epoch) - confirm against the installed tez.
        self.step_scheduler_after = 'epoch'

    def fetch_optimizer(self):
        """Return an Adam optimizer over all model parameters (lr=1e-3)."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)

    def fetch_scheduler(self):
        """Return a StepLR scheduler (step_size=3, gamma=0.7).

        Reads ``self.optimizer``, which is not set in this class.
        NOTE(review): presumably tez assigns it from ``fetch_optimizer``
        before calling this method - confirm.
        """
        return torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=3, gamma=0.7)

    def monitor_metrics(self, output, rating):
        """Compute the RMSE between predictions and targets.

        Args:
            output: Tensor of model predictions.
            rating: Tensor of target ratings, same shape as ``output``.

        Returns:
            Dict with a single key ``'rmse'``.
        """
        # detach() drops the autograd graph; cpu().numpy() hands plain arrays
        # to scikit-learn.
        output = output.detach().cpu().numpy()
        rating = rating.detach().cpu().numpy()

        return {
            'rmse': np.sqrt(metrics.mean_squared_error(rating, output))
        }

    def forward(self, users, movies, ratings=None):
        """Predict ratings and, when targets are given, the loss and metrics.

        Args:
            users: Tensor of user indices for ``self.user_embed``.
            movies: Tensor of movie indices for ``self.movie_embed``.
            ratings: Optional tensor of target ratings. When omitted (for
                example at inference time on unlabeled data) no loss or
                metrics are computed.

        Returns:
            Tuple ``(output, loss, metrics)``. ``loss`` is ``None`` and
            ``metrics`` is an empty dict when ``ratings`` is ``None``.
        """
        user_embeds = self.user_embed(users)
        movie_embeds = self.movie_embed(movies)

        features = torch.cat([user_embeds, movie_embeds], dim=1)
        output = self.out(features)

        if ratings is None:
            return output, None, {}

        # Reshape targets to a column so they line up with the one-column
        # output of the linear head.
        targets = ratings.view(-1, 1)
        loss = nn.MSELoss()(output, targets)
        calc_metrics = self.monitor_metrics(output, targets)
        return output, loss, calc_metrics

        
        
def train(
    data_path='/kaggle/input/predict-movie-ratings/train.csv',
    test_size=0.1,
    random_state=42,
    batch_size=1024,
):
    """Load the ratings CSV, split it, and fit a ``RecSysModel``.

    All parameters default to the values that were previously hard-coded,
    so calling ``train()`` with no arguments uses the same data, split and
    batch size as before.

    Args:
        data_path: CSV file to read; must contain ``user``, ``movie`` and
            ``rating`` columns.
        test_size: Fraction of rows held out for validation.
        random_state: Seed for the stratified split. When it is an ``int``
            it also seeds torch's global RNG so weight initialisation is
            repeatable between runs.
        batch_size: Batch size used for both training and validation.
    """
    # Without seeding torch, only the split was reproducible.
    if isinstance(random_state, int):
        torch.manual_seed(random_state)

    df = pd.read_csv(data_path)
    # ID, USER, MOVIE, RATING

    # Map raw ids to contiguous integers 0..n-1 so they can index the
    # embedding tables directly.
    lbl_user = preprocessing.LabelEncoder()
    lbl_movie = preprocessing.LabelEncoder()

    df["user"] = lbl_user.fit_transform(df["user"].values)
    df["movie"] = lbl_movie.fit_transform(df["movie"].values)

    # Stratify on the rating so both splits share the rating distribution.
    df_train, df_valid = model_selection.train_test_split(
        df,
        test_size=test_size,
        random_state=random_state,
        stratify=df["rating"].values,
    )

    train_dataset = MovieDataset(
        users=df_train["user"].values,
        movies=df_train["movie"].values,
        ratings=df_train["rating"].values,
    )

    valid_dataset = MovieDataset(
        users=df_valid["user"].values,
        movies=df_valid["movie"].values,
        ratings=df_valid["rating"].values,
    )

    # Embedding table sizes come from the number of distinct encoded ids.
    model = RecSysModel(
        num_users=len(lbl_user.classes_),
        num_movies=len(lbl_movie.classes_),
    )

    model.fit(
        train_dataset, valid_dataset, train_bs=batch_size, valid_bs=batch_size
    )

    

In [None]:
# Run the training pipeline with its default settings.
train()