# Distil-Roberta based , 6 epoch Personality training

In [1]:
!pip install torch transformers accelerate -q

In [2]:
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import json
import ast

from transformers import AutoTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments, RobertaTokenizerFast

import torch
from torch.utils.data import DataLoader, Dataset

from sklearn.metrics.pairwise import linear_kernel
from tqdm.auto import tqdm

import copy

2024-03-04 04:59:03.230116: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-04 04:59:03.230230: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-04 04:59:03.345255: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
device

device(type='cuda')

In [5]:
ratings = pd.read_csv('/kaggle/input/the-movie-dataset/new_ratings_small.csv')
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
...,...,...,...,...
99805,671,6268,2.5,1065579370
99806,671,6269,4.0,1065149201
99807,671,6365,4.0,1070940363
99808,671,6385,2.5,1070979663


In [6]:
movie_ids = np.unique(ratings.movieId.to_numpy().astype(int))

In [7]:
movie_ids.shape

(9025,)

In [8]:
def reindex(movie_id): 
    normal_id = np.where(movie_ids == movie_id)[0][0]
    return normal_id

In [9]:
reindex(1)

0

In [10]:
def load_data(path, delimiter=','):
    train = np.loadtxt(path+'train_data.csv', skiprows=1, delimiter=delimiter)
    test = np.loadtxt(path+'test_data.csv', skiprows=1, delimiter=delimiter)
    eval = np.loadtxt(path+'val_data.csv', skiprows=1, delimiter=delimiter)
    total = np.concatenate((train, test, eval), axis=0)

    n_u = np.unique(total[:, 0]).size #num of users
    n_i = np.unique(total[:, 1]).size #num of items

    train_data = np.zeros((n_u, n_i), dtype='float32')
    test_data = np.zeros((n_u, n_i), dtype='float32')
    eval_data = np.zeros((n_u, n_i), dtype='float32')

    for i in range(train.shape[0]):
        train_data[int(train[i][0])-1][reindex(train[i][1])] = train[i][2]
    
    for i in range(test.shape[0]):
        test_data[int(test[i][0])-1][reindex(test[i][1])] = test[i][2]
        
    for i in range(eval.shape[0]):
        eval_data[int(eval[i][0])-1][reindex(eval[i][1])] = eval[i][2]

    print(f'num_user: {n_u}')
    print(f'num_items: {n_i}')
    return train_data, test_data, eval_data, n_u, n_i

In [11]:
train_data, test_data, eval_data, n_u, n_i = load_data(path='/kaggle/input/the-movie-dataset/')

num_user: 671
num_items: 9025


In [12]:
train_data

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [5., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [13]:
train_data[0][reindex(1371)]

2.5

In [14]:
def average_item_rating(train_data):
    list_average_item_rating = [sum(train_data[:, i])/(np.count_nonzero(train_data[:, i])+0.1) for i in range(train_data.shape[1])]
    return list_average_item_rating

In [15]:
list_average_item_rating = average_item_rating(train_data=train_data)

In [16]:
np.count_nonzero(list_average_item_rating)

8936

In [17]:
index_zero_rating = np.where(np.array(list_average_item_rating) ==0.0)[0]

In [18]:
index_zero_rating

array([ 104,  223,  504,  631,  691, 1409, 1446, 1738, 1943, 2733, 2984,
       3056, 3113, 3336, 3398, 3485, 3603, 3717, 3759, 3985, 4101, 4288,
       4352, 4366, 4442, 4467, 4571, 4843, 5037, 5076, 5138, 5249, 5318,
       5429, 5465, 5496, 5508, 5537, 5573, 5815, 5844, 5866, 5872, 5873,
       5876, 5883, 6073, 6176, 6239, 6316, 6348, 6503, 6510, 6521, 6625,
       6716, 6719, 6764, 6799, 6851, 6927, 6986, 6993, 7093, 7175, 7178,
       7185, 7307, 7317, 7364, 7508, 7584, 8247, 8305, 8349, 8354, 8392,
       8393, 8498, 8507, 8558, 8706, 8730, 8805, 8863, 8869, 8897, 8942,
       9022])

# Tokenizer

In [19]:
model_checking_point = 'distilroberta-base'

In [20]:
tokenizer = RobertaTokenizerFast.from_pretrained(model_checking_point, use_fast=True)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

In [21]:
tokenizer("Hello, this one sentence!", "And this sentence goes with it.")

{'input_ids': [0, 31414, 6, 42, 65, 3645, 328, 2, 2, 2409, 42, 3645, 1411, 19, 24, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [22]:
link_tmdb = pd.read_csv('/kaggle/input/the-movies-dataset/links.csv')
movie_metadatas = pd.read_csv('/kaggle/input/the-movies-dataset/movies_metadata.csv')
credits = pd.read_csv('/kaggle/input/the-movies-dataset/credits.csv')

  movie_metadatas = pd.read_csv('/kaggle/input/the-movies-dataset/movies_metadata.csv')


In [23]:
def combine_features(movie_id: int, link_tmdb, movie_metadatas, credits):

    # Maping movie id with tmdbId
    tmdb_id = int(link_tmdb[link_tmdb.movieId == movie_id].tmdbId.values[0])
    # Getting movie metadata by imdb id , turn tmdb to str because move_metadata.id is str col
    movie_metadata = movie_metadatas[movie_metadatas.id == str(tmdb_id)]  
    credit = credits[credits.id == tmdb_id]

    # Geting budget(str)
    budget = movie_metadata.budget.values[0]

    #  Getting genres
    genres_str = movie_metadata.genres.values[0]
    genres_list = ast.literal_eval(genres_str)
    if len(genres_list) == 0: 
        genres = " "
    else:
        genres = '. '.join([genre['name'] for genre in genres_list])

    # Getting production companys
    pdt_company_str = movie_metadata.production_companies.values[0]
    pdt_company_list = ast.literal_eval(pdt_company_str)
    if len(pdt_company_list) == 0:
        production_company = " "
    else:
        production_company = '. '.join([prt_company['name'] for prt_company in pdt_company_list])

    # Geting prodction countries
    pdt_countries_str = movie_metadata.production_countries.values[0]
    pdt_countries_list = ast.literal_eval(pdt_countries_str)
    if len(pdt_countries_list) == 0:
        production_countries = " "
    else:
        production_countries = '. '.join([prt_countries['name'] for prt_countries in pdt_countries_list])

    # Geting runtime
    runtime = str(movie_metadata.runtime.values[0])

    # Geting casts
    cast_str = credit.cast.values[0]
    cast_list = ast.literal_eval(cast_str)
    if len(cast_list) == 0: 
        casts = " "
    else:
        casts = '. '.join([cast['name'] for cast in cast_list])

    # Getting Overview
    overview = movie_metadata.overview.values[0]

    combined_features = f"{budget}. {genres}. {production_company}. {production_countries}. {runtime}. {casts}. {overview}"

    return combined_features



In [24]:
# Vectorized processing for improved performance
def get_item_features(ids, link_tmdb=link_tmdb, movie_metadatas=movie_metadatas, credits=credits):
    combined_features = []
    if isinstance(ids, (list, tuple, range, np.ndarray)): # Get multiple movie features
        combined_features = [combine_features(movie_ids[i], link_tmdb, movie_metadatas, credits) for i in ids]
    elif isinstance(ids, int): # Get one movie features
        combined_feature = combine_features(movie_ids[ids], link_tmdb, movie_metadatas, credits)
        combined_features.append(combined_feature)
    encodings = tokenizer(combined_features, padding=True, truncation=True, max_length=256, return_tensors="pt")
    return encodings

In [25]:
encodings = get_item_features(ids=range(n_i), link_tmdb=link_tmdb, movie_metadatas=movie_metadatas, credits=credits)

In [26]:
encodings

{'input_ids': tensor([[    0,   246, 45121,  ...,     1,     1,     1],
        [    0,  3506, 33413,  ...,     1,     1,     1],
        [    0,   288,     4,  ...,     1,     1,     1],
        ...,
        [    0,   134, 33413,  ...,     1,     1,     1],
        [    0,  6115,   245,  ...,     1,     1,     1],
        [    0,   288,     4,  ...,     1,     1,     1]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}

## Calcualte item's similarity

In [27]:
item_embeddings = encodings['input_ids'].tolist()

In [28]:
cosine_sim = linear_kernel(item_embeddings, item_embeddings)

In [29]:
cosine_sim.shape

(9025, 9025)

In [30]:
def get_top_similar_items(itemid, topk, list_average_item_rating):
    sim_scores = list(enumerate(cosine_sim[itemid]))
    sim_item = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    list_sim_itemid = [item[0] for item in sim_item[1:topk]]
    
    real_scores = [(i, list_average_item_rating[i]) for i in list_sim_itemid]
    sorted_scores = sorted(real_scores, key=lambda x: x[1], reverse=True)
    list_itemid = [item[0] for item in sorted_scores[:int(topk/2)]]
    
    return list_itemid 

In [31]:
index_zero_rating

array([ 104,  223,  504,  631,  691, 1409, 1446, 1738, 1943, 2733, 2984,
       3056, 3113, 3336, 3398, 3485, 3603, 3717, 3759, 3985, 4101, 4288,
       4352, 4366, 4442, 4467, 4571, 4843, 5037, 5076, 5138, 5249, 5318,
       5429, 5465, 5496, 5508, 5537, 5573, 5815, 5844, 5866, 5872, 5873,
       5876, 5883, 6073, 6176, 6239, 6316, 6348, 6503, 6510, 6521, 6625,
       6716, 6719, 6764, 6799, 6851, 6927, 6986, 6993, 7093, 7175, 7178,
       7185, 7307, 7317, 7364, 7508, 7584, 8247, 8305, 8349, 8354, 8392,
       8393, 8498, 8507, 8558, 8706, 8730, 8805, 8863, 8869, 8897, 8942,
       9022])

In [32]:
def update_rating(list_average_item_rating, index_zero_rating, topk):
    for i in index_zero_rating:
        top_sim_items = get_top_similar_items(itemid=i, topk=topk, list_average_item_rating=list_average_item_rating)
        all_score_cluster = [list_average_item_rating[j] for j in top_sim_items]
        avg_rating = sum(all_score_cluster)/(len(all_score_cluster)+1e-5)
        #cập nhật rating cho vị trí bằng 0
        list_average_item_rating[i] = avg_rating             
                
    return list_average_item_rating

In [33]:
labels = update_rating(list_average_item_rating=list_average_item_rating, index_zero_rating=index_zero_rating, topk=10)

In [34]:
np.count_nonzero(labels)

9025

# Prepare eval dataset

In [35]:
eval_average_ratings = average_item_rating(eval_data)

In [36]:
len(eval_average_ratings)

9025

In [37]:
eval_index_zero_rating = np.where(np.array(eval_average_ratings) ==0.0)[0]

In [38]:
eval_index_zero_rating

array([   7,   12,   25, ..., 9021, 9023, 9024])

In [39]:
eval_labels = np.delete(eval_average_ratings, eval_index_zero_rating)

In [40]:
len(eval_labels)

1517

In [41]:
eval_index_rating = np.where(np.array(eval_average_ratings) != 0.0)[0]

In [42]:
len(eval_index_rating)

1517

In [43]:
eval_encodings = get_item_features(ids=eval_index_rating, link_tmdb=link_tmdb, movie_metadatas=movie_metadatas, credits=credits)

In [44]:
eval_encodings['input_ids'].shape

torch.Size([1517, 256])

# Creating Dataset

In [45]:
# Dataset
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [46]:
train_dataset = CustomDataset(encodings, labels)

In [47]:
eval_dataset = CustomDataset(eval_encodings, eval_labels)

## Fine-Tune using Trainer

In [48]:
model_checking_point

'distilroberta-base'

In [49]:
model = RobertaForSequenceClassification.from_pretrained(model_checking_point, num_labels=1) # 1 label for regression

model.safetensors:   0%|          | 0.00/331M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [50]:
model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-5): 6 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (

In [52]:
training_args = TrainingArguments(  
    output_dir='./results',
    num_train_epochs=6,             
    per_device_train_batch_size=32,              
    weight_decay=0.01,              
    learning_rate=2e-5,
    load_best_model_at_end=True,
    evaluation_strategy="epoch",
    save_strategy="epoch",
)

In [53]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

# Fine-tuning
trainer.train()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss
1,No log,0.787994
2,0.811700,0.783908
3,0.811700,0.815798
4,0.593900,0.829068
5,0.593900,0.815966
6,0.541300,0.818423


TrainOutput(global_step=1698, training_loss=0.6319968871710859, metrics={'train_runtime': 770.9663, 'train_samples_per_second': 70.237, 'train_steps_per_second': 2.202, 'total_flos': 3586490857497600.0, 'train_loss': 0.6319968871710859, 'epoch': 6.0})

# Fine-tune with each Personality 🤗

In [54]:
def get_personality_dataset(user_id, train_data, encodings, link_tmdb=link_tmdb, movie_metadatas=movie_metadatas, credits=credits):
    personality_ratings = train_data[user_id]
    personality_non_ratings = np.where(personality_ratings == 0.0)[0]
    personality_labels = np.delete(personality_ratings, personality_non_ratings)
    
    indexes_to_keep = np.where(personality_ratings != 0.0)[0]
    # Áp dụng mask để giữ lại các elements
    personality_encodings = get_item_features(ids=indexes_to_keep, link_tmdb=link_tmdb, movie_metadatas=movie_metadatas, credits=credits)
    
    personality_dataset = CustomDataset(personality_encodings, personality_labels)
    
    return personality_dataset
    

In [55]:
def predict_testdata(user_id, model, test_data, link_tmdb=link_tmdb, movie_metadatas=movie_metadatas, credits=credits):
    personality_truth = test_data[user_id]
    indexes_to_keep = np.where(personality_truth != 0.0)[0]
    item_encodings = get_item_features(ids=indexes_to_keep, link_tmdb=link_tmdb, movie_metadatas=movie_metadatas, credits=credits).to(device)
    with torch.no_grad():
        outputs = model(**item_encodings)
    output = outputs.logits.cpu().numpy()

    return output.flatten().tolist()
            

In [57]:
# Tải mô hình một lần
community_model_path = '/kaggle/working/results/checkpoint-1698'
original_community_model = RobertaForSequenceClassification.from_pretrained(community_model_path, num_labels=1).to(device)

# Thiết lập TrainingArguments
training_args = TrainingArguments(
    output_dir='./personality_result',
    num_train_epochs=6,
    per_device_train_batch_size=32,
    weight_decay=0.01,
    learning_rate=2e-5,
    # Thêm cấu hình cho mixed precision training nếu cần
    fp16=True,
)

all_list_predict = []

for user_id in tqdm(range(n_u)):
    # Lấy dataset cho user hiện tại
    personality_dataset = get_personality_dataset(user_id=user_id, train_data=train_data, encodings=encodings)
    personality_model = copy.deepcopy(original_community_model)
    
    # Khởi tạo trainer với model đã tải và dataset cụ thể của người dùng
    trainer = Trainer(
        model=personality_model,
        args=training_args,
        train_dataset=personality_dataset,
        tokenizer=tokenizer,
    )

    # Fine-tuning
    trainer.train()

    # Dự đoán
    person_predict_list = predict_testdata(user_id=user_id, model=personality_model, test_data=test_data)
    all_list_predict.extend(person_predict_list)

  0%|          | 0/671 [00:00<?, ?it/s]

Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


Step,Training Loss


In [58]:
all_list_predict[:10]

[2.310546875,
 2.48046875,
 1.8837890625,
 2.017578125,
 2.109375,
 3.044921875,
 2.724609375,
 2.884765625,
 3.125,
 3.71484375]

# Evaluating

In [59]:
def get_test_ratings(test_data):
    test_ratings = []
    list_test_ratings = [np.take(test_data[i], np.where(test_data[i] != 0)[0]) for i in range(test_data.shape[0])]
    for i in list_test_ratings:
        test_ratings.extend(i.flatten().tolist())
    return test_ratings

In [60]:
test_ratings = get_test_ratings(test_data=test_data)

> MSE

In [61]:
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(test_ratings , all_list_predict)
print('MSE =',mse)

MSE = 0.8924769746919681


> RMSE

In [62]:
import math
rmse = math.sqrt(mse)
print('RMSE =',rmse)     

RMSE = 0.9447099950206773
