In [23]:
from pathlib import Path

import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
from sklearn.model_selection import train_test_split

from models.sae import SAE
from read_data import read_data, create_tabular_data



In [24]:
# users.dat separates fields with '::'. A separator longer than one character
# requires the Python parsing engine, and header=None tells pandas the file
# carries no header row (columns are numbered instead of named).
users = pd.read_csv(
    'ml-1m/users.dat',
    sep='::',
    header=None,
    engine='python',
    encoding='latin-1',
)
# The ratings and movies tables come from the project helper, pointed at the
# same dataset directory.
ratings, movies = read_data(Path('ml-1m'))

In [25]:
# Preview the first rows of the movies table returned by read_data.
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [26]:
# Preview the first rows of the ratings table returned by read_data.
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [27]:
# Row counts of the users and movies tables.
# NOTE(review): these are row counts, not maximum ids — confirm that is what
# create_tabular_data and SAE expect.
nb_users, nb_movies = len(users), len(movies)

In [28]:
# Display both counts as a tuple for a quick sanity check.
nb_users, nb_movies

(6040, 3883)

In [29]:
# Build the modelling table from the three raw tables; its layout is defined by
# read_data.create_tabular_data.
df = create_tabular_data(ratings, users, movies)

In [30]:
# Hold out 20% of the rows of `df` for testing. The fixed random_state pins the
# shuffle, so the same rows land in each split on every Restart & Run All and
# the reported losses stay comparable between runs.
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

In [31]:
# Swap each split's DataFrame for a float tensor built from its NumPy values.
# Both frames are read before either name is rebound.
train_set, test_set = (
    torch.FloatTensor(frame.to_numpy()) for frame in (train_set, test_set)
)

In [32]:
# Seed PyTorch's global RNG before the model is created so the initial weights
# (and any torch-driven randomness during training) repeat from run to run.
torch.manual_seed(42)

# Autoencoder under training, constructed from the user and movie counts.
sae = SAE(nb_users, nb_movies)
# Mean-squared-error objective handed to the train/test routines.
criterion = nn.MSELoss()
# RMSprop over every model parameter; weight_decay adds an L2 penalty.
# NOTE(review): the logged loss stalls around 1.04 after ~5 epochs — worth
# revisiting lr / weight_decay if that is not the expected plateau.
optimizer = optim.RMSprop(sae.parameters(), lr=0.05, weight_decay=0.5)

In [33]:
# Use the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [34]:
# Move the model onto the selected device; the value returned by the call is
# what the cell displays.
sae.to(device)

SAE(
  (fc1): Linear(in_features=3883, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=20, bias=True)
  (fc4): Linear(in_features=20, out_features=3883, bias=True)
  (activation): Sigmoid()
)

In [35]:
# Put both data tensors on the device selected earlier so they live alongside
# the model.
train_set, test_set = train_set.to(device), test_set.to(device)

In [36]:
# Train the SAE model.
# NOTE(review): nb_users counts every user, while train_set holds only the rows
# kept by the 80/20 split — confirm train_model does not index rows by nb_users.
nb_epochs = 1000
sae.train_model(nb_epochs, train_set, criterion, optimizer, nb_users, nb_movies)
# Evaluate the SAE.
# NOTE(review): train_set and test_set come from a row-wise split, so row i of
# one is not row i of the other — confirm SAE.test does not pair them by index.
sae.test(train_set, test_set, criterion, nb_users, nb_movies)


epoch: 1 loss: tensor(1.3937)
epoch: 2 loss: tensor(1.0857)
epoch: 3 loss: tensor(1.0535)
epoch: 4 loss: tensor(1.0419)
epoch: 5 loss: tensor(1.0391)
epoch: 6 loss: tensor(1.0568)
epoch: 7 loss: tensor(1.0710)
epoch: 8 loss: tensor(1.0535)
epoch: 9 loss: tensor(1.0511)
epoch: 10 loss: tensor(1.0418)
epoch: 11 loss: tensor(1.0407)


KeyboardInterrupt: 

In [None]:
# Get predictions for a random movie from the test set.
# NOTE(review): the whole test_set is passed in; how the "random" pick is made
# is up to SAE.predict_recommendations — confirm against its implementation.
rec = sae.predict_recommendations(test_set)
# Show only the first 50 entries of the result.
rec[:50]