In [1]:
from Scripts.data_processing import DataManager
from Scripts.model_analysis import ModelEvaluator
from Scripts.models import Valuer, VariationalAutoencoder, VAELoss, Recommender

from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics

from skorch import NeuralNet
from skorch.dataset import CVSplit

from pathlib import Path
import pickle

import torch

In [2]:
# Select the compute device once: use the GPU when CUDA is available,
# otherwise fall back to the CPU. `device` is passed to skorch and used to
# place the training tensor further down.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# Notebook configuration: the CSV handed to DataManager, and the root
# directory under which one sub-folder per user is created when models are saved.
DATA_PATH = 'dados/dados_treino.csv'
OUTPUT_PATH = 'dados/modelos'

# Build the data manager from the CSV path and show the users it exposes.
manager = DataManager(DATA_PATH)
print(manager.users)

['#ID4940', '#ID9181']


O sistema deve treinar e ajustar um modelo de recomendação para cada usuário. O modelo treinado de cada usuário é salvo em uma subpasta própria dentro de `OUTPUT_PATH`, no arquivo `recommender.pickle`.

In [4]:
# Train, assemble and persist one recommender per user.
# For each user this loop: (1) fits a classifier and a regressor and wraps
# them in a Valuer, (2) fits a variational autoencoder on the user's liked
# tracks, (3) builds a Recommender from those parts, and (4) pickles it to
# OUTPUT_PATH/<user>/recommender.pickle.
for USER in manager.users:
    print(f"Training models for user {USER}")

    # 1. Train the classification and regression models.
    #
    # The models are specified by hand here, but they could instead be chosen
    # using the scripts of the model-evaluation system.
    X_train, X_test, y_train, y_test = manager.get_training_data(USER, test_size = 0.2, oversampling = 'SMOTENC')
    classifier = SVC(kernel = 'linear')
    classifier.fit(X_train, y_train)
    print(f"Classifier Model: {classifier}")
    # scikit-learn metrics expect (y_true, y_pred), in that order.
    print(f"Accuracy Score: {metrics.accuracy_score(y_test, classifier.predict(X_test))}")
    print()

    # Same split call, but requesting the regression targets (no oversampling).
    X_train, X_test, y_train, y_test = manager.get_training_data(USER, test_size = 0.2, classification = False)
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    print(f"Regression Model: {regressor}")
    print(f"Mean Squared Error: {metrics.mean_squared_error(y_test, regressor.predict(X_test))}")
    print()

    evaluator_model = Valuer(classification_model = classifier, regression_model = regressor)

    # 2. Train the generative model.

    skorch_model = NeuralNet(
        module = VariationalAutoencoder,
        module__latent_dim = 35,
        module__hidden_dims = [40],
        criterion = VAELoss,
        optimizer = torch.optim.Adam,
        lr = 0.0001,
        max_epochs = 200,
        batch_size = 100,
        iterator_train__shuffle = True,
        train_split = CVSplit(0.2),  # hold out 20% of the samples for validation
        device = device
    )

    print("Generative Model:")
    print(skorch_model)

    # Scaler fitted on the selected user's data only; it is stored inside the
    # recommender together with the generative model.
    scaler = MinMaxScaler()

    # Keep only the rows where the first column of Y equals 1 (the positively
    # liked tracks, per the original author's note), normalise them and
    # convert the result to a float torch.Tensor on the chosen device.
    X, Y = manager.data_arrays(USER)
    X = X[Y[:, 0] == 1]
    scaler.fit(X)
    X = scaler.transform(X)
    X = torch.tensor(X).float().to(device)

    # Fit the autoencoder: the input is also the reconstruction target.
    skorch_model.fit(X, X)
    print()

    # 3. Build the recommender system adapted to this user.
    print("Creating recommender model.")
    recommender = Recommender(generativeModel = skorch_model.module_,
                              evaluationModel = evaluator_model,
                              scaler = scaler,
                              user = USER)
    print()

    # 4. Save the model.
    path = Path(OUTPUT_PATH) / USER
    path.mkdir(parents = True, exist_ok = True)

    print(f"Saving recommender model to: {path}")
    # Use a context manager so the file is flushed and closed before the next
    # iteration (and before any later cell reads the pickle back).
    with open(path / 'recommender.pickle', "wb") as filehandler:
        pickle.dump(recommender, filehandler)

    print(f"Finishing model adjusment for user {USER}")
    print()

Training models for user #ID4940
Classifier Model: SVC(kernel='linear')
Accuracy Score: 0.581021897810219

Regression Model: LinearRegression()
Mean Squared Error: 2.416730036971711

Generative Model:
<class 'skorch.net.NeuralNet'>[uninitialized](
  module=<class 'Scripts.models.VariationalAutoencoder'>,
  module__hidden_dims=[40],
  module__latent_dim=35,
)
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5417[0m        [32m0.5372[0m  0.2317
      2        [36m0.5379[0m        [32m0.5347[0m  0.0280
      3        [36m0.5331[0m        [32m0.5307[0m  0.0278
      4        [36m0.5305[0m        [32m0.5291[0m  0.0271
      5        [36m0.5289[0m        [32m0.5250[0m  0.0276
      6        [36m0.5242[0m        [32m0.5226[0m  0.0294
      7        [36m0.5220[0m        [32m0.5178[0m  0.0283
      8        [36m0.5181[0m        [32m0.5131[0m  0.0281
      9        [36m0.5149[0m        [32m0.5112[0m  

In [5]:
# Load the recommender that was saved for the first user.
model_path = Path(OUTPUT_PATH) / manager.users[0] / 'recommender.pickle'

# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load files that this notebook (or another trusted source) produced.
# The context manager closes the file handle as soon as loading finishes.
with open(model_path, "rb") as file:
    recommender = pickle.load(file)

In [11]:
# Columns removed from the data before it is handed to the recommender.
non_feature_columns = ['id_cliente', 'data_curtida', 'n_reproducao', 'gostou']
candidate_tracks = manager.data.drop(columns = non_feature_columns)

# NOTE(review): 23 is presumably the number of tracks requested — confirm
# against Recommender.getMusicList.
recommendation_list = recommender.getMusicList(23, candidate_tracks)

In [12]:
# Display the recommendations with duplicate rows removed. The result is only
# shown, not assigned, so `recommendation_list` keeps its current binding
# (assumes a pandas DataFrame, whose drop_duplicates returns a new object
# by default — TODO confirm).
recommendation_list.drop_duplicates()

Unnamed: 0,Tem_Instr_Violao_Viola,Tem_Instr_Guitarra,Tem_Instr_Cavaco,Tem_Instr_Sintetizador_Teclado,Tem_Instr_Piano,Tem_Instr_Metais,Tem_Instr_Madeiras,Tem_Instr_Cordas,escala_maior,bateria_eletronica,...,f#,g,g#,ano_lancamento,BPM,VolMedio,PctCantada,PctRap,duracao,evaluation
783,1,0,0,0,0,0,0,0,1,0,...,0,0,0,2015,47.078522,6.285548,0.730587,0.0,2.83755,"(1.0, 4.001617431640625)"
4009,1,1,0,1,0,0,0,1,0,0,...,0,0,0,1961,139.179768,10.839323,0.694978,0.0,6.967417,"(1.0, 3.97772216796875)"
3427,0,1,0,1,1,1,0,0,1,0,...,0,0,0,1940,131.450984,12.433552,0.41689,0.0,4.176583,"(1.0, 3.912567138671875)"
3221,1,1,0,1,1,0,0,0,1,0,...,0,0,0,1949,153.209063,11.643118,0.783589,0.0,5.93725,"(1.0, 3.727325439453125)"
403,1,1,0,1,0,0,0,0,1,0,...,0,0,0,2016,67.686724,8.435219,0.668896,0.0,2.900233,"(1.0, 3.676361083984375)"
4964,1,0,0,0,0,0,0,0,1,0,...,0,0,0,2016,55.682308,6.790896,0.867349,0.0,2.432717,"(1.0, 3.663330078125)"
2629,0,1,0,1,1,1,0,1,0,0,...,0,0,0,1992,115.535697,10.011328,0.50602,0.0,7.428917,"(1.0, 3.66143798828125)"
4686,1,1,0,1,0,0,0,0,1,0,...,0,0,0,1939,134.416206,9.965111,0.690751,0.0,4.20965,"(1.0, 3.651702880859375)"
2689,1,0,0,0,0,0,0,0,1,0,...,0,0,0,2014,59.436644,7.764802,0.700477,0.0,2.7105,"(1.0, 3.640350341796875)"
559,1,1,0,0,0,0,0,0,1,0,...,0,0,0,2016,65.487303,5.10666,0.741388,0.0,2.924883,"(1.0, 3.604461669921875)"
