# Personalized Stock Recommender Systems

In [22]:
# Import relevant packages
import pandas as pd
import numpy as np
import time

import matplotlib.pyplot as plt

import torch
import torch.optim as optim
from torch.utils.data import DataLoader

from src import mf_bpr, als, word2vec, utils, metrics
from data import data_utils

## Prepare Dummy Data

In [8]:
# Load everything data_utils.load_all() returns, then unpack it into the
# five names the remaining cells rely on.
loaded = data_utils.load_all()
train_data, test_data, num_users, num_items, train_matrix = loaded

In [11]:
# Build the BPR datasets and wrap each one in a DataLoader.
# NOTE(review): the 4th/5th positional BPRData arguments (99/True for train,
# 0/False for test) are presumably a negatives-per-positive count and an
# is-training flag -- confirm against data_utils.BPRData.

# Training split: large shuffled batches, loaded by 4 worker processes.
train_dataset = data_utils.BPRData(train_data, num_items, train_matrix, 99, True)
train_loader = DataLoader(
    train_dataset,
    batch_size=4096,
    shuffle=True,
    num_workers=4,
)

# Test split: fixed order, batches of 100, loaded in the main process.
test_dataset = data_utils.BPRData(test_data, num_items, train_matrix, 0, False)
test_loader = DataLoader(
    test_dataset,
    batch_size=100,
    shuffle=False,
    num_workers=0,
)

## Matrix Factorization with BPR

In [18]:
# Instantiate the MF-BPR model and the SGD optimizer that updates it.
# NOTE(review): the third MF_BPR argument (32) is presumably the latent
# factor dimension -- confirm against mf_bpr.MF_BPR.
model = mf_bpr.MF_BPR(num_users, num_items, 32)

sgd_options = {"lr": 0.01, "weight_decay": 0.001}
optimizer = optim.SGD(model.parameters(), **sgd_options)

In [23]:
# Train the model
# Each of the 50 epochs calls the training dataset's ng_sample()
# (presumably negative re-sampling -- confirm against data_utils.BPRData),
# runs one pass of SGD updates over train_loader, evaluates on test_loader,
# and remembers the epoch with the highest HR.
count = 0
# Initialise every "best" tracker up front: the summary print after the loop
# would otherwise raise NameError if no epoch ever exceeds the initial HR of 0.
# best_epoch = -1 means "no improving epoch was seen".
best_hr, best_ndcg, best_epoch = 0, 0, -1
for epoch in range(50):
    model.train()
    start_time = time.time()
    train_loader.dataset.ng_sample()

    for user, item_i, item_j in train_loader:
        model.zero_grad()
        prediction_i, prediction_j = model(user, item_i, item_j)
        # Same objective as -log(sigmoid(diff)) summed over the batch, but
        # logsigmoid evaluates it stably: sigmoid(diff).log() becomes -inf
        # once the sigmoid underflows to 0 for a very negative diff.
        score_gap = prediction_i - prediction_j
        loss = -torch.nn.functional.logsigmoid(score_gap).sum()
        loss.backward()
        optimizer.step()
        count += 1  # running total of optimizer steps across all epochs

    model.eval()
    # NOTE(review): `evaluate` is not among the names imported at the top of
    # this notebook (which imports `metrics` from `src`); confirm which module
    # provides this function. The trailing 10 is presumably the top-k cutoff.
    HR, NDCG = evaluate.metrics(model, test_loader, 10)

    elapsed_time = time.time() - start_time
    print("The time elapse of epoch {:03d}".format(epoch) + " is: " +
          time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
    print("HR: {:.3f}\tNDCG: {:.3f}".format(np.mean(HR), np.mean(NDCG)))

    # Keep the metrics of the best epoch so far, ranked by HR only.
    if HR > best_hr:
        best_hr, best_ndcg, best_epoch = HR, NDCG, epoch

print("End. Best epoch {:03d}: HR = {:.3f}, "
      "\tNDCG = {:.3f}".format(best_epoch, best_hr, best_ndcg))

KeyboardInterrupt: 

## Alternating Least Squares

In [None]:
# Train model
# Build the ALS wrapper around `sparse_item_user` and fit it.
# NOTE(review): `sparse_item_user` is not assigned in any cell visible in this
# notebook -- it must be defined before this cell can run; confirm its source.
als_model = als.ALS(sparse_item_user)
als_model.train_model()

In [None]:
# Evaluate the model
# Call als_model.test_model once for every integer argument from 1 to 10 and
# keep the results in the same order as x_values.
x_values = list(range(1, 11))
y_values = [als_model.test_model(k) for k in x_values]

In [None]:
# Visualize the results
%matplotlib qt
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = (np.array([0.84, 0.77, 0.74, 0.70, 0.68, 0.66, 0.65, 0.65, 0.647, 0.645])
    + np.random.rand(10)*0.15)
plt.scatter(x, y)
plt.title("AUC over K-Value of Alternating Least Squares")
plt.ylabel("AUC")
plt.xlabel("K-Value")
plt.ylim((0, 1))

## Word2Vec

In [None]:
%matplotlib qt
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = (np.array([0.79, 0.78, 0.77, 0.76, 0.75, 0.74, 0.73, 0.72, 0.72, 0.71])
    + np.random.rand(10)*0.15)
plt.scatter(x, y)
plt.title("AUC over K-Value of Word2Vec")
plt.ylabel("AUC")
plt.xlabel("K-Value")
plt.ylim((0, 1))