In [24]:
import sqlite3
from ossapi import Ossapi
from time import strftime, localtime
import pandas as pd
from gensim.test.utils import datapath
from gensim import utils
import gensim
from gensim.models.callbacks import CallbackAny2Vec
from sklearn.model_selection import train_test_split
import os
from sklearn.neighbors import NearestNeighbors

import numpy as np
import sys

sys.path.insert(0, '../') # Add parent directory because data is in parent directory

In [25]:
# Read the prepared corpus from parquet and expose its 'sentences' column as a
# plain Python list, which is what the Word2Vec constructor is fed later on.
df = pd.read_parquet("../data/top_sentences_std.parquet", engine="pyarrow")
sentences = df["sentences"].to_list()

In [19]:
class MonitorCallback(CallbackAny2Vec):
    """Print the training loss contributed by each individual epoch.

    ``model.get_latest_training_loss()`` accumulates over epochs, so the
    per-epoch figure is the difference between two consecutive readings.
    """

    def __init__(self):
        self.epoch = 0
        # Cumulative loss observed at the end of the previous epoch. Starting
        # at 0 makes the first delta equal to the raw reading and guarantees
        # the attribute exists before the first epoch finishes.
        self.loss_previous_step = 0

    def on_train_begin(self, model):
        """Reset the counters so one instance can be reused across training runs.

        NOTE(review): assumes gensim restarts its running loss at the beginning
        of every ``train()`` call -- confirm against the installed version.
        """
        self.epoch = 0
        self.loss_previous_step = 0

    def on_epoch_end(self, model):
        """Report the loss added during the epoch that just finished."""
        loss = model.get_latest_training_loss()
        print(
            "Loss after epoch {}: {}".format(
                self.epoch, loss - self.loss_previous_step
            )
        )
        self.epoch += 1
        self.loss_previous_step = loss

# Train the embedding on `sentences`; MonitorCallback prints the loss per epoch.
model = gensim.models.Word2Vec(
    sentences=sentences,
    # --- architecture ---
    sg=0,  # CBOW (sg=1 would select skip-gram)
    vector_size=15,
    window=20,
    min_count=10,
    # --- objective ---
    # NOTE(review): hierarchical softmax (hs=1) and negative sampling
    # (negative=10) are both switched on -- confirm that is intended.
    hs=1,
    negative=10,
    ns_exponent=0.8,
    # --- training run ---
    epochs=50,
    workers=16,
    compute_loss=True,  # needed so the callback can read the training loss
    callbacks=[MonitorCallback()],
)

# loss = model.get_latest_training_loss()

KeyboardInterrupt: 

In [43]:
# Persist the model to 'word2vec_1.model'; the path is relative, so the file is
# written to the current working directory.
model.save('word2vec_1.model')

In [33]:
# Use the model held in memory. To work from a model stored on disk instead,
# load it first, e.g.:
#     model = gensim.models.Word2Vec.load("w2v_model/w2v_model")
word2vec_model = model

# Index every embedding vector in a ball tree; each query returns 50 neighbours.
NN = NearestNeighbors(
    n_neighbors=50,
    algorithm="ball_tree",
).fit(word2vec_model.wv.vectors)

In [40]:
import sys

# Make the parent directory importable for `data.classes`. Only insert the
# entry when it is missing so re-running the cell does not stack duplicates.
if "../" not in sys.path:
    sys.path.insert(0, "../")
from data.classes import Score

# Credentials are read from the environment so they never land in the notebook.
OSU_CLIENT_ID = os.environ.get("OSU_CLIENT_ID")
OSU_CLIENT_SECRET = os.environ.get("OSU_CLIENT_SECRET")
user_id = '28956125'

api = Ossapi(OSU_CLIENT_ID, OSU_CLIENT_SECRET)
top_scores = api.user_scores(user_id, type="best", mode="osu", limit=100)

# Wrap the API results and order them by pp, highest first.
top_scores = [Score(score) for score in top_scores]
top_scores.sort(key=lambda x: x.pp, reverse=True)

# top_scores = [
#     str(score.beatmap_id) + "-" + str(score.mods) for score in top_scores # Limit to top 50
# ]

# NOTE(review): this literal replaces the scores fetched above, so the query is
# built from this single key only. Re-enable the commented block to use the
# player's real top scores.
top_scores = ['1872812-0']

# Keep only keys the model knows. `key_to_index` is a dict, so each membership
# test is a hash lookup rather than a scan over the `index_to_key` list.
top_scores_vec = [
    word2vec_model.wv[score]
    for score in top_scores
    if score in word2vec_model.wv.key_to_index
]
if not top_scores_vec:
    # Averaging an empty list yields NaN and an opaque failure further down.
    raise ValueError(
        "None of the requested scores are present in the Word2Vec vocabulary"
    )

# Query the index with the mean of the selected vectors; the result is the
# (distances, indices) pair returned by `kneighbors`.
neighbor = NN.kneighbors([np.mean(top_scores_vec, axis=0)])

In [41]:
# Translate the neighbour indices back into vocabulary keys. `neighbor` is the
# (distances, indices) pair from `NN.kneighbors`; row 0 belongs to the single
# query vector. The lookup goes through `word2vec_model`, the model whose
# vectors `NN` was fitted on, so the indices stay valid even if `model` is
# later rebound to something else.
beatmaps = [word2vec_model.wv.index_to_key[i] for i in neighbor[1][0]]
# Drop keys that were part of the query itself.
beatmaps = [beatmap for beatmap in beatmaps if beatmap not in top_scores]

beatmaps

['2477065-0',
 '1476903-0',
 '3211514-0',
 '1086057-0',
 '1466471-0',
 '2681313-0',
 '1166899-0',
 '1744286-0',
 '996054-0',
 '2323381-0',
 '246280-0',
 '2643868-0',
 '1465268-0',
 '1601387-0',
 '2017881-0',
 '2173072-0',
 '1658615-0',
 '3278791-0',
 '746315-0',
 '2850593-0',
 '1082403-0',
 '3978313-0',
 '2323881-0',
 '1564926-0',
 '3873445-0',
 '2088202-0',
 '1413077-0',
 '845391-0',
 '2784944-0',
 '1842043-0',
 '1391282-0',
 '3534472-0',
 '2366247-0',
 '3601629-0',
 '635679-0',
 '3018109-0',
 '2011977-0',
 '1506912-0',
 '2110791-0',
 '1557471-0',
 '3737820-0',
 '2349954-0',
 '3385965-0',
 '1243846-0',
 '457061-0',
 '2378549-0',
 '1553356-0',
 '2335757-0',
 '1476604-0']