In [1]:
from models.userProfileModel import UserProfileModel
from models.popularityModel import PopularityModel
from models.targetModel import TargetModel
from data.loadData import *
from data.dataFunctions import get_played_songs_for_user_id, get_songs_by_traks_ids, get_played_tracks, find_random_n_track_ids
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [2]:
# --- Evaluation configuration -------------------------------------------
TEST_SET = 0.3  # fraction of session rows held out for evaluation
N = 10  # cut-off used by the "in top N" / recall@N metrics below
SEED = 42  # fixed seed so the train/test split is the same on every run

# --- Load the raw tables --------------------------------------------------
users = load_users()
tracks = load_tracks()
artists = load_artists()

sessions = load_sessions()

# Split the session rows into a training part (used to fit the models) and a
# held-out test part (used to evaluate them). random_state pins the shuffle:
# without it every "Restart & Run All" yields a different split and therefore
# different metrics, which makes the reported numbers irreproducible.
sessions_train, sessions_test = train_test_split(
    sessions.values, test_size=TEST_SET, random_state=SEED
)

# `.values` was passed to train_test_split, so both halves come back as plain
# arrays; wrap them in DataFrames again with the expected column names.
# NOTE(review): `pd` is not imported explicitly in this notebook - presumably
# it arrives through `from data.loadData import *`; confirm.
SESSION_COLUMNS = ["user_id", "track_id", "event"]
sessions_train = pd.DataFrame(data=sessions_train, columns=SESSION_COLUMNS)
sessions_test = pd.DataFrame(data=sessions_test, columns=SESSION_COLUMNS)


# User Profile Model Evaluation

In [3]:
# Create the user-profile recommender and fit it on the user table, the track
# table and the training split of the sessions. The test split is not passed
# here; it is only used by the evaluation cell that follows.
userProfilemodel = UserProfileModel()
userProfilemodel.fit(users, tracks, sessions_train)


In [4]:
# Group of users the ranking is produced for.
# NOTE(review): the popularity and target-model evaluation cells use users
# [241, 364], so the numbers printed by this cell are measured on a different
# group and are not directly comparable with theirs.
USERS = [101, 102]

# Tracks from the held-out test sessions for these users; each one is used as
# the single "relevant" item of one ranking trial.
tracks_played_in_future = get_played_tracks(USERS, sessions_test)
n_trials = len(tracks_played_in_future)
position_sum = 0  # sum of the 0-based positions of the held-out tracks
in_best_n = 0  # number of trials where the held-out track ranked in the top N

for example_track_id in tqdm(tracks_played_in_future):
    # Candidate list: the held-out track followed by randomly drawn tracks.
    # NOTE(review): if find_random_n_track_ids can return the held-out track
    # itself, the candidate list contains a duplicate - confirm.
    evaluation_tracks = [example_track_id]
    evaluation_tracks.extend(find_random_n_track_ids(100))

    ranked_songs = userProfilemodel.rank_tracks_for_users(
        USERS, evaluation_tracks
    )
    # Look the position up once and reuse it for both metrics.
    position = ranked_songs.index(example_track_id)
    position_sum += position
    if position < N:
        in_best_n += 1

if n_trials == 0:
    # Without this guard the averages below divide by zero when the selected
    # users have no plays in the test split.
    print(f"no test-set tracks found for users {USERS} - nothing to evaluate")
else:
    print("average position: ", round(position_sum / n_trials, 2))
    print(f"in top {N}: {in_best_n} times")
    print(f"recall@{N}: {round(in_best_n/n_trials * 100, 2)} %")


100%|██████████| 200/200 [08:21<00:00,  2.51s/it]

average position:  47.98
in top 10: 26 times
recall@10: 13.0 %





# Popularity Model Evaluation

In [5]:
# Create the popularity recommender and fit it on the user, track and artist
# tables. Note that no session data (neither split) is passed to this fit.
popularityModel = PopularityModel()
popularityModel.fit(users, tracks, artists)


In [6]:
# Group of users the ranking is produced for.
USERS = [241, 364]

# Tracks from the held-out test sessions for these users; each one is used as
# the single "relevant" item of one ranking trial.
tracks_played_in_future = get_played_tracks(USERS, sessions_test)
n_trials = len(tracks_played_in_future)
position_sum = 0  # sum of the 0-based positions of the held-out tracks
in_best_n = 0  # number of trials where the held-out track ranked in the top N

for example_track_id in tqdm(tracks_played_in_future):
    # Candidate list: the held-out track followed by randomly drawn tracks.
    # NOTE(review): if find_random_n_track_ids can return the held-out track
    # itself, the candidate list contains a duplicate - confirm.
    evaluation_tracks = [example_track_id]
    evaluation_tracks.extend(find_random_n_track_ids(100))

    ranked_songs = popularityModel.rank_tracks_for_users(
        USERS, evaluation_tracks
    )
    # Look the position up once and reuse it for both metrics.
    position = ranked_songs.index(example_track_id)
    position_sum += position
    if position < N:
        in_best_n += 1

if n_trials == 0:
    # Without this guard the averages below divide by zero when the selected
    # users have no plays in the test split.
    print(f"no test-set tracks found for users {USERS} - nothing to evaluate")
else:
    print("average position: ", round(position_sum / n_trials, 2))
    print(f"in top {N}: {in_best_n} times")
    print(f"recall@{N}: {round(in_best_n/n_trials * 100, 2)} %")


100%|██████████| 120/120 [00:42<00:00,  2.84it/s]

average position:  42.1
in top 10: 21 times
recall@10: 17.5 %





# Target Model - user profile + popularity

In [7]:
# Create the target recommender (described by the section heading as
# "user profile + popularity") and fit it on the user, track and artist tables
# together with the training split of the sessions.
targetModel = TargetModel()
targetModel.fit(users, tracks, artists, sessions_train)


In [8]:
# Group of users the ranking is produced for.
USERS = [241, 364]

# Tracks from the held-out test sessions for these users; each one is used as
# the single "relevant" item of one ranking trial.
tracks_played_in_future = get_played_tracks(USERS, sessions_test)
n_trials = len(tracks_played_in_future)
position_sum = 0  # sum of the 0-based positions of the held-out tracks
in_best_n = 0  # number of trials where the held-out track ranked in the top N

for example_track_id in tqdm(tracks_played_in_future):
    # Candidate list: the held-out track followed by randomly drawn tracks.
    # NOTE(review): if find_random_n_track_ids can return the held-out track
    # itself, the candidate list contains a duplicate - confirm.
    evaluation_tracks = [example_track_id]
    evaluation_tracks.extend(find_random_n_track_ids(100))

    ranked_songs = targetModel.rank_tracks_for_users(USERS, evaluation_tracks)
    # Look the position up once and reuse it for both metrics.
    position = ranked_songs.index(example_track_id)
    position_sum += position
    if position < N:
        in_best_n += 1

if n_trials == 0:
    # Without this guard the averages below divide by zero when the selected
    # users have no plays in the test split.
    print(f"no test-set tracks found for users {USERS} - nothing to evaluate")
else:
    print("average position: ", round(position_sum / n_trials, 2))
    print(f"in top {N}: {in_best_n} times")
    print(f"recall@{N}: {round(in_best_n/n_trials * 100, 2)} %")


100%|██████████| 120/120 [04:56<00:00,  2.47s/it]

average position:  45.09
in top 10: 19 times
recall@10: 15.83 %





# Comparison - Average results

In [9]:
# Re-create and re-fit all three models with the same arguments used in the
# per-model sections above, rebinding the same notebook-level names. This
# makes the comparison below start from freshly fitted models.

# User-profile model: users, tracks and the training sessions.
userProfilemodel = UserProfileModel()
userProfilemodel.fit(users, tracks, sessions_train)

# Popularity model: users, tracks and artists (no session data).
popularityModel = PopularityModel()
popularityModel.fit(users, tracks, artists)

# Target model: users, tracks, artists and the training sessions.
targetModel = TargetModel()
targetModel.fit(users, tracks, artists, sessions_train)


In [10]:
# User groups to evaluate; every inner list is ranked for as one group.
ALL_USERS = [
    [542, 425, 321],
    [514, 281],
    [475, 386, 476],
    [317, 391],
    [288]
]

# (report header, fitted model) pairs, in the order they are ranked and
# reported. Keeping them in one list lets every model share the same loop
# body instead of three copy-pasted sets of counters.
models_to_compare = [
    ("================= Popularity Model =================", popularityModel),
    ("================ User Profile Model ================", userProfilemodel),
    ("=================== Target Model ===================", targetModel),
]
position_sums = [0] * len(models_to_compare)  # summed 0-based positions
top_n_hits = [0] * len(models_to_compare)  # trials with position < N
all_tracks_played = 0  # total number of ranking trials over all groups

for user_group in ALL_USERS:
    # Tracks from the held-out test sessions for this group; each is the
    # single "relevant" item of one ranking trial.
    tracks_played_in_future = get_played_tracks(user_group, sessions_test)
    all_tracks_played += len(tracks_played_in_future)

    for example_track_id in tqdm(tracks_played_in_future):
        # Candidate list: the held-out track followed by randomly drawn
        # tracks. The same list is given to every model so that they are
        # compared on identical candidates.
        evaluation_tracks = [example_track_id]
        evaluation_tracks.extend(find_random_n_track_ids(100))

        for model_idx, (_, model) in enumerate(models_to_compare):
            ranked_songs = model.rank_tracks_for_users(
                user_group, evaluation_tracks
            )
            # Look the position up once and reuse it for both metrics.
            position = ranked_songs.index(example_track_id)
            position_sums[model_idx] += position
            if position < N:
                top_n_hits[model_idx] += 1

if all_tracks_played == 0:
    # Without this guard the averages below divide by zero when none of the
    # groups has plays in the test split.
    print("no test-set tracks found for any user group - nothing to evaluate")
else:
    for model_idx, (header, _) in enumerate(models_to_compare):
        print(header)
        print(
            "average position: ",
            round(position_sums[model_idx] / all_tracks_played, 2),
        )
        print(f"in top {N}: {top_n_hits[model_idx]} times")
        print(
            f"recall@{N}: "
            f"{round(top_n_hits[model_idx]/all_tracks_played * 100, 2)} %"
        )


100%|██████████| 432/432 [49:33<00:00,  6.88s/it]
100%|██████████| 200/200 [15:43<00:00,  4.72s/it]
100%|██████████| 353/353 [40:44<00:00,  6.93s/it]
100%|██████████| 445/445 [34:16<00:00,  4.62s/it]
100%|██████████| 60/60 [02:27<00:00,  2.46s/it]

average position:  48.45
in top 10: 164 times
recall@10: 11.01 %
average position:  49.39
in top 10: 147 times
recall@10: 9.87 %
average position:  50.26
in top 10: 154 times
recall@10: 10.34 %



