### COLLABORATIVE FILTERING (Surprise KNN)

Test du modèle KNN sur les faux utilisateurs

### Imports 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

from surprise import Dataset
from surprise import Reader

from surprise import KNNBasic

from sklearn.metrics import roc_auc_score

import itertools

## Data imports 

In [2]:
# Load the training ratings; the first CSV column is used as the DataFrame index.
df_train = pd.read_csv('filtered data/filtered_rating_fm_dataset_train.csv', index_col=0)

  mask |= (ar1 == a)


In [3]:
# CSV exports of the five synthetic single-genre users (ids 1001-1005).
files = ['last_fm_fake_user(1001)_jazz.csv',
         'last_fm_fake_user(1002)_classic.csv',
         'last_fm_fake_user(1003)_pop.csv',
         'last_fm_fake_user(1004)_rock.csv',
         'last_fm_fake_user(1005)_rap.csv']

# Rating forced onto every fake-user row.
# TODO: revisit this value (flagged "to review" by the original author).
FAKE_USER_RATING = 100

# Read every file first and concatenate once: calling pd.concat inside the loop
# would re-copy the whole training frame on each iteration.
fake_user_frames = []
for file in files:
    df_temp = pd.read_csv(f'filtered data/fake_user/{file}', index_col=0)
    df_temp['rating'] = FAKE_USER_RATING
    fake_user_frames.append(df_temp)

# Same row order as appending one by one: real ratings first, then each fake
# user in the order of `files`.
df_train = pd.concat([df_train, *fake_user_frames])

In [4]:
# Collapse the ratings to one row per (user_id, track_id) pair.
# GroupBy.first() keeps, for each remaining column, the first non-null value
# found in the group, and the result comes back sorted by the group keys.
df_train = df_train.groupby(['user_id','track_id']).first().reset_index()

In [5]:
# Load the test ratings; the first CSV column is used as the DataFrame index.
# NOTE(review): df_test is not referenced by the cells visible in this section.
df_test = pd.read_csv('filtered data/filtered_rating_fm_dataset_test.csv', index_col=0)

In [6]:
# Track lookup table with one row per track_id (first non-null value of every
# other column). It is merged with recommended track ids further down to show
# artist_name / track_name.
df_track = df_train.groupby(['track_id']).first().reset_index()

## Data Preparation

In [10]:
# Declare the rating range to Surprise as 1..101.
# NOTE(review): the fake users are all given a rating of 100; confirm that the
# real ratings in the training CSV actually span this scale.
reader = Reader(rating_scale=(1, 101))
# Surprise expects exactly three columns, in the order user, item, rating.
data = Dataset.load_from_df(df_train[['user_id', 'track_id', 'rating']], reader)

In [11]:
# Build the Surprise trainset from every rating in `data` (no held-out split).
trainset = data.build_full_trainset()

In [22]:
def get_reco(model, df_train, user_id, n=50, filter=False):
    """Score the tracks of ``df_train`` for one user and return the best ones.

    Parameters
    ----------
    model : object
        Fitted recommender exposing ``predict(uid, iid, r_ui=...)``. The
        estimated rating is read at index 3 of the returned tuple (the ``est``
        field of a Surprise ``Prediction``).
    df_train : pandas.DataFrame
        Ratings frame with at least the columns ``user_id`` and ``track_id``.
        Every distinct ``track_id`` it contains is a candidate.
    user_id : hashable
        Identifier of the user to recommend for.
    n : int, default 50
        Number of rows to keep after sorting by descending score. A negative
        value returns every candidate, unsorted.
    filter : bool, default False
        When True, tracks that ``user_id`` already has in ``df_train`` are
        excluded from the candidates.

    Returns
    -------
    pandas.DataFrame
        Columns ``user_id``, ``track_id`` and ``score``.
    """
    # Tracks already present for this user in the training data. The lookup
    # uses the `user_id` argument, not a notebook-level global.
    user_track = set(df_train.loc[df_train['user_id'] == user_id, 'track_id'].unique())

    scores = []
    track_ids = []
    user_ids = []

    for track_id in df_train['track_id'].unique():
        # Skip known tracks before predicting, so no estimate is computed for
        # a row that would be thrown away.
        if filter and track_id in user_track:
            continue

        # r_ui is only echoed back in the prediction; it does not affect the
        # estimate that is read here.
        score = model.predict(user_id, track_id, r_ui=1)[3]

        user_ids.append(user_id)
        track_ids.append(track_id)
        scores.append(score)

    recommendation = pd.DataFrame({'user_id': user_ids, 'track_id': track_ids, 'score': scores})

    if n >= 0:
        recommendation = recommendation.sort_values(by=['score'], ascending=False).iloc[:n]

    return recommendation
    

## Modèle KNN

In [15]:
# Basic KNN collaborative filter: at most k=60 neighbours are aggregated per
# prediction and min_k=15 is the minimum neighbour count.
# NOTE(review): per the Surprise docs, predictions with fewer than min_k
# neighbours fall back to the global mean rating — confirm this is intended.
knn = KNNBasic(k=60, min_k=15)
# Fitting computes the similarity matrix (MSD here, as the log below reports).
knn.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x225638e8490>

### Utilisateur Jazz 1001

In [23]:
# Top-10 KNN recommendations for fake user 1001 (jazz profile), without
# excluding the tracks this user already has in the training data.
user = 1001
recommendation = get_reco(knn, df_train, user, n=10)

# Attach artist and track names to the recommended ids for display.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Melody Gardot,Worrisome Heart
1,Billie Holiday,A Fine Romance
2,Ella Fitzgerald,Summertime
3,Louis Armstrong,Blueberry Hill
4,Billie Holiday,Blue Moon
5,Ray Charles,Born To Lose
6,Duke Ellington,Sophisticated Lady
7,Miles Davis,My Funny Valentine
8,Ray Charles,I'Ve Got A Woman
9,Nina Simone,Little Girl Blue


In [24]:
# Top-10 KNN recommendations for fake user 1001 (jazz profile), restricted to
# tracks this user does not already have in the training data.
user = 1001
recommendation = get_reco(knn, df_train, user, n=10, filter=True)

# Attach artist and track names to the recommended ids for display.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Koop,Koop Island Blues
1,Sigur Rós,Untitled 4
2,The Kills,U.R.A. Fever
3,Koop,I See A Different You
4,Tool,The Pot
5,Porcupine Tree,Blackest Eyes
6,Feist,One Two Three Four
7,The Olivia Tremor Control,Green Typewriters
8,A Perfect Circle,3 Libras
9,Neko Case,"Hold On, Hold On"


### Utilisateur Classique 1002

In [35]:
# Top-10 KNN recommendations for fake user 1002 (classical profile), without
# excluding the tracks this user already has in the training data.
user = 1002
recommendation = get_reco(knn, df_train, user, n=10)

# Attach artist and track names to the recommended ids for display.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Antonio Vivaldi,Concerto In G
1,Ludwig Van Beethoven,Ode To Joy
2,Ludwig Van Beethoven,5Th Symphony
3,Wolfgang Amadeus Mozart,Lacrimosa
4,Wolfgang Amadeus Mozart,Requiem
5,Johann Sebastian Bach,Air
6,Ludwig Van Beethoven,Für Elise
7,Neutral Milk Hotel,In The Aeroplane Over The Sea
8,Arcade Fire,Rebellion (Lies)
9,Pink Floyd,Shine On You Crazy Diamond


In [25]:
# Top-10 KNN recommendations for fake user 1002 (classical profile), restricted
# to tracks this user does not already have in the training data.
user = 1002
recommendation = get_reco(knn, df_train, user, n=10, filter=True)

# Attach artist and track names to the recommended ids for display.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Neutral Milk Hotel,In The Aeroplane Over The Sea
1,Arcade Fire,Rebellion (Lies)
2,Pink Floyd,Shine On You Crazy Diamond
3,My Bloody Valentine,Soft As Snow (But Warm Inside)
4,M.I.A.,Paper Planes
5,Snow Patrol,Chasing Cars
6,Arcade Fire,Neighborhood #1 (Tunnels)
7,The Decemberists,Here I Dreamt I Was An Architect
8,Neutral Milk Hotel,Two-Headed Boy
9,Wilco,"Jesus, Etc."


### Utilisateur Pop 1003

In [36]:
# Top-10 KNN recommendations for fake user 1003 (pop profile), without
# excluding the tracks this user already has in the training data.
user = 1003
recommendation = get_reco(knn, df_train, user, n=10)

# Attach artist and track names to the recommended ids for display.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Shakira,En Tus Pupilas
1,Alicia Keys,Wake Up
2,Shakira,Hips Don'T Lie
3,Shakira,Las De La Intuición
4,Britney Spears,Mmm Papi
5,Miley Cyrus,7 Things
6,Katy Perry,Fingerprints
7,Shakira,Fool
8,Katy Perry,I'M Still Breathing
9,Rihanna,Say It


In [26]:
# Top-10 KNN recommendations for fake user 1003 (pop profile), restricted to
# tracks this user does not already have in the training data.
user = 1003
recommendation = get_reco(knn, df_train, user, n=10, filter=True)

# Attach artist and track names to the recommended ids for display.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Afi,Prelude 12/21
1,Aesop Rock,Daylight
2,Beastie Boys,Paul Revere
3,Guns N' Roses,Better
4,Cascada,Everytime We Touch
5,Paramore,Decode
6,In Flames,Take This Life
7,Britney Spears,Gimme More
8,Oasis,Falling Down
9,Afi,The Interview


### Utilisateur Rock 1004

In [37]:
# Top-10 KNN recommendations for fake user 1004 (rock profile), without
# excluding the tracks this user already has in the training data.
user = 1004
recommendation = get_reco(knn, df_train, user, n=10)

# Attach artist and track names to the recommended ids for display.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Korn,Swallow
1,Limp Bizkit,Don'T Go Off Wandering
2,Noir Désir,L'Appartement
3,Korn,Blame
4,Deftones,My Own Summer
5,Korn,Reclaim My Place
6,Korn,Politics
7,Linkin Park,Opening
8,Deftones,Battle-Axe
9,Slipknot,Psychosocial


In [27]:
# Top-10 KNN recommendations for fake user 1004 (rock profile), restricted to
# tracks this user does not already have in the training data.
user = 1004
recommendation = get_reco(knn, df_train, user, n=10, filter=True)

# Attach artist and track names to the recommended ids for display.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Journey,Open Arms
1,Papa Roach,Not Listening
2,Neko Case,"Hold On, Hold On"
3,Boy Division,Love Will Tear Us Apart
4,Children Of Bodom,Are You Dead Yet?
5,Joe Walsh,Rocky Mountain Way
6,Afi,Kill Caustic
7,Death Cab For Cutie,Soul Meets Body
8,Rush,The Spirit Of Radio
9,Afi,Prelude 12/21


### Utilisateur Rap 1005

In [38]:
# Top-10 KNN recommendations for fake user 1005 (rap profile), without
# excluding the tracks this user already has in the training data.
user = 1005
recommendation = get_reco(knn, df_train, user, n=10)

# Attach artist and track names to the recommended ids for display.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Jay-Z,Justify My Thug
1,Kanye West,Paranoid
2,Snoop Dogg,Signs
3,Snoop Dogg,Tha Shiznit
4,Jay-Z,"Girls, Girls, Girls"
5,Jay-Z,Hard Knock Life
6,2Pac,Temptations
7,Kanye West,Wake Up Mr. West
8,Eminem,Kim
9,The Notorious B.I.G.,Ready To Die


In [28]:
# Top-10 KNN recommendations for fake user 1005 (rap profile), restricted to
# tracks this user does not already have in the training data.
user = 1005
recommendation = get_reco(knn, df_train, user, n=10, filter=True)

# Attach artist and track names to the recommended ids for display.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Kanye West,Good Life
1,Kanye West,Champion
2,Kanye West,I Wonder
3,Kanye West,The Glory
4,Black Kids,I'Ve Underestimated My Charm (Again)
5,Kanye West,Stronger
6,Crystal Castles,Magic Spells
7,Crystal Castles,Crimewave (Crystal Castles Vs. Health)
8,Crystal Castles,Untrust Us
9,Crystal Castles,Alice Practice
