# COLLABORATIVE FILTERING (Surprise SVD)

Test du modèle SVD sur les faux utilisateurs (1001 à 1005).

## Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

from surprise import Dataset
from surprise import Reader

from surprise import SVD

from sklearn.metrics import roc_auc_score

import itertools

## Data imports 

In [2]:
# Training ratings; the first CSV column is used as the DataFrame index.
df_train = pd.read_csv(
    'filtered data/filtered_rating_fm_dataset_train.csv',
    index_col=0,
)

  mask |= (ar1 == a)


In [3]:
# One CSV per synthetic user; the file name carries the user id and the genre.
files = ['last_fm_fake_user(1001)_jazz.csv',
         'last_fm_fake_user(1002)_classic.csv',
         'last_fm_fake_user(1003)_pop.csv',
         'last_fm_fake_user(1004)_rock.csv',
         'last_fm_fake_user(1005)_rap.csv']

fake_user_frames = []
for file in files:
    df_temp = pd.read_csv(f'filtered data/fake_user/{file}', index_col=0)
    df_temp['rating'] = 100  # placeholder rating for every fake-user track -- to be revisited
    fake_user_frames.append(df_temp)

# Append all fake users in a single concat: concatenating inside the loop
# re-copies the (large) training frame on every iteration.
df_train = pd.concat([df_train, *fake_user_frames])

In [4]:
# Collapse duplicate (user_id, track_id) pairs into a single row each,
# then turn the group keys back into regular columns.
df_train = (
    df_train
    .groupby(by=['user_id', 'track_id'])
    .first()
    .reset_index()
)

In [5]:
# Held-out ratings; the first CSV column is used as the DataFrame index.
df_test = pd.read_csv(
    'filtered data/filtered_rating_fm_dataset_test.csv',
    index_col=0,
)

In [6]:
# One row per track_id, used further down to look up artist and track names.
df_track = (
    df_train
    .groupby(by=['track_id'])
    .first()
    .reset_index()
)

## Data Preparation

In [7]:
# Declare the rating bounds, then hand Surprise the three columns it expects
# in (user, item, rating) order.
reader = Reader(rating_scale=(1, 101))
data = Dataset.load_from_df(
    df_train.loc[:, ['user_id', 'track_id', 'rating']],
    reader,
)

In [8]:
# Build the training set from the whole dataset (no hold-out split is made here).
trainset = data.build_full_trainset()

In [9]:
def get_reco(model, df_train, user_id, n=50, filter=False):
    """Score the tracks of ``df_train`` for one user and return the best ones.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict(uid, iid, r_ui=...)``; element ``[3]``
        of the returned prediction is used as the score.
    df_train : pandas.DataFrame
        Must contain the columns ``'user_id'`` and ``'track_id'``.
    user_id : hashable
        User to score tracks for.
    n : int, default 50
        Number of top-scored rows to keep. A negative value returns every
        scored row, unsorted.
    filter : bool, default False
        When True, tracks that already appear for ``user_id`` in ``df_train``
        are left out of the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``'user_id'``, ``'track_id'`` and ``'score'``.
    """
    # Tracks already associated with this user in the training data.
    seen_tracks = set(
        df_train.loc[df_train['user_id'] == user_id, 'track_id'].unique()
    )

    # Decide which tracks to score first, so no prediction is computed for a
    # track that would be discarded afterwards.
    candidates = [
        track_id
        for track_id in df_train['track_id'].unique()
        if not (filter and track_id in seen_tracks)
    ]
    scores = [model.predict(user_id, track_id, r_ui=1)[3] for track_id in candidates]

    recommendation = pd.DataFrame({
        'user_id': [user_id] * len(candidates),
        'track_id': candidates,
        'score': scores,
    })

    if n >= 0:
        recommendation = recommendation.sort_values(by=['score'], ascending=False).iloc[:n]

    return recommendation
    

## Modèle SVD

In [10]:
# Fix the seed: SVD factors are randomly initialised, so without it the
# recommendations below change on every kernel restart.
svd = SVD(n_factors=150, n_epochs=30, random_state=42)
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1ccd4988850>

### Utilisateur Jazz 1001

In [11]:
# Top 10 for the fake jazz user, tracks already in their training data included.
user = 1001
recommendation = get_reco(svd, df_train, user, n=10)

# Attach artist and track names to the recommended track ids.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Nina Simone,Don'T Let Me Be Misunderstood
1,Frank Sinatra,I Get A Kick Out Of You
2,Norah Jones,Wish I Could
3,Norah Jones,What Am I To You
4,Nina Simone,Little Girl Blue
5,Norah Jones,Until The End
6,Nina Simone,Feeling Good
7,Billie Holiday,Blue Moon
8,Louis Armstrong,Mack The Knife
9,Ray Charles,I'Ve Got A Woman


In [12]:
# Top 10 for the fake jazz user, restricted to tracks absent from their training data.
user = 1001
recommendation = get_reco(svd, df_train, user, n=10, filter=True)

# Attach artist and track names to the recommended track ids.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Daft Punk,"Harder, Better, Faster, Stronger"
1,The Cure,Boys Don'T Cry
2,Pet Shop Boys,What Have I Done To Deserve This
3,Eels,Elizabeth On The Bathroom Floor
4,Boston,More Than A Feeling
5,The Postal Service,Brand New Colony
6,The Smiths,I Know It'S Over
7,Death Cab For Cutie,Soul Meets Body
8,I Am Kloot,Over My Shoulder
9,Air,Sexy Boy


### Utilisateur Classique 1002

In [14]:
# Top 10 for the fake classical user, tracks already in their training data included.
user = 1002
recommendation = get_reco(svd, df_train, user, n=10)

# Attach artist and track names to the recommended track ids.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Wolfgang Amadeus Mozart,Lacrimosa
1,Wolfgang Amadeus Mozart,Sanctus
2,Wolfgang Amadeus Mozart,Benedictus
3,Antonio Vivaldi,Allegro
4,Ludwig Van Beethoven,Ode To Joy
5,Ludwig Van Beethoven,5Th Symphony
6,Wolfgang Amadeus Mozart,Tuba Mirum
7,Wolfgang Amadeus Mozart,Dies Irae
8,Johann Sebastian Bach,Air On A G String
9,Wolfgang Amadeus Mozart,Agnus Dei


In [13]:
# Top 10 for the fake classical user, restricted to tracks absent from their training data.
user = 1002
recommendation = get_reco(svd, df_train, user, n=10, filter=True)

# Attach artist and track names to the recommended track ids.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Bon Iver,Blood Bank
1,The Black Crowes,Goodbye Daughters Of The Revolution
2,Maurice Ravel,Bolero
3,Oasis,Falling Down
4,Paramore,Misery Business
5,Queen,Seven Seas Of Rhye
6,Cream,Sunshine Of Your Love
7,Модест Петрович Мусоргский,Pictures At An Exhibition: Bydlo
8,The Shins,Phantom Limb
9,Blackbud,Heartbeat


### Utilisateur Pop 1003

In [15]:
# Top 10 for the fake pop user, tracks already in their training data included.
user = 1003
recommendation = get_reco(svd, df_train, user, n=10)

# Attach artist and track names to the recommended track ids.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Britney Spears,Gimme More
1,Britney Spears,If U Seek Amy
2,Nelly Furtado,Say It Right
3,Kaiser Chiefs,Ruby
4,Madonna,Like A Prayer
5,Gwen Stefani,Rich Girl
6,Madonna,Like A Virgin
7,Katy Perry,Hot N Cold
8,Alicia Keys,Fallin'
9,Rihanna,Unfaithful


In [14]:
# Top 10 for the fake pop user, restricted to tracks absent from their training data.
user = 1003
recommendation = get_reco(svd, df_train, user, n=10, filter=True)

# Attach artist and track names to the recommended track ids.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Coldplay,Viva La Vida
1,Britney Spears,Circus
2,Gabriella Cilmi,Sweet About Me
3,Bright Eyes,Kaite Come True
4,Mgmt,Electric Feel
5,Guns N' Roses,November Rain
6,The Clash,London Calling
7,Limp Bizkit,Hold On
8,The Cure,A Forest
9,Flipsyde,Someday


### Utilisateur Rock 1004

In [16]:
# Top 10 for the fake rock user, tracks already in their training data included.
user = 1004
recommendation = get_reco(svd, df_train, user, n=10)

# Attach artist and track names to the recommended track ids.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Soulfly,Unleash
1,Linkin Park,Shadow Of The Day
2,Rage Against The Machine,Wake Up
3,Black Sabbath,Paranoid
4,Linkin Park,In The End
5,Nirvana,Rape Me
6,Deftones,Hole In The Earth
7,Korn,A.D.I.D.A.S.
8,Disturbed,Down With The Sickness
9,Linkin Park,With You


In [15]:
# Top 10 for the fake rock user, restricted to tracks absent from their training data.
user = 1004
recommendation = get_reco(svd, df_train, user, n=10, filter=True)

# Attach artist and track names to the recommended track ids.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Radiohead,Fake Plastic Trees
1,Frank Zappa,King Kong
2,Mana,Labios Compartidos
3,Metallica,Nothing Else Matters
4,Whitesnake,Here I Go Again
5,The Jam,In The City
6,R.E.M.,Losing My Religion
7,Soulfly,Unleash
8,Nirvana,Heart-Shaped Box
9,At The Gates,Blinded By Fear


### Utilisateur Rap 1005

In [17]:
# Top 10 for the fake rap user, tracks already in their training data included.
user = 1005
recommendation = get_reco(svd, df_train, user, n=10)

# Attach artist and track names to the recommended track ids.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Tim Buckley,Phantasmagoria In Two
1,Kanye West,Gold Digger
2,50 Cent,Candy Shop
3,Kanye West,Touch The Sky
4,Eminem,I'M Back
5,Kanye West,Family Business
6,Eminem,The Real Slim Shady
7,Lupe Fiasco,Superstar
8,Kanye West,Lil Jimmy (Skit)
9,Snoop Dogg,Signs


In [16]:
# Top 10 for the fake rap user, restricted to tracks absent from their training data.
user = 1005
recommendation = get_reco(svd, df_train, user, n=10, filter=True)

# Attach artist and track names to the recommended track ids.
recomended_tracks = pd.DataFrame({'track_id': list(recommendation['track_id'].values)})
recomended_tracks.merge(df_track, on='track_id')[['artist_name', 'track_name']]

Unnamed: 0,artist_name,track_name
0,Ray Lamontagne,Empty
1,Interpol,Evil
2,Kanye West,Stronger
3,Flipsyde,Someday
4,Yann Tiersen,Sur Le Fil
5,Jeff Buckley,"Lover, You Should'Ve Come Over"
6,Feist,One Two Three Four
7,Kanye West,Good Life
8,Kanye West,I Wonder
9,The Gathering,Saturnine
