#                                                       importations initiales

# In [5]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer

# lecture du dataset et base du système de reco

# In [11]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import average_precision_score
from sklearn.feature_extraction.text import TfidfVectorizer

# Build a content-based similarity matrix between albums: one-hot genre
# indicators plus TF-IDF weights of the free-text descriptors, compared with
# cosine similarity. The module-level names `albums` and `similarity_df`
# produced here are what the recommendation/evaluation code below consumes.

# Load the CSV file
music = pd.read_csv('rym_top_5000_all_time.csv')

# Drop the ranking/rating columns, which are not used as features
music = music.drop(columns=['Ranking', 'Average Rating', 'Number of Ratings', 'Number of Reviews'])

# One-hot encode the genres: every distinct value of the 'Genres' column
# becomes its own indicator column.
# NOTE(review): if a 'Genres' cell holds several genres in one string, the
# whole string is treated as a single category — confirm this is intended.
music = pd.get_dummies(music, columns=['Genres'])

# Replace missing values in the Descriptors column with an empty string so
# the vectorizer below only receives strings
music['Descriptors'] = music['Descriptors'].fillna('')

# Turn the Descriptors column into TF-IDF vectors
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(music['Descriptors'])

# Convert the (sparse) TF-IDF matrix into a dense DataFrame aligned on the
# same row index as `music`, one column per vocabulary term
tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), index=music.index, columns=tfidf.get_feature_names_out())

# Keep the 'Album' column (as a one-column DataFrame) for the recommendations;
# it must be captured before 'Album' is dropped from `music` below
albums = music[['Album']]

# Merge the TF-IDF features with the remaining features, after removing the
# text/identifier columns.
# NOTE(review): assumes every column left in `music` is numeric, since the
# result is passed to cosine_similarity — confirm against the CSV schema.
music = pd.concat([music.drop(columns=['Descriptors', 'Artist Name', 'Release Date', 'Album']), tfidf_df], axis=1)

# Compute the pairwise cosine similarity between albums (rows of `music`)
similarity = cosine_similarity(music)

# Wrap the similarity matrix in a DataFrame whose rows and columns are both
# labelled with the album index, so it can be looked up by index label
similarity_df = pd.DataFrame(similarity, index=albums.index, columns=albums.index)

# Fonction de recommandation
def get_recommendation(title, similarity_df, albums, top_n=5):
    """Return the titles of the ``top_n`` albums most similar to ``title``.

    Args:
        title: Album title to look up in ``albums['Album']``. When several
            rows share the title, the first matching row is used.
        similarity_df: Square DataFrame of pairwise similarities whose row
            and column labels are the index labels of ``albums``.
        albums: DataFrame with an ``'Album'`` column.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of album titles ordered from most to least similar. The
        queried album itself is never part of the result.

    Raises:
        IndexError: If ``title`` does not appear in ``albums['Album']``.
    """
    # Locate the index label of the requested album.
    matches = albums.index[(albums['Album'] == title).to_numpy()]
    if len(matches) == 0:
        raise IndexError(f"album {title!r} not found in albums['Album']")
    idx = matches[0]

    # Remove the queried album explicitly instead of assuming it sorts first:
    # an all-zero feature vector has self-similarity 0, and an exact tie with
    # an identical album could otherwise leave the query in the result.
    # A stable sort keeps tied albums in their original order.
    scores = (
        similarity_df[idx]
        .drop(labels=idx)
        .sort_values(ascending=False, kind='mergesort')
    )

    # Map the best-scoring index labels back to album titles.
    return albums.loc[scores.index[:top_n], 'Album'].tolist()

# Example query: print the albums most similar to 'Ok Computer'
print(get_recommendation(title='Ok Computer', similarity_df=similarity_df, albums=albums))

# Fonction pour calculer le Reciprocal Rank
def reciprocal_rank(y_true, y_scores):
    """Calculate the Reciprocal Rank of the first relevant item.

    Items are ranked by descending score; the result is ``1 / rank`` of the
    best-ranked item whose label in ``y_true`` equals 1.

    Args:
        y_true: Sequence of relevance labels (1 = relevant), aligned with
            ``y_scores``.
        y_scores: Sequence of scores, one per item.

    Returns:
        ``1 / rank`` (rank starting at 1) of the first relevant item, or
        ``0.0`` when no item is relevant.
    """
    ranked_items = sorted(range(len(y_scores)), key=lambda i: y_scores[i], reverse=True)
    # The rank is the position in the sorted order, not the item's original
    # position in the input sequences.
    for rank, item in enumerate(ranked_items, start=1):
        if y_true[item] == 1:  # first relevant item found
            return 1 / rank
    return 0.0

# Fonction d'évaluation
def evaluation(similarity_df, albums, top_n=10):
    """Evaluate Mean Average Precision (MAP) and Mean Reciprocal Rank (MRR).

    For every album in ``similarity_df.index`` the ``top_n`` most similar
    other albums are ranked by descending similarity (the same ordering
    ``get_recommendation`` uses), and the two metrics are averaged over all
    albums.

    NOTE: relevance is a placeholder. The top-ranked neighbour is always
    labelled relevant and every other neighbour irrelevant, so the scores do
    not measure recommendation quality against real ground truth.

    Args:
        similarity_df: Square DataFrame of pairwise similarities with
            identical row and column labels.
        albums: DataFrame of album titles. Kept for interface compatibility;
            neighbours are resolved by index label, so titles are not needed.
        top_n: Maximum number of neighbours scored per album.

    Returns:
        A ``(mean_average_precision, mean_reciprocal_rank)`` tuple, or
        ``(0.0, 0.0)`` when no album has any neighbour to score.
    """
    average_precisions = []
    reciprocal_ranks = []

    for idx in similarity_df.index:
        # Work on index labels directly. Going through titles and back costs
        # a full scan of `albums` per recommendation and resolves to the
        # wrong row when two albums share a title.
        neighbours = (
            similarity_df[idx]
            .drop(labels=idx)
            .sort_values(ascending=False, kind='mergesort')
            .index[:top_n]
        )
        # Scores are read from row `idx`, as before.
        y_scores = similarity_df.loc[idx, neighbours].tolist()
        if not y_scores:
            continue  # nothing to rank (single album or top_n <= 0)

        # Assume the first recommendation is the relevant one; size the label
        # vector from the actual number of neighbours so it always matches.
        y_true = [1] + [0] * (len(y_scores) - 1)

        # Compute the metrics
        average_precisions.append(average_precision_score(y_true, y_scores))
        reciprocal_ranks.append(reciprocal_rank(y_true, y_scores))

    if not average_precisions:
        return 0.0, 0.0

    # Compute the mean scores
    mean_average_precision = sum(average_precisions) / len(average_precisions)
    mean_reciprocal_rank = sum(reciprocal_ranks) / len(reciprocal_ranks)

    return mean_average_precision, mean_reciprocal_rank

# Run the evaluation over every album in the similarity matrix and report both metrics
map_score, mrr_score = evaluation(similarity_df=similarity_df, albums=albums)
print(
    f'Mean Average Precision (MAP): {map_score}',
    f'Mean Reciprocal Rank (MRR): {mrr_score}',
    sep='\n',
)

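# Notebook output of the recommendation example above:
# ['Milo Goes to College', 'Lights Camera Revolution', 'Doolittle', 'Setting Sons', 'Suicidal Tendencies']

# Notebook output of the evaluation cell (no MAP/MRR values were printed):
# KeyboardInterrupt: 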