# ML-Pipeline für ein Song-Empfehlungssystem

In [None]:
import os

import numpy as np
import pandas as pd
from scipy.sparse import hstack
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [5]:
# Load data
# The CSV location can be overridden with the SONG_DATA_PATH environment
# variable so the notebook is not tied to a single machine. When the variable
# is unset, the original local path is used unchanged.
DATA_PATH = os.environ.get(
    "SONG_DATA_PATH",
    "/Users/michel/Documents/Data Scientist/Abschlussprojekt Song empfehlung/data/raw/mix_master.csv",
)
df = pd.read_csv(DATA_PATH)

## Daten vorbereiten

In [7]:
# Extract the release year
# First try a regular datetime parse; values it cannot parse become NaT.
release_dates = df["release_date"]
parsed_years = pd.to_datetime(release_dates, errors="coerce").dt.year

# Fallback for entries the datetime parse rejected: read a leading four-digit
# year straight from the raw text. This rescues partial dates such as "2019"
# or "2019-05", which can be coerced to NaT when the column mixes date
# precisions. Values that parsed successfully above are left untouched.
leading_years = pd.to_numeric(
    release_dates.astype(str).str.extract(r"^\s*(\d{4})", expand=False),
    errors="coerce",
).astype("float64")

# Rows without any usable year keep the sentinel value 0, as before.
df["release_year"] = parsed_years.fillna(leading_years).fillna(0).astype(int)
df["release_year"]


0       2022
1       2022
2       2022
3       2022
4       2019
        ... 
2706    2023
2707    2023
2708    2023
2709    2023
2710    2023
Name: release_year, Length: 2711, dtype: int64

In [8]:
# Prepare genres: tracks without genre information get an empty string
df["artist_genres"] = df["artist_genres"].where(df["artist_genres"].notna(), "")


In [9]:
# TF-IDF for genres + artists
# Genres are tokenised word by word (the vectorizer's default), so multi-word
# genres such as "french house" still overlap with other genres that share a
# word.
vectorizer = TfidfVectorizer(stop_words="english")
genre_matrix = vectorizer.fit_transform(df["artist_genres"])

# Artists: every comma-separated name is kept as ONE token and no stop words
# are removed. Word-level tokens combined with English stop-word removal would
# erase names made of stop words (e.g. "The Who" ends up with no tokens) and
# let unrelated artists match on a shared word. The pattern matches a run of
# non-comma characters that neither starts nor ends with whitespace.
# Missing artist names are replaced by "" because the vectorizer rejects NaN.
artist_vectorizer = TfidfVectorizer(token_pattern=r"[^,\s](?:[^,]*[^,\s])?")
artist_matrix = artist_vectorizer.fit_transform(df["artist_name"].fillna(""))

In [10]:
# Numerical features
numerical = df[["release_year", "explicit"]].copy()

# release_year uses 0 as its "unknown" sentinel. Feeding that 0 into the
# scaler would inflate the variance and squash all real years together, so
# unknown years are replaced by the median of the known years first.
# If no year is known at all, the median is NaN and the fillna(0) below
# restores the previous behaviour.
release_years = numerical["release_year"]
known_years = release_years.where(release_years > 0)
numerical["release_year"] = known_years.fillna(known_years.median())

# Any remaining gaps (e.g. a missing "explicit" flag) are treated as 0.
numerical = numerical.fillna(0)

# Standardise both columns to zero mean and unit variance.
scaler = StandardScaler()
num_scaled = scaler.fit_transform(numerical)

In [12]:
# Combine everything
# Stack the genre TF-IDF, the artist TF-IDF and the scaled numeric columns
# side by side (one row per track). CSR is requested explicitly instead of
# leaving the format to hstack's default, so the result supports row slicing.
# hstack is imported in the notebook's import cell.
feature_matrix = hstack([genre_matrix, artist_matrix, num_scaled], format="csr")

In [13]:
# Pairwise cosine similarity between all tracks:
# cos_sim[i, j] is the similarity of track i and track j.
cos_sim = cosine_similarity(X=feature_matrix, Y=None, dense_output=True)

In [14]:
def recommend(song_name, n=5):
    """Return the ``n`` tracks most similar to ``song_name``.

    The title is matched case-insensitively against ``df["track_name"]``;
    if several rows share the title, the first one is used as the query.

    Parameters
    ----------
    song_name : str
        Title of the track to start from.
    n : int, default 5
        Number of recommendations; values <= 0 yield an empty frame.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` (columns ``track_name``, ``artist_name``,
        ``artist_genres``, ``release_year``) ordered by descending similarity.
        The query row itself is never part of the result.

    Raises
    ------
    IndexError
        If no track with the given title exists.
    """
    # Work with row POSITIONS throughout: cos_sim and .iloc are positional,
    # so using index labels would break for any non-default index.
    title_matches = df["track_name"].str.lower() == song_name.lower()
    match_positions = np.flatnonzero(np.asarray(title_matches.fillna(False), dtype=bool))
    if match_positions.size == 0:
        raise IndexError(f"Song {song_name!r} not found in the catalogue.")
    query_pos = match_positions[0]

    similarities = np.asarray(cos_sim[query_pos]).ravel()

    # Stable sort keeps tied scores in their original row order.
    ranked = np.argsort(-similarities, kind="stable")

    # Drop the query row explicitly instead of assuming it is ranked first:
    # a track with identical features can tie with it and sort ahead of it.
    ranked = ranked[ranked != query_pos]

    top_positions = ranked[: max(n, 0)]
    return df.iloc[top_positions][["track_name", "artist_name", "artist_genres", "release_year"]]

# Example: the five tracks most similar to "Blinding Lights"
print(recommend(song_name="Blinding Lights", n=5))

                                             track_name  \
58                                     Call Out My Name   
54                                          Die For You   
820                                             LA FAMA   
1981                        Wake Me Up - Single Version   
31    Save Your Tears (Remix) (with Ariana Grande) -...   

                    artist_name           artist_genres  release_year  
58                   The Weeknd                                  2018  
54                   The Weeknd                                  2016  
820         ROSALÍA, The Weeknd                   latin          2022  
1981        The Weeknd, Justice  french house, new rave          2025  
31    The Weeknd, Ariana Grande                     pop          2020  
