### Recommender

In [37]:
pip install fuzzywuzzy

Note: you may need to restart the kernel to use updated packages.


In [45]:
import os
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
import spotipy
from fuzzywuzzy import fuzz
from IPython.display import IFrame
from spotipy.oauth2 import SpotifyClientCredentials

import config

# Build the Spotify client from the client id / secret stored in config
_spotify_credentials = SpotifyClientCredentials(
    client_id=config.client_id,
    client_secret=config.client_secret,
)
sp = spotipy.Spotify(auth_manager=_spotify_credentials)

# Load necessary data
# The folder holding the CSV files defaults to the original location, but can be
# overridden with the RECOMMENDER_DATA_DIR environment variable so the notebook
# is not tied to a single machine.
DATA_DIR = Path(os.environ.get("RECOMMENDER_DATA_DIR", r"C:\Users\ETTORE\Documents\Iron Hack\week 6"))
df = pd.read_csv(DATA_DIR / "model_cluster_data.csv")  # read later through its 'cluster' and 'id' columns
top100_chart = pd.read_csv(DATA_DIR / "billboard_data.csv")  # read later through its 'songs' column

# Load scaler and kmeans_model
def load(spot_model="spot_model.pickle"):
    """Load a pickled object from disk.

    Parameters
    ----------
    spot_model : str or path-like
        Path of the pickle file to read.

    Returns
    -------
    object or None
        The unpickled object, or None when the file does not exist. In that
        case a message naming the missing file is printed.

    Notes
    -----
    pickle.load can execute arbitrary code while unpickling; only load files
    that come from a trusted source.
    """
    try:
        with open(spot_model, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        # Best-effort: say which file is missing and hand None back to the caller
        print(f"File not found! ({spot_model})")
        return None

# Read the scaler and the KMeans model from their pickle files, in that order
scaler, kmeans_model = (
    load(spot_model=pickle_name)
    for pickle_name in ("scaler_spot.pickle", "spot_model.pickle")
)

# Function to recommend a song
def recommend_song(input_song):
    """Recommend a song from the same cluster as ``input_song``.

    Searches Spotify (market "GB") for ``input_song``, fetches the audio
    features of the first hit, scales them with the module-level ``scaler``,
    predicts a cluster with ``kmeans_model`` and samples one random row of
    ``df`` whose ``cluster`` column equals the predicted value.

    Parameters
    ----------
    input_song : str
        Search query passed to Spotify.

    Returns
    -------
    tuple
        ``(rec_song, track_id)``: a one-row DataFrame sampled from ``df`` and
        the Spotify id of the track that matched ``input_song``.

    Raises
    ------
    RuntimeError
        If ``scaler`` or ``kmeans_model`` is None (its pickle file was not loaded).
    IndexError
        If the Spotify search returns no track for ``input_song``.
    ValueError
        If Spotify returns no audio features for the matched track.
    """
    # load() returns None for a missing pickle; fail early with a readable message
    if scaler is None or kmeans_model is None:
        raise RuntimeError("scaler / kmeans_model are not loaded; check the pickle files")

    # Search for the input song
    results = sp.search(q=input_song, limit=1, market="GB")
    items = results["tracks"]["items"]
    if not items:
        raise IndexError(f"No Spotify track found for {input_song!r}")
    track_id = items[0]["id"]

    # Extract audio features of the input song
    features = sp.audio_features(track_id)
    if not features or features[0] is None:
        raise ValueError(f"Spotify returned no audio features for track {track_id!r}")
    # Drop the metadata columns so only the feature columns reach the scaler
    song_features = pd.DataFrame(features).drop(
        columns=['analysis_url', 'track_href', 'uri', 'id', 'type', "duration_ms"]
    )
    sf_scaled = pd.DataFrame(scaler.transform(song_features), columns=song_features.columns)

    # Predict the cluster of the input song (single row, so take the first prediction)
    predicted_cluster_value = kmeans_model.predict(sf_scaled)[0]

    # Pick one random song of df from the same cluster
    rec_song = df[df['cluster'] == predicted_cluster_value].sample(1)
    return rec_song, track_id

# Get user input
input_song = input("Enter a song: ")

# Minimum fuzz.partial_ratio score (0-100) for treating the input as a chart song
FUZZY_MATCH_THRESHOLD = 90

# Preprocess user input and song titles to lowercase
# (surrounding whitespace in the typed text is ignored for matching only)
input_song_lower = input_song.strip().lower()
top100_chart['songs_lower'] = top100_chart['songs'].str.lower()

# Check if the song is in billboard_data.csv
found_match = False
for song_title, song_title_lower in zip(top100_chart['songs'], top100_chart['songs_lower']):
    if not isinstance(song_title_lower, str):
        continue  # missing title in the chart file, nothing to compare against
    if fuzz.partial_ratio(input_song_lower, song_title_lower) >= FUZZY_MATCH_THRESHOLD:
        # Continue with the chart's own spelling of the title
        input_song = song_title
        found_match = True
        break

def _spotify_player(track_id):
    """Return an embedded Spotify player (IFrame) for the given track id."""
    return IFrame(src="https://open.spotify.com/embed/track/" + track_id,
                  width="320",
                  height="80",
                  frameborder="0",
                  allowtransparency="true",
                  allow="encrypted-media")


if found_match:
    print("Your song is cool!")
    # Search for the song on Spotify
    results = sp.search(q=input_song, limit=1, market="GB")
    items = results["tracks"]["items"]
    if items:
        track_id = items[0]["id"]
        # Display the Spotify player for the recognized song
        display(_spotify_player(track_id))
    else:
        # The chart title matched, but Spotify returned nothing to embed
        print("Could not find this song on Spotify.")
else:
    print("Your song is not in the Billboard chart. We suggest you another song.")
    # Call the recommend_song function
    recommendation, user_track_id = recommend_song(input_song)
    recommended_track_id = recommendation['id'].values[0]
    # Build the embedded players for the user's input song and the recommended song
    user_track_player = _spotify_player(user_track_id)
    recommended_track_player = _spotify_player(recommended_track_id)

    # Display both players
    display(user_track_player)
    display(recommended_track_player)


Enter a song:  uptown funk


Your song is not in the Billboard chart. We suggest you another song.


### Check that everything is OK

In [34]:
# Distinct cluster labels stored on the KMeans model
np.unique(kmeans_model.labels_)

array([0, 1, 2, 3, 4, 5, 6, 7])

In [35]:
# Number of rows of df in each cluster, most frequent first
cluster_sizes = df["cluster"].value_counts()
cluster_sizes

cluster
2    281
4    265
6    188
7    166
1    148
3    113
0     78
5     34
Name: count, dtype: int64