<a href="https://colab.research.google.com/github/Nduta-Nduta/Vibe-Mate/blob/master/Vibe_Mate_Phase4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install Spotipy for interacting with Spotify API
!pip install spotipy




In [4]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from collections import defaultdict
import pandas as pd
import numpy as np


In [6]:
# Obtain the Spotify API credentials without ever echoing them into the
# notebook: Kaggle secrets when available, otherwise a hidden prompt.
try:
    # Try to use Kaggle secrets (per original instructions)
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    spotify_client_id = user_secrets.get_secret("SPOTIFY_CLIENT_ID")
    spotify_client_secret = user_secrets.get_secret("SPOTIFY_CLIENT_SECRET")
except ModuleNotFoundError:
    # Fallback for Colab: prompt for the credentials. getpass() does not echo
    # what is typed, so the values are not stored in the saved cell output
    # (input() would write them into the notebook in plain text).
    from getpass import getpass

    print("kaggle_secrets not found. Enter Spotify credentials for Colab:")
    spotify_client_id = getpass("Spotify Client ID: ").strip()
    spotify_client_secret = getpass("Spotify Client Secret: ").strip()

# Authenticate using Client Credentials Flow
spotify = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
    client_id=spotify_client_id,
    client_secret=spotify_client_secret
))


kaggle_secrets not found. Enter Spotify credentials for Colab:
Spotify Client ID: <redacted>
Spotify Client Secret: <redacted>


In [10]:
import pandas as pd

# Only these metadata/feature columns are retained (compliance requirement)
allowed_features = [
    'track_name', 'artists',
    'danceability', 'energy', 'valence', 'tempo', 'popularity',
]

# Read the CSV and keep an independent copy restricted to the allowed columns
df = pd.read_csv('dataset.csv').loc[:, allowed_features].copy()

# Preview the first few rows
df.head()


Unnamed: 0,track_name,artists,danceability,energy,valence,tempo,popularity
0,Comedy,Gen Hoshino,0.676,0.461,0.715,87.917,73
1,Ghost - Acoustic,Ben Woodward,0.42,0.166,0.267,77.489,55
2,To Begin Again,Ingrid Michaelson;ZAYN,0.438,0.359,0.12,76.332,57
3,Can't Help Falling In Love,Kina Grannis,0.266,0.0596,0.143,181.74,71
4,Hold On,Chord Overstreet,0.618,0.443,0.167,119.949,82


In [17]:
def find_song(song_name, artist_name=None):
    """
    Look up a song in the module-level ``df`` by case-insensitive substring
    match on ``track_name``, optionally narrowed to a given artist.

    Parameters:
    - song_name: text to search for inside the track title (surrounding
      whitespace is ignored)
    - artist_name: optional artist; when given, only rows whose ``artists``
      cell lists exactly this name (case-insensitive) are kept

    Returns:
    - dict for the first matching row, or None when nothing matches

    Rows with a missing ``track_name`` or ``artists`` value never match.
    """
    needle = song_name.strip().lower()

    # na=False makes rows with a missing title evaluate to "no match"
    title_mask = df['track_name'].str.lower().str.contains(needle, regex=False, na=False)
    matches = df[title_mask]
    if matches.empty:
        return None

    if artist_name:
        wanted = artist_name.strip().lower()

        def artist_match(cell):
            if pd.isna(cell):
                return False
            text = str(cell).lower()
            # The dataset separates collaborating artists with ';'
            # (e.g. "Ingrid Michaelson;ZAYN"); ',' is still accepted so
            # comma-separated cells keep matching as before.
            for sep in (';', ','):
                if wanted in (part.strip() for part in text.split(sep)):
                    return True
            return False

        matches = matches[matches['artists'].apply(artist_match)]

    return matches.iloc[0].to_dict() if not matches.empty else None


In [18]:
# Quick check of find_song with a title only (no artist filter): the first row
# whose title contains the text is returned, which in the recorded output is
# the Kidz Bop Kids version.
result = find_song("Blinding Lights")
print(result)


{'track_name': 'Blinding Lights', 'artists': 'Kidz Bop Kids', 'danceability': 0.606, 'energy': 0.775, 'valence': 0.867, 'tempo': 171.011, 'popularity': 0}


In [21]:
def find_song(song_name, artist_name=None):
    """
    Title-only lookup in the module-level ``df``.

    The search text is stripped and lower-cased, then compared as a plain
    (non-regex) substring against the lower-cased ``track_name`` column.
    Rows without a title never match. ``artist_name`` is accepted but is
    not used by this implementation.

    Returns the first matching row as a dictionary, or None if no title
    contains the search text.
    """
    needle = song_name.strip().lower()

    # na=False: a missing title counts as "does not contain"
    title_hits = df['track_name'].str.lower().str.contains(needle, regex=False, na=False)
    hits = df[title_hits]

    if hits.empty:
        return None
    return hits.iloc[0].to_dict()


In [22]:
# Re-run the lookup against the title-only find_song defined in the previous
# cell; the recorded output is the same first title match as before.
song_info = find_song("Blinding Lights")
print(song_info)


{'track_name': 'Blinding Lights', 'artists': 'Kidz Bop Kids', 'danceability': 0.606, 'energy': 0.775, 'valence': 0.867, 'tempo': 171.011, 'popularity': 0}


In [23]:
def get_song_data(song_name, artist_name=None):
    """
    Get song data from the dataset, falling back to the Spotify API.

    Parameters:
    - song_name: title (or part of a title) to look for
    - artist_name: optional artist; forwarded to find_song and, when given,
      added to the Spotify search query as an ``artist:`` filter

    Returns:
    - dict with track_name, artists, danceability, energy, valence, tempo,
      popularity; or None when the song is found neither in the dataset nor
      on Spotify, or when Spotify returns no audio features for the track
    """
    # Try the dataset first
    song = find_song(song_name, artist_name)
    if song is not None:
        return song

    # Not in the dataset: search Spotify, narrowing by artist when one is given
    query = f"{song_name} artist:{artist_name}" if artist_name else song_name
    results = spotify.search(q=query, type='track', limit=1)
    items = results['tracks']['items']
    if not items:
        return None

    track = items[0]

    # audio_features returns a list with one entry per requested track; the
    # entry (or the whole result) can be None when no features are available.
    feature_list = spotify.audio_features(track['id'])
    features = feature_list[0] if feature_list else None
    if features is None:
        return None

    return {
        'track_name': track['name'],
        'artists': ', '.join([a['name'] for a in track['artists']]),
        'danceability': features['danceability'],
        'energy': features['energy'],
        'valence': features['valence'],
        'tempo': features['tempo'],
        'popularity': track['popularity']
    }


In [24]:
# get_song_data consults the dataset before Spotify; the recorded output is
# identical to the dataset row returned by find_song above.
song_info = get_song_data("Blinding Lights")
print(song_info)


{'track_name': 'Blinding Lights', 'artists': 'Kidz Bop Kids', 'danceability': 0.606, 'energy': 0.775, 'valence': 0.867, 'tempo': 171.011, 'popularity': 0}


In [25]:
import numpy as np

def get_mean_vector(songs, features=['danceability', 'energy', 'valence', 'tempo']):
    """
    Average the selected numeric features over a list of songs.

    Each title is resolved through get_song_data; titles that cannot be
    resolved are skipped, and a feature missing from a resolved song
    counts as 0.

    Parameters:
    - songs: list of song titles (strings)
    - features: names of the numeric features to average, in output order

    Returns:
    - numpy array with one mean per feature; all zeros when no song
      could be resolved
    """
    resolved = (get_song_data(title) for title in songs)
    rows = [[info.get(name, 0) for name in features] for info in resolved if info]

    if not rows:
        return np.zeros(len(features))
    return np.mean(rows, axis=0)


In [26]:
# Average the default features (danceability, energy, valence, tempo) over two titles
mean_vector = get_mean_vector(["Blinding Lights", "Levitating"])
print(mean_vector)


[  0.656    0.7865   0.8395 137.014 ]


In [27]:
from collections import defaultdict

def flatten_dict_list(dict_list):
    """
    Turn a list of dictionaries into a single dictionary of lists.

    Parameters:
    - dict_list: list of dictionaries

    Returns:
    - dict mapping every key seen to the list of its values, in the order
      the dictionaries were given; a dictionary lacking a key simply
      contributes nothing to that key's list
    """
    merged = {}
    for record in dict_list:
        for field, value in record.items():
            # Create the list the first time a key is seen, then append
            merged.setdefault(field, []).append(value)
    return merged


In [28]:
# Two hand-written records with the same keys, used to show how
# flatten_dict_list groups the values per key.
sample = [
    {'track_name': 'Blinding Lights', 'danceability': 0.51, 'energy': 0.73},
    {'track_name': 'Levitating', 'danceability': 0.7, 'energy': 0.8}
]

flat = flatten_dict_list(sample)
print(flat)


{'track_name': ['Blinding Lights', 'Levitating'], 'danceability': [0.51, 0.7], 'energy': [0.73, 0.8]}


In [29]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def recommend_songs(input_songs, top_n=5, features=['danceability', 'energy', 'valence', 'tempo']):
    """
    Recommend dataset songs whose features are most similar to the input list.

    The input songs are averaged into one feature vector (get_mean_vector),
    which is compared by cosine similarity with every row of the module-level
    ``df``. Rows are visited from most to least similar.

    Parameters:
    - input_songs: list of song names
    - top_n: number of recommendations; values <= 0 yield an empty list
    - features: numerical features to compare

    Returns:
    - list of up to top_n song dictionaries (rows of ``df``). Rows whose
      title equals one of the input names (case-insensitive) and rows with
      no title are skipped. Empty when the mean vector is all zeros, which
      is what get_mean_vector returns when no input song could be resolved.
    """
    if top_n <= 0:
        return []

    # Compute mean vector for input songs
    mean_vec = get_mean_vector(input_songs, features)
    if not np.any(mean_vec):
        # Nothing to compare against: every similarity would be 0 and the
        # ranking would be arbitrary.
        return []

    # Prepare dataset feature matrix
    df_features = df[features].fillna(0).to_numpy()

    # Compute cosine similarity
    # NOTE(review): features are compared unscaled; in the dataset preview
    # tempo is in the ~75-180 range while the other features lie in 0-1, so
    # tempo likely dominates the similarity. Consider standardising.
    similarities = cosine_similarity([mean_vec], df_features)[0]

    # Titles to exclude; stripped like find_song strips its search text
    input_set = {s.strip().lower() for s in input_songs}
    recommended = []

    for idx in np.argsort(similarities)[::-1]:
        song_row = df.iloc[idx]
        title = song_row['track_name']
        if not isinstance(title, str):
            # Missing title (NaN/None): cannot be compared or displayed
            continue
        if title.lower() in input_set:
            continue
        recommended.append(song_row.to_dict())
        if len(recommended) >= top_n:
            break

    return recommended


In [32]:
# Seed titles; each is resolved to a feature vector and the vectors are averaged
input_songs = ["Blinding Lights", "Levitating", "Save Your Tears"]

# Ask for the 5 dataset rows most similar to the averaged seed vector
recommendations = recommend_songs(input_songs, top_n=5)

# Print a numbered "title by artists" line per recommendation
for i, song in enumerate(recommendations, 1):
    print(f"{i}. {song['track_name']} by {song['artists']}")


1. The Hops of Guldenberg by Punch Brothers
2. Desculpe Amor by Unha Pintada
3. Tus Ojos by Manolo Muñoz
4. Proteção de Tela by Rafinha o Big Love
5. The Final Countdown by Pickin' On Series
