<a href="https://colab.research.google.com/github/CamCranda11/MLFA25Project/blob/main/MLProjectModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Read the track table from the CSV file into a DataFrame.
song_data = pd.read_csv('/content/sample_data/spotify_data.csv')

# Preview the first rows; the trailing "\n" argument plus print's own line
# ending leave blank lines after the preview.
print("--- Original Data ---", song_data.head(), "\n", sep="\n")

In [None]:
# Columns used as the clustering feature space.
features_to_cluster = ['danceability', 'energy', 'mode']
features_df = song_data[features_to_cluster].copy()

# Missing 'mode' entries are replaced with the column's most frequent value.
mode_value = features_df['mode'].mode()[0]
features_df['mode'] = features_df['mode'].fillna(mode_value)

# Any gaps left in the other feature columns are filled with the column median.
# StandardScaler passes NaN through untouched and KMeans refuses to fit on NaN,
# so without this a single missing value would abort the clustering cell.
# Imputing (instead of dropping rows) keeps one feature row per row of
# song_data, which the later label assignment relies on.
features_df = features_df.fillna(features_df.median(numeric_only=True))

# Standardise each feature to zero mean / unit variance. Note that
# fit_transform returns a NumPy array, not a DataFrame, despite the name.
scaler = StandardScaler()
scaled_features_df = scaler.fit_transform(features_df)

print("--- Scaled Features (First 5 Rows) ---")
print(scaled_features_df[:5])
print("\n")

In [None]:
# Number of clusters requested from KMeans.
optimal_k = 4

# Build the estimator and fit it in one expression; fit() returns the
# estimator itself, so kmeans_model is the fitted model. random_state is
# fixed so repeated runs produce the same clustering.
kmeans_model = KMeans(
    n_clusters=optimal_k,
    init='k-means++',
    n_init=10,
    random_state=42,
).fit(scaled_features_df)

# One integer cluster label per row of the scaled feature matrix.
cluster_labels = kmeans_model.labels_

# Attach the labels to a copy so the originally loaded frame stays unchanged.
song_cluster_df = song_data.copy()
song_cluster_df['cluster_id'] = cluster_labels

print("--- Data with Cluster IDs ---")
display(song_cluster_df.head())
print("\n")

In [13]:
def recommend_songs(input_song_name, data_df, num_recs=3, random_state=None):
    """
    Recommends songs from the same cluster as the input song.

    Parameters
    ----------
    input_song_name : str
        Value matched exactly against the 'track_name' column.
    data_df : pandas.DataFrame
        Table that contains 'track_name' and 'cluster_id' columns.
    num_recs : int, default 3
        Maximum number of rows to return; capped at the number of
        candidate rows available.
    random_state : optional, default None
        Forwarded to DataFrame.sample. Leave as None for an unseeded draw,
        or pass a seed to make the selection reproducible.

    Returns
    -------
    pandas.DataFrame or str
        A random sample of rows that share the input song's cluster and have
        a different track name, or a message string when the song is not in
        the table or no other song shares its cluster.
    """
    matching_rows = data_df[data_df['track_name'] == input_song_name]
    # Explicit emptiness check instead of catching IndexError around the whole
    # body, so unrelated errors are never mistaken for "song not found".
    if matching_rows.empty:
        return f"Song '{input_song_name}' not found in the dataset."

    # If several rows carry the same name, the first one decides the cluster.
    song_cluster = matching_rows.iloc[0]['cluster_id']

    recommendations = data_df[
        (data_df['cluster_id'] == song_cluster) &
        (data_df['track_name'] != input_song_name)
    ]
    if recommendations.empty:
        return "No similar songs found."

    return recommendations.sample(
        n=min(num_recs, len(recommendations)),
        random_state=random_state,
    )

In [None]:
def _show_recommendations(song_name, num_recs=5):
    """
    Look up recommendations for song_name in song_cluster_df and show them.

    recommend_songs returns a plain string (not a DataFrame) when the song is
    missing or nothing else shares its cluster; selecting columns on that
    string would raise, so the message is printed instead.

    Returns whatever recommend_songs returned, so the caller can keep it.
    """
    result = recommend_songs(song_name, song_cluster_df, num_recs=num_recs)

    print(f"--- Recommendations based on '{song_name}' ---")
    if isinstance(result, str):
        print(result)
    else:
        display(result[['track_name', 'cluster_id']])
    return result


my_song = 'I Won\'t Give Up'
recommendations = _show_recommendations(my_song)

print("\n")

my_song_2 = 'Fast Car'
recommendations_2 = _show_recommendations(my_song_2)