In [4]:
import os
import kagglehub
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import hstack


# Download the dataset; the returned value is used below as the directory
# that holds the downloaded files.
path = kagglehub.dataset_download("thebumpkin/10400-classic-hits-10-genres-1923-to-2023")
print("Path to dataset files:", path)

# List the directory once and reuse the result for both the report and the
# CSV lookup.
dataset_files = os.listdir(path)
print("\nFiles in dataset directory:")
print(dataset_files)

# os.listdir() returns entries in arbitrary order, so sort the candidates to
# make the choice deterministic if more than one CSV is present.
csv_candidates = sorted(f for f in dataset_files if f.endswith(".csv"))
if not csv_candidates:
    # Fail with an actionable message instead of a bare IndexError.
    raise FileNotFoundError(f"No .csv file found in dataset directory: {path}")
actual_filename = csv_candidates[0]
csv_file = os.path.join(path, actual_filename)

# Load the song table.
df = pd.read_csv(csv_file)


# Columns feeding the text half and the numeric half of the feature matrix.
text_columns = ['Track', 'Artist', 'Genre']
numeric_columns = ['Danceability', 'Energy', 'Valence', 'Tempo']

# Blank out missing values in the text columns ONLY. Filling the whole frame
# with '' would also write empty strings into the numeric columns, which
# turns the fillna(0) below into a no-op and makes StandardScaler fail when
# it tries to convert '' to float. Columns outside these two lists are left
# untouched (their missing values stay NaN).
df[text_columns] = df[text_columns].fillna('').astype(str)
df['combined_text'] = df['Track'] + ' ' + df['Artist'] + ' ' + df['Genre']

# TF-IDF bag-of-words over "track artist genre".
tfidf = TfidfVectorizer()
text_features = tfidf.fit_transform(df['combined_text'])

# Coerce anything non-numeric to NaN first so that every gap is then
# replaced by 0 before standardisation.
num_features = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
num_features = num_features.fillna(0)
scaler = StandardScaler()
num_scaled = scaler.fit_transform(num_features)

# NOTE(review): TfidfVectorizer L2-normalises each row by default, while the
# four standardised numeric columns are not normalised, so the numeric part
# can outweigh the text part in cosine similarity. Consider weighting the
# two halves if text matches should matter more.
# CSR format allows the row indexing used when looking up a single song.
all_features = hstack([text_features, num_scaled]).tocsr()


# Ask for a search term. Surrounding whitespace is dropped and matching is
# case-insensitive.
user_input = input("\nEnter a song title or artist: ").strip().lower()

# Row positions (not index labels) of every song whose title or artist
# contains the search term. An empty query is a substring of every title, so
# it is treated as "no match" instead of silently selecting the first song.
hit_positions = []
if user_input:
    # regex=False: treat the input as literal text, so characters such as
    # '(' or '+' cannot raise re.error or match unintended rows.
    # na=False: rows with a missing value simply do not match.
    track_hits = df['Track'].str.lower().str.contains(user_input, regex=False, na=False)
    artist_hits = df['Artist'].str.lower().str.contains(user_input, regex=False, na=False)
    hit_positions = (track_hits | artist_hits).to_numpy().nonzero()[0]

matches = df.iloc[hit_positions]

if matches.empty:
    print("No exact song match found. Please try again.")
else:
    # Use the first match. The feature matrix is addressed by row position,
    # so positions are used throughout rather than assuming that index
    # labels and positions coincide.
    match_index = int(hit_positions[0])
    print(
        f"\nFound input song: {df['Track'].iloc[match_index]}"
        f" by {df['Artist'].iloc[match_index]}"
    )

    # Cosine similarity between the matched song and every song in the table.
    user_vector = all_features[match_index]
    similarities = cosine_similarity(user_vector, all_features).flatten()

    # Exclude the query song itself. -inf is used because a real cosine
    # similarity can be as low as -1.
    similarities[match_index] = float('-inf')

    best_index = int(similarities.argmax())
    similarity_score = similarities[best_index] * 100

    print("\nMost similar song recommendation:")
    print("Track:", df['Track'].iloc[best_index])
    print("Artist:", df['Artist'].iloc[best_index])
    print("Year:", df['Year'].iloc[best_index])
    print("Genre:", df['Genre'].iloc[best_index])
    print(f"Similarity Score: {similarity_score:.2f}%")


Path to dataset files: /kaggle/input/10400-classic-hits-10-genres-1923-to-2023

Files in dataset directory:
['ClassicHit.csv']



Enter a song title or artist:  This Time I’m In It For Love



Found input song: This Time I’m In It For Love by Player

Most similar song recommendation:
Track: Wheels
Artist: The String-A-Longs
Year: 1961
Genre: Pop
Similarity Score: 88.38%
