In [91]:
!pip install -r requirements.txt

In [92]:
#import libraries for exploratory data analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [93]:
import os

# Check if the directory or extracted file exists
if not os.path.exists('music_dataset.csv'):  # Replace 'music_dataset.csv' with the extracted file/directory name
    !unzip music_dataset.zip
else:
    print("File already exists. Skipping unzip.")

File already exists. Skipping unzip.


In [94]:
df = pd.read_csv('music_dataset.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,artist_name,track_name,release_date,genre,lyrics,len,dating,violence,world/life,...,sadness,feelings,danceability,loudness,acousticness,instrumentalness,valence,energy,topic,age
0,0,mukesh,mohabbat bhi jhoothi,1950,pop,hold time feel break feel untrue convince spea...,95,0.000598,0.063746,0.000598,...,0.380299,0.117175,0.357739,0.454119,0.997992,0.901822,0.339448,0.13711,sadness,1.0
1,4,frankie laine,i believe,1950,pop,believe drop rain fall grow believe darkest ni...,51,0.035537,0.096777,0.443435,...,0.001284,0.001284,0.331745,0.64754,0.954819,2e-06,0.325021,0.26324,world/life,1.0
2,6,johnnie ray,cry,1950,pop,sweetheart send letter goodbye secret feel bet...,24,0.00277,0.00277,0.00277,...,0.00277,0.225422,0.456298,0.585288,0.840361,0.0,0.351814,0.139112,music,1.0
3,10,pérez prado,patricia,1950,pop,kiss lips want stroll charm mambo chacha merin...,54,0.048249,0.001548,0.001548,...,0.225889,0.001548,0.686992,0.744404,0.083935,0.199393,0.77535,0.743736,romantic,1.0
4,12,giorgos papadopoulos,apopse eida oneiro,1950,pop,till darling till matter know till dream live ...,48,0.00135,0.00135,0.417772,...,0.0688,0.00135,0.291671,0.646489,0.975904,0.000246,0.597073,0.394375,romantic,1.0


In [None]:
print(df.isnull().sum())


Unnamed: 0                  0
artist_name                 0
track_name                  0
release_date                0
genre                       0
lyrics                      0
len                         0
dating                      0
violence                    0
world/life                  0
night/time                  0
shake the audience          0
family/gospel               0
romantic                    0
communication               0
obscene                     0
music                       0
movement/places             0
light/visual perceptions    0
family/spiritual            0
like/girls                  0
sadness                     0
feelings                    0
danceability                0
loudness                    0
acousticness                0
instrumentalness            0
valence                     0
energy                      0
topic                       0
age                         0
dtype: int64


In [None]:
df_encoded = pd.get_dummies(df['genre'], prefix='genre').astype(int)

df_encoded = df_encoded.drop('genre_pop', axis=1, errors='ignore')

df = pd.concat([df, df_encoded], axis=1)

In [None]:
df.head()

Unnamed: 0.1,Unnamed: 0,artist_name,track_name,release_date,genre,lyrics,len,dating,violence,world/life,...,valence,energy,topic,age,genre_blues,genre_country,genre_hip hop,genre_jazz,genre_reggae,genre_rock
0,0,mukesh,mohabbat bhi jhoothi,1950,pop,hold time feel break feel untrue convince spea...,95,0.000598,0.063746,0.000598,...,0.339448,0.13711,sadness,1.0,0,0,0,0,0,0
1,4,frankie laine,i believe,1950,pop,believe drop rain fall grow believe darkest ni...,51,0.035537,0.096777,0.443435,...,0.325021,0.26324,world/life,1.0,0,0,0,0,0,0
2,6,johnnie ray,cry,1950,pop,sweetheart send letter goodbye secret feel bet...,24,0.00277,0.00277,0.00277,...,0.351814,0.139112,music,1.0,0,0,0,0,0,0
3,10,pérez prado,patricia,1950,pop,kiss lips want stroll charm mambo chacha merin...,54,0.048249,0.001548,0.001548,...,0.77535,0.743736,romantic,1.0,0,0,0,0,0,0
4,12,giorgos papadopoulos,apopse eida oneiro,1950,pop,till darling till matter know till dream live ...,48,0.00135,0.00135,0.417772,...,0.597073,0.394375,romantic,1.0,0,0,0,0,0,0


Feature Selection Summary
For building the song recommendation system, we selected features based on their relevance to mood and emotional context, focusing on the following:

**Sadness**: Reflects the emotional tone of the song, crucial for matching the listener’s mood.
**Danceability**: Indicates how suitable a song is for dancing, influencing overall mood and engagement.

**Energy**: Represents the intensity and activity level of the song, aligning with the listener's mood.

**Valence**: Measures the musical positiveness, impacting the listener's emotional response.

**Loudness**: Affects the listening experience and can influence the mood.

**Feelings**: Provides additional emotional context to enhance mood-based recommendations.

**Dating**: Reflects themes related to romance or relationships, which can affect mood.

**Romantic**: Directly related to romantic themes, useful for mood-based recommendations.

**Night/Time**: Indicates the suitability of songs for specific times or settings, influencing mood.

**World/Life**: Captures themes related to life experiences and worldview, relevant to mood.


**Family/Spiritual**: Reflects themes related to family or spirituality, contributing to mood.

**Excluded Features**
Topic: Redundant, as it is derived from numeric features already included.
Artist Name: While important for preferences, it does not directly affect the mood or emotional content of the songs.
Age: Not directly relevant to mood or emotional content for this recommendation system.
By concentrating on these mood-related features, the recommendation system aims to align closely with the listener's emotional and contextual preferences, providing a more personalized and engaging experience.

In [None]:
unique_topics = df['topic'].unique()
print(unique_topics)
unique_genre = df.columns[df.columns.str.startswith('genre_')]
print(unique_genre)

['sadness' 'world/life' 'music' 'romantic' 'violence' 'obscene'
 'night/time' 'feelings']
Index(['genre_blues', 'genre_country', 'genre_hip hop', 'genre_jazz',
       'genre_reggae', 'genre_rock'],
      dtype='object')


In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

features = df[['danceability','dating','energy','family/spiritual','feelings','music',
 'night/time','obscene','romantic','sadness','violence','world/life','genre_blues', 'genre_country', 'genre_hip hop', 'genre_jazz',
       'genre_reggae', 'genre_rock']]
target = df['track_name']
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)


In [None]:
X_train_features = X_train.values

# Compute similarity matrix
similarity_matrix = cosine_similarity(X_train_features)

# Create a mapping from index to track names
index_to_track_name = {idx: name for idx, name in enumerate(y_train)}

# Recommendation function
def recommend_songs_with_confidence(song_index, top_n=5):
    # Get similarity scores for the given song
    similarity_scores = list(enumerate(similarity_matrix[song_index]))

    # Sort songs based on similarity scores
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    # Get top_n most similar songs (excluding the song itself)
    top_songs_with_confidence = similarity_scores[1:top_n+1]

    # Convert indices to track names and include confidence scores
    recommendations = {index_to_track_name[i]: score for i, score in top_songs_with_confidence}

    return recommendations

In [None]:
# function that returns corresponding topic on giving track_name

def get_topic_by_track_name(track_name):

  try:
    topic = df[df['track_name'] == track_name]['topic'].values[0]
    genre = df[df['track_name'] == track_name]['genre'].values[0]
    return topic, genre
  except IndexError:
    return None


In [None]:
true_preds =  0
total_preds = 0
for i in range(10):
  # Print track name and topic
  track_name = y_test.iloc[i]
  input_topic,input_genre = get_topic_by_track_name(track_name)
  print(f"\nTrack: {track_name}")
  print(f"Topic: {df[df['track_name'] == track_name]['topic']}")
  print(f"Genre: {df[df['track_name'] == track_name]['genre']}")

  # Recommend songs and print their topics
  print("Recommendations:")
  flag = False
  for song, confidence in recommend_songs_with_confidence(i).items():
    recom_topic,recom_genre = get_topic_by_track_name(song)
    if recom_topic == input_topic or recom_genre == input_genre:
      flag = True
    if flag:
      true_preds += 1
    total_preds += 1
    print(f"  - {song}: {recom_topic}, {recom_genre} (Confidence: {confidence})")

  print("----------------")


Track: velvet light
Topic: 27369    sadness
Name: topic, dtype: object
Genre: 27369    rock
Name: genre, dtype: object
Recommendations:
  - i'll let nothing seperate us: romantic, blues (Confidence: 0.987118263369128)
  - sweet sweet surrender: romantic, blues (Confidence: 0.9870809459764237)
  - so in love: violence, pop (Confidence: 0.9869790773933516)
  - candy: romantic, blues (Confidence: 0.9861260368344879)
  - these arms of mine: romantic, blues (Confidence: 0.9855661102262487)
----------------

Track: andy, you're a star
Topic: 26302    violence
Name: topic, dtype: object
Genre: 26302    rock
Name: genre, dtype: object
Recommendations:
  - brand new girlfriend: obscene, country (Confidence: 0.9990911457614106)
  - livin' like a lonestar: obscene, country (Confidence: 0.998637595750978)
  - corner bar: obscene, country (Confidence: 0.998624311492159)
  - burn: violence, pop (Confidence: 0.9985086051352806)
  - drinkin': obscene, country (Confidence: 0.9984083448804114)
--------

In [None]:
accuracy = true_preds / total_preds
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 34.00%


# *Since I dont have ground truth recomendation I hav simply evaluted the model by obsering if the topic or genre of recomended track matches with input track*

# *Since we lack user interaction data I dont think colabortaive recomendation would work here*