In [32]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [33]:
#import libraries for exploratory data analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [34]:
df = pd.read_csv('/content/music_dataset.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,artist_name,track_name,release_date,genre,lyrics,len,dating,violence,world/life,...,sadness,feelings,danceability,loudness,acousticness,instrumentalness,valence,energy,topic,age
0,0,mukesh,mohabbat bhi jhoothi,1950,pop,hold time feel break feel untrue convince spea...,95,0.000598,0.063746,0.000598,...,0.380299,0.117175,0.357739,0.454119,0.997992,0.901822,0.339448,0.13711,sadness,1.0
1,4,frankie laine,i believe,1950,pop,believe drop rain fall grow believe darkest ni...,51,0.035537,0.096777,0.443435,...,0.001284,0.001284,0.331745,0.64754,0.954819,2e-06,0.325021,0.26324,world/life,1.0
2,6,johnnie ray,cry,1950,pop,sweetheart send letter goodbye secret feel bet...,24,0.00277,0.00277,0.00277,...,0.00277,0.225422,0.456298,0.585288,0.840361,0.0,0.351814,0.139112,music,1.0
3,10,pérez prado,patricia,1950,pop,kiss lips want stroll charm mambo chacha merin...,54,0.048249,0.001548,0.001548,...,0.225889,0.001548,0.686992,0.744404,0.083935,0.199393,0.77535,0.743736,romantic,1.0
4,12,giorgos papadopoulos,apopse eida oneiro,1950,pop,till darling till matter know till dream live ...,48,0.00135,0.00135,0.417772,...,0.0688,0.00135,0.291671,0.646489,0.975904,0.000246,0.597073,0.394375,romantic,1.0


In [35]:
print(df.isnull().sum())


Unnamed: 0                  0
artist_name                 0
track_name                  0
release_date                0
genre                       0
lyrics                      0
len                         0
dating                      0
violence                    0
world/life                  0
night/time                  0
shake the audience          0
family/gospel               0
romantic                    0
communication               0
obscene                     0
music                       0
movement/places             0
light/visual perceptions    0
family/spiritual            0
like/girls                  0
sadness                     0
feelings                    0
danceability                0
loudness                    0
acousticness                0
instrumentalness            0
valence                     0
energy                      0
topic                       0
age                         0
dtype: int64


Feature Selection Summary
For building the song recommendation system, we selected features based on their relevance to mood and emotional context, focusing on the following:

**Sadness**: Reflects the emotional tone of the song, crucial for matching the listener’s mood.
**Danceability**: Indicates how suitable a song is for dancing, influencing overall mood and engagement.

**Energy**: Represents the intensity and activity level of the song, aligning with the listener's mood.

**Valence**: Measures the musical positiveness, impacting the listener's emotional response.

**Loudness**: Affects the listening experience and can influence the mood.

**Feelings**: Provides additional emotional context to enhance mood-based recommendations.

**Dating**: Reflects themes related to romance or relationships, which can affect mood.

**Romantic**: Directly related to romantic themes, useful for mood-based recommendations.

**Night/Time**: Indicates the suitability of songs for specific times or settings, influencing mood.

**World/Life**: Captures themes related to life experiences and worldview, relevant to mood.


**Family/Spiritual**: Reflects themes related to family or spirituality, contributing to mood.

**Excluded Features**
Violence: Excluded to focus solely on mood-related aspects, as it does not directly contribute to mood-based recommendations.
Topic: Redundant, as it is derived from numeric features already included.
Artist Name: While important for preferences, it does not directly affect the mood or emotional content of the songs.
Age: Not directly relevant to mood or emotional content for this recommendation system.
By concentrating on these mood-related features, the recommendation system aims to align closely with the listener's emotional and contextual preferences, providing a more personalized and engaging experience.

In [36]:
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

features = df[['sadness', 'danceability', 'energy', 'feelings',
               'dating', 'romantic', 'night/time', 'world/life', 'family/spiritual']]
target = df['track_name']
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)


In [37]:
X_train_features = X_train.values

# Compute similarity matrix
similarity_matrix = cosine_similarity(X_train_features)

# Create a mapping from index to track names
index_to_track_name = {idx: name for idx, name in enumerate(y_train)}

# Recommendation function
def recommend_songs_with_confidence(song_index, top_n=5):
    # Get similarity scores for the given song
    similarity_scores = list(enumerate(similarity_matrix[song_index]))

    # Sort songs based on similarity scores
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    # Get top_n most similar songs (excluding the song itself)
    top_songs_with_confidence = similarity_scores[1:top_n+1]

    # Convert indices to track names and include confidence scores
    recommendations = {index_to_track_name[i]: score for i, score in top_songs_with_confidence}

    return recommendations

In [46]:
# function that returns corresponding topic on giving track_name

def get_topic_by_track_name(track_name):

  try:
    topic = df[df['track_name'] == track_name]['topic'].values[0]
    return topic
  except IndexError:
    return None


In [48]:
for i in range(10):
  # Print track name and topic
  track_name = y_train.iloc[i]
  print(f"\nTrack: {track_name}")
  print(f"Topic: {df[df['track_name'] == track_name]['topic']}")

  # Recommend songs and print their topics
  print("Recommendations:")
  for song, confidence in recommend_songs_with_confidence(i).items():
    topic = get_topic_by_track_name(song)
    print(f"  - {song}: {topic} (Confidence: {confidence})")

  print("----------------")


Track: from this moment on
Topic: 4552     world/life
12548      romantic
Name: topic, dtype: object
Recommendations:
  - you are my one desire: romantic (Confidence: 0.9899831467379038)
  - near you always: romantic (Confidence: 0.9888016403481412)
  - you've really got a hold on me: romantic (Confidence: 0.9876753472491018)
  - the lovely linda: romantic (Confidence: 0.9860356384200848)
  - in a sentimental mood: romantic (Confidence: 0.9854276511602199)
----------------

Track: struttin' my stuff
Topic: 8688    obscene
Name: topic, dtype: object
Recommendations:
  - sweet talkin' guy: obscene (Confidence: 0.9999570514844447)
  - this romeo ain't got julie yet: violence (Confidence: 0.9999388849062635)
  - ruffneck: obscene (Confidence: 0.9999330504501834)
  - south muzik: obscene (Confidence: 0.9999259185539084)
  - chicken huntin': violence (Confidence: 0.9998373157645785)
----------------

Track: don't you remember?
Topic: 17255    world/life
Name: topic, dtype: object
Recommenda

# *Since I dont have ground truth recomendation I hav simply evaluted the model by obsering if the topic of recomendation matches with song*

# *Since we lack user interaction data I dont think colabortaive recomendation would work here*