# Assignment

#### Description: In this assignment, you are required to design and/or collect a dataset that explores the relationship between Hindi movie songs, their underlying Raga features, and potential correlations with mental health conditions such as depression, sadness, or related emotional states. The full list of conditions of interest is: Depression, Anxiety Disorders, Schizophrenia, Bipolar Disorder, Post-Traumatic Stress Disorder (PTSD), Autism Spectrum Disorder (ASD), Dementia / Alzheimer's Disease, Obsessive-Compulsive Disorder (OCD), and Sleep Disorders.


## Focus on the following key aspects: 
    1. Curate a representative collection of Hindi movie songs across diverse time periods, moods, and ragas.
    2. Identify and record relevant Raga features (e.g., scale, notes, tempo, mood classification, and other musicological attributes).
    3. Map songs to emotional states associated with mental health (e.g., sadness, joy, melancholy, relaxation, anxiety, depression) based
    on expert annotations, prior literature, or crowd-sourced perception data. 
    4. Include additional contextual details such as movie name, release year, singers, lyricists, instruments used, and lyrical sentiment.


### The primary goal is to build a dataset that can later be used for research on:
    1. How specific Ragas and song features influence or reflect emotional states.
    2. Understanding whether certain musical patterns are more strongly correlated with mental health indicators like sadness or depression.

In [1]:
import kagglehub

# Fetch the "30000-spotify-songs" Kaggle dataset through kagglehub and keep
# the value it returns, which is printed below as the dataset location.
path = kagglehub.dataset_download("joebeachcapital/30000-spotify-songs")

# Echo the location so the downloaded files can be found on disk.
print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: C:\Users\dell\.cache\kagglehub\datasets\joebeachcapital\30000-spotify-songs\versions\2


In [2]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

In [3]:
import pandas as pd


# Load the raw song table that the labelling steps below operate on.
# NOTE(review): this reads "songs.csv" from the working directory, not from
# the `path` returned by the download cell -- confirm the file was copied here.
df = pd.read_csv("songs.csv")


def classify_song(row):
    """Assign a heuristic mental-health label to one song.

    The rules use hand-picked thresholds on the song's audio features and
    are evaluated top to bottom; the first rule that matches wins.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            "valence", "energy", "tempo", "instrumentalness", "liveness",
            "speechiness", "acousticness" and "duration_ms".

    Returns:
        The label string of the first matching rule, or
        "Normal/Unclassified" when no rule matches.
    """
    valence = row["valence"]
    energy = row["energy"]
    tempo = row["tempo"]

    if valence < 0.3 and energy < 0.4 and tempo < 80:
        return "Depression"
    elif energy > 0.6 and tempo > 120 and valence < 0.4:
        return "Anxiety"
    elif row["instrumentalness"] > 0.7 and (tempo < 60 or tempo > 150):
        return "Schizophrenia"
    elif valence > 0.7 and energy > 0.7:
        return "Bipolar (Mania)"
    elif valence < 0.3 and energy < 0.3:
        return "Bipolar (Depression)"
    elif row["liveness"] > 0.7 and valence < 0.3:
        return "PTSD"
    elif row["instrumentalness"] > 0.8 and 90 <= tempo <= 110 and row["speechiness"] < 0.3:
        return "Autism"
    # The "Sleep Disorder" rule is a stricter version of the "Dementia" rule
    # (same energy/tempo limits, higher acousticness, plus a duration floor),
    # so it has to be tested first; placed after it, it could never match.
    elif energy < 0.3 and tempo < 70 and row["acousticness"] > 0.8 and row["duration_ms"] > 300000:
        return "Sleep Disorder"
    elif row["acousticness"] > 0.7 and tempo < 70 and energy < 0.3:
        return "Dementia"
    # Tempo is a float, so use an interval test: `tempo in range(98, 103)`
    # only matched tempos that were exactly whole numbers.
    elif 98 <= tempo < 103 and 0.4 <= valence <= 0.6:
        return "OCD"
    else:
        return "Normal/Unclassified"


# Run classify_song on every row (axis=1 hands the function one row at a
# time) and store the result in a new "Mental_Health_Label" column.
df["Mental_Health_Label"] = df.apply(classify_song, axis=1)

# Save the labelled table without the index column; the raga-labelling cell
# reloads this file.
df.to_csv("songs_with_mental_health_labels.csv", index=False)



In [4]:
import pandas as pd

# Reload the table written by the mental-health labelling cell.
df = pd.read_csv("songs_with_mental_health_labels.csv")  

import re

# Raga names that are searched for in track and album titles.
raga_list = [
    "Yaman", "Bhairavi", "Bhairav", "Darbari", "Todi",
    "Bageshree", "Desh", "Hindol", "Kafi", "Khamaj", "Multani",
    "Pilu", "Bihag", "Marwa", "Yaman Kalyan", "Shree"
]

# One whole-word pattern per raga, longest name first so that a more specific
# name ("Yaman Kalyan") is tried before a name it contains ("Yaman").
# sorted() is stable, so ragas of equal length keep their raga_list order.
_RAGA_PATTERNS = [
    (raga, re.compile(r"\b" + re.escape(raga.lower()) + r"\b"))
    for raga in sorted(raga_list, key=len, reverse=True)
]


def extract_raga(name):
    """Return the raga mentioned in *name*, or None when there is none.

    Matching is case-insensitive and limited to whole words, so a raga name
    embedded in a longer word (for example "desh" in "Bangladesh") is not
    reported. When several ragas occur in the text, the longest name wins.

    Args:
        name: Title to search; any value is accepted and converted with
            str() first, so None or NaN simply yield no match.

    Returns:
        The raga name as spelled in raga_list, or None.
    """
    text = str(name).lower()
    for raga, pattern in _RAGA_PATTERNS:
        if pattern.search(text):
            return raga
    return None


# Look for a raga name in the track title first ...
df['raga_name'] = df['track_name'].apply(extract_raga)
# ... and where none was found, fall back to a raga found in the album title.
df['raga_name'] = df['raga_name'].fillna(df['track_album_name'].apply(extract_raga))


def assign_raga_name(row):
    """Return a raga label for one song.

    A raga already detected in the "raga_name" field is returned as is.
    Otherwise a label is approximated from hand-picked thresholds on the
    audio features; the rules are tried top to bottom and the first match
    wins, with "Kafi" as the fallback.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            "raga_name", "energy", "tempo", "acousticness" and
            "danceability".

    Returns:
        The raga label as a string.
    """
    detected = row['raga_name']
    # A detected raga is always a non-empty string. A plain truthiness test
    # is not enough because a missing value may arrive as NaN, which is
    # truthy and would be returned as the label.
    if isinstance(detected, str) and detected:
        return detected

    if row['energy'] < 0.35 and row['tempo'] < 70 and row['acousticness'] > 0.7:
        return "Bhairavi"
    elif row['energy'] < 0.45 and row['tempo'] < 80 and row['acousticness'] > 0.6:
        return "Darbari"
    elif row['energy'] < 0.6 and row['tempo'] < 90:
        return "Todi"
    elif row['energy'] > 0.6 and row['danceability'] > 0.6 and row['tempo'] > 100:
        return "Yaman"
    elif row['energy'] > 0.5 and row['tempo'] > 90:
        return "Bageshree"
    elif row['energy'] > 0.7 and row['danceability'] > 0.7:
        return "Pilu"
    else:
        return "Kafi"


# Give every row a raga label: the detected name when there is one,
# otherwise the feature-based approximation from assign_raga_name.
df['raga_label'] = df.apply(assign_raga_name, axis=1)

# Save the table, now including the raga columns, without the index column.
df.to_csv("songs_with_raga_labels.csv", index=False)

# Print a confirmation and the first rows of the title and raga columns.
print("Raga extraction and approximate naming completed!")
print(df[['track_name', 'raga_name', 'raga_label']].head())


Raga extraction and approximate naming completed!
                                          track_name raga_name raga_label
0  I Don't Care (with Justin Bieber) - Loud Luxur...      None      Yaman
1                    Memories - Dillon Francis Remix      None  Bageshree
2                    All the Time - Don Diablo Remix      None      Yaman
3                  Call You Mine - Keanu Silva Remix      None      Yaman
4            Someone You Loved - Future Humans Remix      None      Yaman


In [5]:

# Continue from the output of the raga-labelling cell. This cell used to read
# "songs_with_emotions.csv", which is only written by the emotion-mapping cell
# further down (and that cell reads this cell's output), so a top-to-bottom
# run failed and the raga columns were never carried forward.
df = pd.read_csv("songs_with_raga_labels.csv")

# Typical instrumentation per genre keyword. Keys are matched as substrings
# of the lower-cased playlist genre, in the order written here.
genre_instruments = {
    "classical": "Violin/Cello/Piano",
    "rock": "Guitar/Drums",
    "pop": "Synth/Guitar/Bass",
    "hip hop": "Synth/Drum Machine",
    "edm": "Synth/Drum Machine",
    "jazz": "Saxophone/Piano/Drums",
    "blues": "Guitar/Harmonica/Piano",
    "country": "Guitar/Banjo/Fiddle",
    "reggae": "Guitar/Bass/Keyboard",
    "metal": "Guitar/Bass/Drums",
    "folk": "Acoustic Guitar/Fiddle",
    "raga": "Sitar/Tabla/Bansuri",
    "indian classical": "Sitar/Tabla/Bansuri"
}


def guess_instrument(row):
    """Guess the dominant instrument(s) of a song from its playlist metadata.

    A few keyword rules on the playlist name and genre are checked first;
    if none applies, the genre is looked up in genre_instruments. Because
    the keyword rules run first, genres containing "rock", "classical" or
    "raga" never reach the table lookup.

    Args:
        row: Record offering .get() for "playlist_name" and
            "playlist_genre" (a missing field is treated as empty text).

    Returns:
        An instrument description string, or "Unknown" when nothing matches.
    """
    playlist = str(row.get("playlist_name", "")).lower()
    genre_text = str(row.get("playlist_genre", "")).lower()

    # Keyword rules, highest priority first.
    if "synth" in playlist or "electronic" in genre_text:
        return "Synth"
    if "rock" in genre_text or "guitar" in playlist:
        return "Guitar/Drums"
    if "classical" in genre_text or "raga" in genre_text:
        return "Sitar/Tabla"
    if "flute" in playlist:
        return "Flute"

    # Table lookup: the first key contained in the genre text decides.
    return next(
        (
            instruments
            for keyword, instruments in genre_instruments.items()
            if keyword in genre_text
        ),
        "Unknown",
    )

# Run guess_instrument on every row (axis=1 hands the function one row at a
# time) and store the result in a new "instrument" column.
df["instrument"] = df.apply(guess_instrument, axis=1)

# Save the table without the index column; the emotion-mapping cell reloads
# this file.
df.to_csv("songs_with_instruments.csv", index=False)


  df = pd.read_csv("songs_with_emotions.csv")


In [6]:
import pandas as pd

# Reload the table written by the instrument-guessing cell.
df = pd.read_csv("songs_with_instruments.csv")

def map_emotion(row):
    """Map one song's valence, energy and tempo to a coarse emotional state.

    The rules are checked in order and the first one that holds decides the
    result; songs matching none of them are labelled "Neutral".

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            "valence", "energy" and "tempo".

    Returns:
        One of "Sadness/Depression", "Anxiety/Stress", "Joy/Excitement",
        "Calm/Relaxation" or "Neutral".
    """
    mood = row["valence"]
    drive = row["energy"]
    bpm = row["tempo"]

    # Low valence with low energy.
    if mood < 0.3 and drive < 0.4:
        return "Sadness/Depression"
    # Low valence with high energy.
    if mood < 0.4 and drive > 0.6:
        return "Anxiety/Stress"
    # High valence with high energy.
    if mood > 0.7 and drive > 0.6:
        return "Joy/Excitement"
    # Mid-range valence at a slow tempo.
    if 0.4 <= mood <= 0.7 and bpm < 90:
        return "Calm/Relaxation"
    return "Neutral"

# Run map_emotion on every row (axis=1 hands the function one row at a time)
# and store the result in a new "Emotional_State" column.
df["Emotional_State"] = df.apply(map_emotion, axis=1)

# Save the table, now including the emotion column, without the index column.
df.to_csv("songs_with_emotions.csv", index=False)


  df = pd.read_csv("songs_with_instruments.csv")
