In [40]:
import pandas as pd
import ast
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

In [41]:
# Load the cleaned genres dataset
genres_df = pd.read_csv('cleaned_genres.csv')

In [42]:
# Expanded Genre to Emotion Mapping

emotion_mapping = {
    # Indie genres
    "indie psych-pop": ["dreamy", "uplifting", "psychedelic", "hopeful"],
    "indie psychedelic rock": ["trippy", "adventurous", "surreal", "free-spirited"],
    "indie punk": ["rebellious", "energetic", "angsty", "defiant"],
    "indie quebecois": ["introspective", "moody", "melancholic", "artistic"],
    "indie r&b": ["sensual", "soulful", "smooth", "romantic"],
    "indie rock": ["youthful", "exploratory", "confident", "laid-back"],
    "indie shoegaze": ["ethereal", "nostalgic", "dreamlike", "distant"],
    
    # Indonesian genres
    "indonesian blues": ["soulful", "reflective", "melancholy", "warm"],
    "indonesian electronic": ["energized", "hypnotic", "futuristic", "immersive"],
    "indonesian experimental": ["abstract", "unpredictable", "unconventional", "thought-provoking"],
    "indonesian hip hop": ["gritty", "motivational", "bold", "authentic"],
    "indonesian indie": ["nostalgic", "dreamy", "relaxed", "introspective"],
    
    # Industrial genres
    "industrial black metal": ["intense", "dark", "aggressive", "raw"],
    "industrial hip hop": ["edgy", "rebellious", "mechanical", "futuristic"],
    "industrial metal": ["powerful", "chaotic", "energetic", "intense"],
    
    # Instrumental genres
    "instrumental bluegrass": ["optimistic", "uplifting", "joyful", "earthy"],
    "instrumental death metal": ["dark", "complex", "aggressive", "melancholic"],
    "instrumental funk": ["groovy", "playful", "upbeat", "cool"],
    "instrumental post-rock": ["reflective", "expansive", "cinematic", "meditative"],
    
    # Irish genres
    "irish black metal": ["dark", "intense", "mysterious", "melancholic"],
    "irish folk": ["warm", "joyful", "storytelling", "nostalgic"],
    "irish indie rock": ["melancholic", "youthful", "free-spirited", "contemplative"],
    
    # Italian genres
    "italian hip hop": ["energetic", "bold", "motivational", "streetwise"],
    "italian indie pop": ["dreamy", "romantic", "melancholic", "youthful"],
    "italian gothic metal": ["dark", "dramatic", "haunting", "intense"],
    "italian pop": ["vibrant", "romantic", "uplifting", "joyful"],
    
    # J-pop and K-pop
    "j-pop": ["energetic", "colorful", "cheerful", "youthful"],
    "j-rock": ["dynamic", "intense", "empowering", "youthful"],
    "k-pop": ["catchy", "energetic", "bright", "cheerful"],
    "k-rap": ["bold", "confident", "motivational", "streetwise"],
    
    # Latin genres
    "latin pop": ["romantic", "passionate", "uplifting", "vibrant"],
    "latin rock": ["intense", "energetic", "rebellious", "passionate"],
    "latin jazz": ["sophisticated", "smooth", "soulful", "relaxed"],
    
    # Metal genres
    "melodic death metal": ["intense", "melancholic", "complex", "emotional"],
    "melodic metalcore": ["aggressive", "empowering", "dramatic", "bold"],
    "progressive metal": ["complex", "introspective", "expansive", "cerebral"],
    
    # Neo-soul and R&B
    "neo soul": ["soulful", "romantic", "smooth", "reflective"],
    "r&b": ["sensual", "romantic", "smooth", "intimate"],
    
    # Electronic genres
    "synthwave": ["nostalgic", "futuristic", "dreamy", "energizing"],
    "tech house": ["groovy", "hypnotic", "steady", "energetic"],
    "trance": ["uplifting", "euphoric", "dreamlike", "energized"],
    
    # Rock and Punk
    "punk rock": ["rebellious", "raw", "energetic", "angsty"],
    "classic rock": ["nostalgic", "powerful", "free-spirited", "empowering"],
    "progressive rock": ["introspective", "complex", "expansive", "dreamy"],
    
    # Blues and Jazz
    "blues": ["soulful", "melancholic", "reflective", "warm"],
    "jazz": ["sophisticated", "relaxed", "smooth", "improvisational"],
    "jazz fusion": ["dynamic", "complex", "innovative", "cerebral"],
    
    # Other genres
    "folk": ["earthy", "nostalgic", "storytelling", "heartwarming"],
    "lo-fi beats": ["chill", "relaxed", "introspective", "soothing"],
    "world music": ["vibrant", "cultural", "expansive", "dynamic"],
    
    # Hip hop and Rap
    "hip hop": ["bold", "motivational", "streetwise", "intense"],
    "trap": ["edgy", "dark", "gritty", "empowering"],
    "conscious rap": ["thought-provoking", "introspective", "serious", "emotive"],
    
    # Pop
    "pop": ["catchy", "uplifting", "bright", "cheerful"],
    "dream pop": ["ethereal", "dreamlike", "introspective", "romantic"],
    "pop rock": ["youthful", "empowering", "vibrant", "feel-good"],
    
    # Classical and Orchestral
    "neo-classical": ["reflective", "emotional", "melancholic", "expansive"],
    "orchestral": ["dramatic", "epic", "emotional", "cinematic"]
}

In [43]:
# Add more genres from the new list provided
additional_genres = pd.read_csv('cleaned_genres.csv')

# Extend the emotion_genre_map with additional genres
for genre in additional_genres:
    if genre not in emotion_mapping:
        # Assign default or common emotional tags if specific ones are not provided
        emotion_mapping[genre] = "varied emotions, alternative, branch genre"

In [44]:
# List of dataset filenames
files = [
    'alternative_music_data.csv', 
    'blues_music_data.csv', 
    'hiphop_music_data.csv',
    'indie_alt_music_data.csv',
    'metal_music_data.csv', 
    'pop_music_data.csv', 
    'rock_music_data.csv'
]

# Load each CSV file into a DataFrame and store in a list
dataframes = [pd.read_csv(file) for file in files]

# Combine all DataFrames into a single DataFrame
data = pd.concat(dataframes, ignore_index=True)

In [45]:
print(data.columns)

Index(['Artist Name', 'Track Name', 'Popularity', 'Genres', 'Playlist',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'id', 'uri', 'track_href', 'analysis_url', 'duration_ms',
       'time_signature'],
      dtype='object')


In [46]:
# Convert 'genres' column from string representation of list to actual list
data['Genres'] = data['Genres'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# Define the features including genre
features = ['danceability', 'energy', 'valence', 'tempo', 'genres']

# Ensure that the 'genre' column exists in your data
if 'genres' not in data.columns:
    raise ValueError("The 'genres' column is missing in the dataset.")

ValueError: The 'genres' column is missing in the dataset.