### Importing Modules

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

### Loading Dataset

In [19]:
# Load the processed track table; the path is relative to the notebook's working directory.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which usually means the CSV
# was written together with its index — confirm, and consider re-exporting with index=False.
df = pd.read_csv('../data/processed/musics.csv')

In [20]:
# Preview the first five rows to check the available columns and their values.
df.head()

Unnamed: 0,track_id,track_name,track_artist,track_album_id,track_album_name,playlist_id,playlist_genre,playlist_subgenre,danceability,energy,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,track_album_release_year,decade,emotion
0,6f807x0ima9a1j3VPbc7VN,I Don't Care (with Justin Bieber) - Loud Luxur...,Ed Sheeran,2oCs0DGTsRO98Gh5ZSl2Cx,I Don't Care (with Justin Bieber) [Loud Luxury...,37i9dQZF1DXcZDD7cfEKhW,Pop,Dance Pop,0.639315,1.184784,...,0.0583,-0.339496,-0.396881,0.0653,0.057389,0.037703,194754,2019,2010's,Happy
1,0r7CVbZTWZgbTCYdfa2P31,Memories - Dillon Francis Remix,Maroon 5,63rPSO264uRjW1X5E6cWv6,Memories (Dillon Francis Remix),37i9dQZF1DXcZDD7cfEKhW,Pop,Dance Pop,0.487226,0.634033,...,0.0373,-0.472078,-0.379048,0.357,0.80555,-0.783082,162600,2019,2010's,Happy
2,1z1Hg7Vb0AhHDiEmnDE79l,All the Time - Don Diablo Remix,Zara Larsson,1HoSmj2eLcsrR0vE9gThr4,All the Time (Don Diablo Remix),37i9dQZF1DXcZDD7cfEKhW,Pop,Dance Pop,0.134657,1.266579,...,0.0742,-0.440724,-0.396782,0.11,0.463533,0.111062,176616,2019,2010's,Happy
3,75FpbthrwQmzHlBJLuGdC7,Call You Mine - Keanu Silva Remix,The Chainsmokers,1nqYsOef1yKKuGOVchbsk6,Call You Mine - The Remixes,37i9dQZF1DXcZDD7cfEKhW,Pop,Dance Pop,0.431921,1.261126,...,0.102,-0.667817,-0.396841,0.204,-0.972935,0.034727,169093,2019,2010's,Sad
4,1e8PAfcKUYoKkxPhrHqw4x,Someone You Loved - Future Humans Remix,Lewis Capaldi,7m7vv9wlQ4i0LFuJiE2zsQ,Someone You Loved (Future Humans Remix),37i9dQZF1DXcZDD7cfEKhW,Pop,Dance Pop,-0.038171,0.732186,...,0.0359,-0.436693,-0.396881,0.0833,0.942356,0.109871,189052,2019,2010's,Happy


In [21]:
# Rows per playlist genre, most frequent first — shows how evenly the genres are represented.
df['playlist_genre'].value_counts()

playlist_genre
Rap      5139
Pop      4934
Edm      4810
R&B      4199
Latin    3956
Rock     3632
Name: count, dtype: int64

In [22]:
# Rows per playlist subgenre, most frequent first.
df['playlist_subgenre'].value_counts()

playlist_subgenre
Indie Poptimism              1523
Southern Hip Hop             1422
Progressive Electro House    1412
Electro House                1407
Neo Soul                     1394
Hip Hop                      1287
Dance Pop                    1265
Gangster Rap                 1224
Trap                         1206
Electropop                   1172
Tropical                     1144
Urban Contemporary           1124
Latin Hip Hop                1118
Hard Rock                    1056
Big Room                     1028
Latin Pop                    1028
Post-Teen Pop                 974
Pop Edm                       963
New Jack Swing                901
Album Rock                    891
Classic Rock                  872
Permanent Wave                813
Hip Pop                       780
Reggaeton                     666
Name: count, dtype: int64

In [23]:
# Rows per emotion label, most frequent first.
df['emotion'].value_counts()

emotion
Happy       8064
Sad         7246
Exciting    6357
Chill       2749
Epic        2254
Name: count, dtype: int64

In [24]:
# Rows per release decade, most frequent first.
df['decade'].value_counts()

decade
2010's    19624
2000's     3321
1990's     1680
1980's      794
2020's      626
1970's      525
1960's       98
1950's        2
Name: count, dtype: int64

In [60]:
# Rows per artist, most frequent first (pandas abbreviates the printed result when it is long).
df['track_artist'].value_counts()

track_artist
Queen                        128
Martin Garrix                 87
Don Omar                      82
David Guetta                  80
Dimitri Vegas & Like Mike     68
                            ... 
Aitor Blond                    1
Timmo Hendriks                 1
Lowkey                         1
Domastic                       1
Lab's Cloud                    1
Name: count, Length: 10315, dtype: int64

### Music Filtering

In [61]:
def filter_music(df, artist=None, album=None, genres=None, subgenres=None, emotions=None, decades=None):
    """
    Filters music based on specified criteria.

    All supplied criteria must match (logical AND between criteria). Within one
    criterion a row matches when its column value is exactly equal to any of the
    requested values. A criterion that is ``None``, an empty collection, or a
    falsy scalar (e.g. ``''``) is ignored.

    Each criterion accepts either a single value (``artist='Queen'``) or any
    list-like of values (list, tuple, set, ``pd.Series``, NumPy array, ...).

    Args:
        df (pd.DataFrame): The music dataset. Only the columns belonging to the
            criteria that are actually used need to be present.
        artist (str or list-like, optional): Artists to include
            (matched against ``track_artist``).
        album (str or list-like, optional): Album names to include
            (matched against ``track_album_name``).
        genres (str or list-like, optional): Genres to include
            (matched against ``playlist_genre``).
        subgenres (str or list-like, optional): Subgenres to include
            (matched against ``playlist_subgenre``).
        emotions (str or list-like, optional): Emotions to include
            (matched against ``emotion``).
        decades (str or list-like, optional): Decades to include
            (matched against ``decade``).

    Returns:
        pd.DataFrame: The rows of ``df`` that satisfy every active criterion.
        When no criterion is active, ``df`` itself is returned.

    Raises:
        KeyError: If an active criterion refers to a column missing from ``df``.
    """
    # (column, requested values) pairs, applied in the order listed.
    criteria = (
        ('track_artist', artist),
        ('track_album_name', album),
        ('playlist_genre', genres),
        ('playlist_subgenre', subgenres),
        ('emotion', emotions),
        ('decade', decades),
    )

    for column, wanted in criteria:
        if wanted is None:
            continue

        if pd.api.types.is_list_like(wanted):
            # Materialise so that emptiness can be tested safely: truth-testing
            # a Series/ndarray raises, and generators have no length.
            wanted = list(wanted)
        else:
            # A bare scalar (typically a str) is rejected by ``isin``; wrap it.
            # Falsy scalars keep meaning "no filter".
            wanted = [wanted] if wanted else []

        if not wanted:
            continue

        # Boolean indexing produces a new frame; the caller's frame is not modified.
        df = df[df[column].isin(wanted)]

    return df

In [63]:
# Example query: a row is kept only if it matches every criterion given here.
filtered_df = filter_music(
    df,
    artist=['Travis Scott'],
    genres=['Rap'],
    subgenres=['Hip Hop'],
    emotions=['Sad'],
    decades=["2010's"],
)

# Only the last expression of a cell is rendered, so the cell ends with the single preview to show.
filtered_df.head()

Unnamed: 0,track_id,track_name,track_artist,track_album_id,track_album_name,playlist_id,playlist_genre,playlist_subgenre,danceability,energy,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,track_album_release_year,decade,emotion
4964,7h0d2h0fUmzbs7zeFigJPn,HIGHEST IN THE ROOM (feat. ROSALÍA & Lil Baby)...,Travis Scott,1Sf8GsXG32t0jNrX11xqWx,JACKBOYS,37i9dQZF1DX0XUsuxWHRQd,Rap,Hip Hop,-0.667265,-1.132733,...,0.0346,-0.542401,-0.396881,0.0992,-1.669793,1.194443,244874,2019,2010's,Sad
5558,5cgEadiGKjtTP1Q6NJqVP5,A-Team,Travis Scott,4Q9gIbDuQiVOexTyDxx9V0,A-Team,37i9dQZF1DWT5MrZnPU1zD,Rap,Hip Hop,-0.204086,-0.941879,...,0.0452,-0.57913,-0.370322,0.107,-1.430381,0.258709,176640,2016,2010's,Sad
5717,7wBJfHzpfI3032CSD7CE2m,STARGAZING,Travis Scott,41GuZcammIkupMPKH2OJ6I,ASTROWORLD,37i9dQZF1DWVdgXTbYm2r0,Rap,Hip Hop,-1.16501,0.492255,...,0.044,-0.753951,-0.396864,0.189,-1.916044,1.07823,270715,2018,2010's,Sad


In [64]:
# Sanity-check the filtered subset: which artists remain, and from which release years.
artist_counts = filtered_df['track_artist'].value_counts()
print(artist_counts.head(10))

release_year_counts = filtered_df['track_album_release_year'].value_counts()
print(release_year_counts.sort_index())

track_artist
Travis Scott    3
Name: count, dtype: int64
track_album_release_year
2016    1
2018    1
2019    1
Name: count, dtype: int64
