In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from fairlearn.metrics import MetricFrame, selection_rate, demographic_parity_difference
from fairlearn.postprocessing import ThresholdOptimizer
from sklearn.neighbors import NearestNeighbors

In [None]:
# Track data; genres present: 'pop', 'country', 'blues', 'jazz', 'reggae', 'rock', 'hip hop'
dataset = pd.read_csv('tcc_ceds_music.csv')
dataset = dataset.assign(artist_name=dataset['artist_name'].str.lower())

# Artist gender data: lower-case NAME and rename it so it can serve as the merge key
gender = pd.read_csv('gender.csv')
gender = (
    gender
    .assign(NAME=gender['NAME'].str.lower())
    .rename(columns={'NAME': 'artist_name'})
)

# Keep only tracks whose artist appears in both tables
combined = dataset.merge(gender, on='artist_name', how='inner')

In [20]:
# Prepare datasets and combine them

def remove_stuff(df, column):
    """Return a copy of ``df`` with list punctuation and whitespace stripped from ``column``.

    The characters ``[``, ``]``, newline, carriage return, double quote and
    space are deleted from every string in ``column``. Spaces inside a value
    are removed as well (e.g. ``"hip hop"`` becomes ``"hiphop"``).

    Args:
        df: DataFrame containing ``column``; it is not modified.
        column: Name of a string column to clean.

    Returns:
        A new DataFrame equal to ``df`` except for the cleaned ``column``.
        Missing values in ``column`` stay missing.
    """
    to_remove = ['[', ']', '\n', '\r', "\"", " "]

    # Work on a copy so the caller's frame is left untouched and re-running
    # the calling cell starts from the same raw strings every time.
    cleaned = df.copy()
    values = cleaned[column]
    for item in to_remove:
        # regex=False: '[' and ']' must be treated as literal characters
        # regardless of the installed pandas version's default.
        values = values.str.replace(item, '', regex=False)
    cleaned[column] = values
    return cleaned

# Clean the raw GENRE strings, then turn each one into a list of genre names
gender_new = remove_stuff(gender, 'GENRE')
gender_new['GENRE'] = gender_new['GENRE'].str.split(',')
print(gender_new.head(2))

combined1 = dataset.merge(gender_new, on='artist_name', how='inner')


def _track_genre_in_artist_genres(row):
    """Tell whether the track's ``genre`` equals, ignoring case, any entry of the artist's ``GENRE`` list."""
    artist_genres = row['GENRE']
    if not isinstance(artist_genres, list):
        # Missing GENRE values are not lists and never match
        return False
    return any(row['genre'].lower() == g.lower() for g in artist_genres)


# Keep rows whose track genre is among the artist's genres, then drop rows with any missing value
genre_match = combined1.apply(_track_genre_in_artist_genres, axis=1)
filtered_df = combined1[genre_match].dropna()

   CHARTMETRIC_ID artist_name ARTIST_COUNTRY    PRONOUN GENDER IS_BAND  \
0        10862574    kay yang            NaN     he/him   male   False   
1        11051639   2002 mobb            NaN  they/them   male    True   

                                  GENRE  
0                              [Others]  
1  [dance, hip-hop/rap, alternativerap]  


In [None]:
# filtered_df.columns
# Index(['Unnamed: 0', 'artist_name', 'track_name', 'release_date', 'genre',
#        'lyrics', 'len', 'dating', 'violence', 'world/life', 'night/time',
#        'shake the audience', 'family/gospel', 'romantic', 'communication',
#        'obscene', 'music', 'movement/places', 'light/visual perceptions',
#        'family/spiritual', 'like/girls', 'sadness', 'feelings', 'danceability',
#        'loudness', 'acousticness', 'instrumentalness', 'valence', 'energy',
#        'topic', 'age', 'CHARTMETRIC_ID', 'ARTIST_COUNTRY', 'PRONOUN', 'GENDER',
#        'IS_BAND', 'GENRE'],
#       dtype='object')

GENDER
androgynous         7
female            774
genderfluid         7
male             2991
mixed             174
non-binary         13
not specified       5
Name: count, dtype: int64

In [5]:
# Rows per genre: count non-null cells in every column, then show the first column's counts
per_genre_counts = filtered_df.groupby('genre').count()
per_genre_counts.iloc[:, 0]

genre
blues       231
country     736
jazz         87
pop        1692
reggae      161
rock       1064
Name: Unnamed: 0, dtype: int64

In [6]:
# Numeric columns that form the feature space of the nearest-neighbour search.
# NOTE(review): 'sadness' is listed among filtered_df's columns but is absent here - confirm this is intentional.
num_features = [
    'len', 'dating', 'violence', 'world/life', 'night/time',
    'shake the audience', 'family/gospel', 'romantic', 'communication',
    'obscene', 'music', 'movement/places', 'light/visual perceptions',
    'family/spiritual', 'like/girls', 'feelings', 'danceability',
    'loudness', 'acousticness', 'instrumentalness', 'valence', 'energy',
    'age',
]

# Non-numeric columns (not used by the model below)
categor = [
    'artist_name', 'track_name', 'genre', 'GENDER', 'topic', 'ARTIST_COUNTRY',
]

# Feature matrix: same rows and row order as filtered_df, restricted to num_features
filtered = filtered_df.filter(items=num_features)

# fit() returns the estimator itself, so the fitted model can be bound directly
model = NearestNeighbors(metric='cosine', n_neighbors=6).fit(filtered)
model

In [7]:
import random
def recommend_songs_by_genre(user_genre, model, filtered, df, num_features, n_recommendations=50):
    """Recommend the nearest neighbours of a random song of ``user_genre``.

    A seed song is drawn at random from the rows of ``df`` whose ``genre``
    matches ``user_genre`` case-insensitively. The seed's feature row is taken
    from ``filtered`` and passed to ``model.kneighbors``; the returned
    neighbours, minus the seed itself, are looked up in ``df``. Neighbours are
    searched among everything the model was fitted on, so the result can
    contain songs of other genres.

    Args:
        user_genre: Genre name used to pick the seed song.
        model: Fitted object exposing ``kneighbors(X, n_neighbors=...)`` that
            returns ``(distances, positions)``.
        filtered: Feature DataFrame whose rows are in the same order as the
            rows of ``df`` (rows are matched by position).
        df: DataFrame with at least the columns ``artist_name``,
            ``track_name``, ``genre`` and ``GENDER``.
        num_features: Unused; kept so existing calls keep working.
        n_recommendations: Maximum number of songs to return.

    Returns:
        DataFrame with columns ``artist_name``, ``track_name``, ``genre`` and
        ``GENDER`` ordered as returned by the model, or ``None`` (after
        printing a message) when no song of ``user_genre`` exists.
    """
    # Row positions (not labels) of the songs in the requested genre
    genre_mask = (df['genre'].str.lower() == user_genre.lower()).to_numpy()
    genre_positions = np.flatnonzero(genre_mask).tolist()

    if not genre_positions:
        print(f"No songs found for the genre '{user_genre}'. Please try a different genre.")
        return None

    # Draw the seed from a plain list: random.choice on a pandas Index can
    # fail on Python versions that truth-test the sequence.
    seed_position = random.choice(genre_positions)

    # Keep the seed's features as a one-row DataFrame for kneighbors
    input_features = filtered.iloc[[seed_position]]

    # Never ask for more neighbours than there are fitted samples, which
    # would raise ValueError on small datasets.
    n_fitted = getattr(model, 'n_samples_fit_', len(filtered))
    n_neighbors = min(n_recommendations + 1, n_fitted)
    _, neighbor_positions = model.kneighbors(input_features, n_neighbors=n_neighbors)
    neighbor_positions = neighbor_positions.flatten()

    # Drop the seed by position rather than assuming it is the first hit:
    # with tied distances another song can be ranked ahead of it.
    recommended_positions = neighbor_positions[neighbor_positions != seed_position][:n_recommendations]

    # Map the positions back to the original DataFrame
    return df.iloc[recommended_positions][['artist_name', 'track_name', 'genre', 'GENDER']]


In [8]:
# Ask for a genre and fetch recommendations seeded from a random song of that genre
user_genre = input("Enter a genre: ").strip()
recommendations = recommend_songs_by_genre(
    user_genre, model, filtered, filtered_df, num_features
)

# The recommender returns None when the genre has no songs
if recommendations is None:
    print("No recommendations found.")
else:
    print("\nRecommended Songs in the genre:", user_genre)
    print(recommendations)


Recommended Songs in the genre: rock
                        artist_name                         track_name  \
30160                    the police              every breath you take   
17354                        zz top                              leila   
3088                   donna summer                          walk away   
17456             steve miller band             keeps me wondering why   
29503                    the police                          so lonely   
3466            the pointer sisters                     should i do it   
28905  creedence clearwater revival                       feelin' blue   
17106                       santana                   stay (beside me)   
28923                       chicago                      make me smile   
2639                michael jackson                       take me back   
29984                          toto                     it's a feeling   
9731                    johnny cash                   it ain't me babe   


GENDER \
male             2991 \
female            774 \
mixed             174 \
non-binary         13 \
androgynous         7 \
genderfluid         7 \
not specified       5 \
Name: count, dtype: int64

In [None]:
def _top_by_gender(recs, gender, genre, n=5):
    """Return the first ``n`` rows of ``recs`` with the given ``GENDER`` and ``genre``.

    The genre comparison ignores case, matching how the seed song is chosen
    from the user's input. When ``recs`` is ``None`` (no recommendations were
    produced) an empty frame with the expected columns is returned.
    """
    if recs is None:
        return pd.DataFrame(columns=['artist_name', 'track_name', 'genre', 'GENDER'])
    same_gender = recs['GENDER'] == gender
    same_genre = recs['genre'].str.lower() == genre.lower()
    return recs[same_gender & same_genre].head(n)


male_recommendations = _top_by_gender(recommendations, 'male', user_genre)
female_recommendations = _top_by_gender(recommendations, 'female', user_genre)

print("First 5 Male Recommendations:")
print(male_recommendations)

print("\nFirst 5 Female Recommendations:")
print(female_recommendations)

First 5 Male Recommendations:
     artist_name                 track_name genre GENDER
8454        kyle    ispy (feat. lil yachty)   pop   male
8468  the weeknd                    secrets   pop   male
8818    labrinth              mount everest   pop   male
7602   will.i.am  hot wings (i wanna party)   pop   male
8705       bazzi                     myself   pop   male

First 5 Female Recommendations:
          artist_name             track_name genre  GENDER
7974            lorde          the love club   pop  female
6245             p!nk  get the party started   pop  female
7895  kacey musgraves          blowin' smoke   pop  female
6604   britney spears           do somethin'   pop  female
8650   camila cabello           real friends   pop  female
