In [1]:
# Perceptron 

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score
from sklearn.linear_model import Perceptron

In [2]:
df = pd.read_csv('Spotify-Dupes.csv',index_col=0)
df.drop(['track_id', 'album_name'], axis = 1, inplace = True)

In [3]:
# Use the `str.split` method to split the `artists` column into a list of artists
df["artists"] = df["artists"].str.split(';')

In [4]:
# Convert the `explicit` column to a boolean dtype
df['explicit'] = df['explicit'].astype(bool)

In [5]:
# Use the `groupby` method to group the data by `track_name`
df = df.groupby(["track_name"]).agg({"artists":lambda x: x.iloc[0],
                                      "popularity":lambda x: x.iloc[0],
                                      "duration_ms":lambda x: x.iloc[0],
                                      "explicit":lambda x: x.iloc[0],
                                      "danceability":lambda x: x.iloc[0],
                                      "energy":lambda x: x.iloc[0],
                                      "key":lambda x: x.iloc[0],
                                      "loudness":lambda x: x.iloc[0],
                                      "mode":lambda x: x.iloc[0],
                                      "speechiness":lambda x: x.iloc[0],
                                      "acousticness":lambda x: x.iloc[0],
                                      "instrumentalness":lambda x: x.iloc[0],
                                      "liveness":lambda x: x.iloc[0],
                                      "track_genre": list})


In [6]:
#df= df[~df['track_genre'].isin(["j-pop"])]
df = df[~df['track_genre'].apply(lambda x: 'j-pop' in x or 'anime' in x or 'black-metal' in x or 'bluegrass' in x or 'brazil' in x or 'cantopop' in x
                                  or 'french' in x or 'german' in x or 'indian' in x or 'iranian' in x or 'j-dance' in x or 'j-idol' in x
                                  or 'j-rock' in x or'k-pop' in x or 'malay' in x or 'mandopop' in x or 'swedish' in x or 'turkish' in x or 'world-music' in x)]

In [7]:
df.to_csv('GenreRemoved.csv')
df = df.sample(8000)
# Use the `apply` method to apply a function to each element of the `track_genre` column
# The function maps the values of the `track_genre` column to integers using a dictionary
all_genres = df["track_genre"].apply(pd.Series).stack().unique()
track_genre_dict = {genre: i for i, genre in enumerate(all_genres)}
df['track_genre'] = df['track_genre'].apply(lambda x: [track_genre_dict[a] for a in x])
df['track_genre'] = df['track_genre'].apply(set)
df['track_genre'] = df['track_genre'].apply(list)

In [8]:
all_artists = df["artists"].apply(pd.Series).stack().unique()
artist_dict = {artist: i for i, artist in enumerate(all_artists)}
df['artists'] = df['artists'].apply(lambda x: [artist_dict[a] for a in x])
df['artists'] = df['artists'].apply(set)
df['artists'] = df['artists'].apply(list)

In [9]:
df = df.dropna()

In [10]:
mlb = MultiLabelBinarizer()
artists_bin = mlb.fit_transform(df['artists'])
track_genre_bin = mlb.fit_transform(df['track_genre'])

result = []
for x in df.columns:
    if (x != 'track_genre' and x!='artists'):
        result.append(x)

In [11]:
X = df[result].values
y = track_genre_bin

In [12]:
X = np.concatenate((X, artists_bin), axis=1)

In [13]:
clf = MultiOutputClassifier(Perceptron(random_state=1))

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

In [15]:
# Fit the model to the training data
clf.fit(X_train, y_train)

MultiOutputClassifier(estimator=Perceptron(random_state=1))

In [16]:
# Make predictions on the testing data
y_pred = clf.predict(X_test)

In [17]:
# Calculate accuracy metric
accuracy = accuracy_score(y_pred, y_test)
mprecision = precision_score(y_test, y_pred, average='micro')
macprecision = precision_score(y_test, y_pred, average='macro')
wprecision = precision_score(y_test, y_pred, average='weighted')
sprecision = precision_score(y_test, y_pred, average='samples')
f=f1_score(y_test, y_pred, average='micro')

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [18]:
print('The accuracy is: ',accuracy*100,'%')
print('The Micro Precision is: ',mprecision*100,'%')
print('The Macro Precision is: ',macprecision*100,'%')
print('The Weight Precision is: ',wprecision*100,'%')
print('The Sample Precision is: ',sprecision*100,'%')
print("f1 =  ", f*100 , "%")

The accuracy is:  2.0 %
The Micro Precision is:  2.0 %
The Macro Precision is:  0.021052631578947368 %
The Weight Precision is:  0.03193612774451098 %
The Sample Precision is:  2.0 %
f1 =   1.7758046614872365 %
