In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score

In [23]:
tracks = pd.read_csv("cleaned_data.csv")
playlist = pd.read_csv("playlist.csv")

## Model training using all genres

In [13]:
track_data = tracks[~tracks['track_id'].isin(playlist['track_id'])].copy()

encoder = LabelEncoder()
track_data['track_genre'] = encoder.fit_transform(track_data['track_genre'])

features = track_data[['popularity', 'duration_ms', 'danceability', 'energy', 'key', 'loudness', 'mode', 
                       'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 
                       'time_signature']]
# features = track_data[['energy', 'key', 'mode', 
#                        'acousticness', 'valence', 'tempo']]
target = track_data['track_genre']

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=3)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)

y_pred = knn.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))
# print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.2478921779801834


## Model training using the top 10 most frequent genres

In [27]:
selected_genres = ['study', 'black-metal', 'comedy', 'heavy-metal', 'bluegrass', 'forro', 'grindcore', 'malay', 'idm', 'iranian']
# selected_genres = ['study', 'black-metal', 'comedy']
filtered_tracks = tracks[tracks['track_genre'].isin(selected_genres)].copy()
filtered_track_data = filtered_tracks[~filtered_tracks['track_id'].isin(playlist['track_id'])].copy()

encoder_filter = LabelEncoder()
filtered_track_data['track_genre'] = encoder_filter.fit_transform(filtered_track_data['track_genre'])

features = filtered_track_data[['popularity', 'duration_ms', 'danceability', 'energy', 'key', 'loudness', 'mode', 
                       'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 
                       'time_signature']]
# features = filtered_track_data[['energy', 'key', 'mode', 
#                        'acousticness', 'valence', 'tempo']]
target = filtered_track_data['track_genre']

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=3)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn_filtered = KNeighborsClassifier(n_neighbors=5)
knn_filtered.fit(X_train_scaled, y_train)

y_pred = knn_filtered.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))
# print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.7488348006214397


In [25]:
playlist_features = playlist[['popularity', 'duration_ms', 'danceability', 'energy', 'key', 'loudness', 'mode', 
                                     'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 
                                     'time_signature']]
playlist_features_scaled = scaler.transform(playlist_features)

# Predict genre of playlist
playlist_genre_predictions = knn.predict(playlist_features_scaled)
predicted_genres = encoder.inverse_transform(playlist_genre_predictions)
playlist['predicted_genre'] = predicted_genres
print(playlist[['track_name', 'predicted_genre']])

# Predict genre of playlist with selected genres
playlist_genre_predictions_filtered = knn_filtered.predict(playlist_features_scaled)
predicted_genres_filtered = encoder_filter.inverse_transform(playlist_genre_predictions_filtered)
playlist['predicted_genre'] = predicted_genres_filtered
print(playlist[['track_name', 'predicted_genre']])


                                         track_name predicted_genre
0                                     Getting Older        alt-rock
1                                     Afternoon Tea          brazil
2                                         Tennessee           blues
3   Apokaliptyczny Młot (Obliteracja Poprzez Życie)        acoustic
4                                Au Bord Du Gouffre        acoustic
..                                              ...             ...
95                                colors of a night          brazil
96                                          roasted        afrobeat
97                                    frozen grapes          brazil
98                              I Wanna Be A Writer          brazil
99                   Open the Gates O Forest Keeper     black-metal

[100 rows x 2 columns]
                                         track_name predicted_genre
0                                     Getting Older          comedy
1                       