# AI for estimating song's popularity

In [5]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [8]:
data = pd.read_csv("dataset.csv")

# Utwórz nową kolumnę "genre_id" i przypisz wartości numeryczne do każdego unikalnego gatunku
data['artists_id'], _ = pd.factorize(data['artists'])

# Zapisz zmodyfikowane dane do pliku CSV
data.to_csv('dataset.csv', index=False)

data = data.sample(frac=0.2)

In [7]:
columns = ['artists','track_name','popularity','duration_ms','explicit','danceability','energy','key','loudness','mode','speechiness','acousticness','instrumentalness','liveness','valence','tempo','time_signature','genre_id']

# Kodowanie one-hot dla kolumny 'track_genre'
one_hot = pd.get_dummies(data['track_genre'], prefix='genre')
data = pd.concat([data, one_hot], axis=1)

amountOfFeatures = len(columns) + len(one_hot.columns)

# Podział danych na zbiór treningowy i testowy
train_data, test_data, train_labels, test_labels = train_test_split(
    data[columns + list(one_hot.columns)], data['popularity'], test_size=0.2)

train_data = tf.convert_to_tensor(train_data, dtype=tf.float32)
test_data = tf.convert_to_tensor(test_data, dtype=tf.float32)
train_labels = tf.convert_to_tensor(train_labels, dtype=tf.float32)
test_labels = tf.convert_to_tensor(test_labels, dtype=tf.float32)

ValueError: could not convert string to float: 'Nacho Vegas'

In [None]:
# Normalization

mean = train_data.numpy().mean(axis=0)
std = train_data.numpy().std(axis=0)
train_data = (train_data - mean) / std
test_data = (test_data - mean) / std

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(amountOfFeatures + 1, activation='relu', input_shape=[len(columns)]),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures, activation='relu'),
    tf.keras.layers.Dense(amountOfFeatures / 2, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.RMSprop(0.001),
              metrics=['mae', 'mse'])

In [None]:
# Trenowanie modelu
history = model.fit(train_data, train_labels, epochs=30, validation_split=0.2, verbose=0)

In [None]:
test_results = model.evaluate(test_data, test_labels, verbose=0)
test_mae = test_results[1]

In [None]:
# Wykres
plt.plot(history.history['mse'])
plt.plot(history.history['val_mse'])
plt.title('Mean Squared Error')
plt.ylabel('MSE')
plt.xlabel('Epoch')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.ylim(top=1000)
plt.ylim(bottom=0)
print("Mean Absolute Error:", test_mae)
plt.show()