In [25]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Reading CSV file and creating DataFrame
df = pd.read_csv("data/Big_Data_2022_05_30.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,Rank,Date,Title,Danceability,Energy,Loudness,Speechiness,Acousticness,Instrumentalness,Valence,id,Date_value
0,0,1,29/05/2023,Ella Baila Sola,0.668,0.758,-5176.0,0.033,0.483,0.0,0.834,3qQbCzHBycnDpGskqOWY0E,0.485714
1,1,1,29/05/2023,Ella Baila Sola,0.668,0.758,-5176.0,0.033,0.483,0.0,0.834,3qQbCzHBycnDpGskqOWY0E,0.485714
2,2,2,29/05/2023,WHERE SHE GOES,0.652,0.8,-4019.0,0.061,0.143,0.629,0.234,7ro0hRteUMfnOioTFI5TG1,0.485714
3,3,3,29/05/2023,La Bebe - Remix,0.812,0.479,-5678.0,0.333,0.213,0.0,0.559,2UW7JaomAMuX9pZrjVpHAU,0.485714
4,4,3,29/05/2023,La Bebe - Remix,0.812,0.479,-5678.0,0.333,0.213,0.0,0.559,2UW7JaomAMuX9pZrjVpHAU,0.485714


In [26]:
# Dropping collumns not needed to train our model
df = df.drop(columns=["Date","Title","id"])
df.head()


Unnamed: 0.1,Unnamed: 0,Rank,Danceability,Energy,Loudness,Speechiness,Acousticness,Instrumentalness,Valence,Date_value
0,0,1,0.668,0.758,-5176.0,0.033,0.483,0.0,0.834,0.485714
1,1,1,0.668,0.758,-5176.0,0.033,0.483,0.0,0.834,0.485714
2,2,2,0.652,0.8,-4019.0,0.061,0.143,0.629,0.234,0.485714
3,3,3,0.812,0.479,-5678.0,0.333,0.213,0.0,0.559,0.485714
4,4,3,0.812,0.479,-5678.0,0.333,0.213,0.0,0.559,0.485714


In [27]:
#Binning our data

# Defining bin edges and labels
bins = [0, 50, 100, 150, 200]
labels = [1, 2, 3, 4]

#Creating column with binned data
df['Popularity'] = pd.cut(df['Rank'], bins=bins, labels=labels, right=True)



In [28]:
#Dropping 'Rank' and unnamed column
df = df.drop(columns=["Rank", "Unnamed: 0"])

df.head()

Unnamed: 0,Danceability,Energy,Loudness,Speechiness,Acousticness,Instrumentalness,Valence,Date_value,Popularity
0,0.668,0.758,-5176.0,0.033,0.483,0.0,0.834,0.485714,1
1,0.668,0.758,-5176.0,0.033,0.483,0.0,0.834,0.485714,1
2,0.652,0.8,-4019.0,0.061,0.143,0.629,0.234,0.485714,1
3,0.812,0.479,-5678.0,0.333,0.213,0.0,0.559,0.485714,1
4,0.812,0.479,-5678.0,0.333,0.213,0.0,0.559,0.485714,1


In [29]:
df.dtypes

Danceability         float64
Energy               float64
Loudness             float64
Speechiness          float64
Acousticness         float64
Instrumentalness     float64
Valence              float64
Date_value           float64
Popularity          category
dtype: object

In [30]:
#Removing 'Popularity' from features data
y = df['Popularity']
X = df.drop(columns="Popularity")

In [31]:
# get_dummies
y = pd.get_dummies(y)

y.head()

Unnamed: 0,1,2,3,4
0,True,False,False,False
1,True,False,False,False
2,True,False,False,False
3,True,False,False,False
4,True,False,False,False


In [32]:
#Splitting into training/testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [33]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

**Model Optimization**

In [39]:
# Optimizing the model by increasing the number of neurons in each hidden layer, adding an additional hidden layer, and adjusting the activation functions
num_input_features = len(X_train_scaled[0])
hidden_layer_nodes_1 = 300
hidden_layer_nodes_2 = 300
hidden_layer_nodes_3 = 250
hidden_layer_nodes_4 = 250

nn = tf.keras.models.Sequential()

# Creating first hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_layer_nodes_1, input_dim=num_input_features, activation="relu"))

# Creating second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_layer_nodes_2, activation="relu"))

# Creating third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_layer_nodes_3, activation="relu"))

# Creating fourth hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_layer_nodes_4, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=4, activation="sigmoid"))

# Check the structure of the model
nn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [40]:
# Compiling and training the optimized model, changing loss function as well
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
fit_model = nn.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
[1m2430/2430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 4ms/step - accuracy: 0.4157 - loss: 0.5079
Epoch 2/100
[1m2430/2430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.6194 - loss: 0.3738
Epoch 3/100
[1m2430/2430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.6654 - loss: 0.3337
Epoch 4/100
[1m2430/2430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.6838 - loss: 0.3173
Epoch 5/100
[1m2430/2430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.6985 - loss: 0.3053
Epoch 6/100
[1m2430/2430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.7041 - loss: 0.2981
Epoch 7/100
[1m2430/2430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.7121 - loss: 0.2926
Epoch 8/100
[1m2430/2430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.7154 - loss: 0.2866
Epoch 9/100


In [41]:
# Evaluate the performance of the optimized model using the loss and predictive accuracy of the model on the train dataset.
model_loss, model_accuracy = nn.evaluate(X_train_scaled,y_train,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2430/2430 - 9s - 4ms/step - accuracy: 0.7791 - loss: 0.2218
Loss: 0.22179144620895386, Accuracy: 0.779112696647644


In [42]:
# Evaluate the performance of the optimized model using the loss and predictive accuracy of the model on the test dataset.
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

810/810 - 3s - 3ms/step - accuracy: 0.7395 - loss: 0.2768
Loss: 0.27678173780441284, Accuracy: 0.739484429359436


In [44]:
nn.save('Spotify_ML_Optimization.h5')

