In [None]:
import matplotlib.pyplot as plt
# plt.style.use('seaborn-whitegrid')

# #set matplotlib defaults
# plt.rc('figure' , autolayout = True)
# plt.rc('axes' , labelweight = 'bold' , labelsize = 'large' , titleweight = 'bold' , titlesize = 18 , titlepad = 10)
# plt.rc('animation' , html = 'html5')

import pandas as pd
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.model_selection import GroupShuffleSplit

# Load the Spotify track table from a CSV file; the path is relative to the
# notebook's working directory.
spotify = pd.read_csv('./archive/spotify.csv')




In [None]:
# Preview the first rows of the loaded table (rendered as the cell's output).
spotify.head()

In [None]:
# Per-column count of missing values (kept around for inspection).
nan_counts = spotify.isna().sum()

# Work on a copy of the raw table and discard every row that has a missing
# value in ANY column, then split the regression target off from the inputs.
X = spotify.copy()
X = X.dropna()
y = X.pop('track_popularity')

# One artist label per remaining row (the column also stays inside X).
artists = X['track_artist']

# Numeric input columns -> standardized.
features_num = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'duration_ms',
]

# Categorical input columns -> one-hot encoded.
features_cat = ['playlist_genre']

# Report how many distinct artists are left after dropping incomplete rows.
n_artists = artists.nunique(dropna=False)
print(f'There are {n_artists} artists in the dataset.')

# Column-wise preprocessing: scale the numeric features and one-hot encode
# the categorical ones; columns not listed above are not passed through.
preprocessor = make_column_transformer(
    (StandardScaler(), features_num),
    (OneHotEncoder(), features_cat),
)


In [None]:
# We'll do a "grouped" split to keep all of an artist's songs in one
# split or the other. This is to help prevent signal leakage.
def group_split(X, y, group, train_size=0.75, random_state=None):
    """Split X and y into train/test parts without splitting any group.

    All rows sharing the same label in ``group`` end up on the same side of
    the split.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature rows; indexed positionally with ``.iloc``.
    y : pandas.Series
        Target values aligned row-for-row with ``X``.
    group : array-like
        One group label per row of ``X``.
    train_size : float, default 0.75
        Passed to ``GroupShuffleSplit`` as the training proportion.
    random_state : int, RandomState instance or None, default None
        Seed forwarded to ``GroupShuffleSplit``. Pass an int to get the same
        split on every run; ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    # Only the first split is consumed, so request exactly one.
    splitter = GroupShuffleSplit(
        n_splits=1,
        train_size=train_size,
        random_state=random_state,
    )
    train_idx, test_idx = next(splitter.split(X, y, groups=group))
    return (
        X.iloc[train_idx],
        X.iloc[test_idx],
        y.iloc[train_idx],
        y.iloc[test_idx],
    )

# Artist-grouped split into training and validation parts.
X_train, X_valid, y_train, y_valid = group_split(X, y, artists)

# Fit the preprocessing on the training rows only, then reuse the fitted
# transformer on the validation rows.
X_train = preprocessor.fit_transform(X_train)
X_valid = preprocessor.transform(X_valid)

# Divide the targets by 100 (presumably popularity is on a 0-100 scale,
# which would map it to [0, 1] -- confirm against the dataset).
y_train = y_train.div(100)
y_valid = y_valid.div(100)

# Number of feature columns produced by the preprocessor; the models below
# use this as their input shape.
n_features = X_train.shape[1]
input_shape = [n_features]
print('Input shape: {}'.format(input_shape))

In [None]:
from tensorflow import keras
from tensorflow.keras import layers


# Baseline: a single Dense unit with no activation, i.e. a linear model.
model = keras.Sequential()
model.add(layers.Dense(1, input_shape=input_shape))

model.compile(loss='mae', optimizer='adam')

# Train without per-epoch console output (verbose=0); the per-epoch metrics
# are still recorded on the returned History object.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=50,
    batch_size=512,
    verbose=0,
)

# Plot training vs. validation loss and report the lowest validation loss.
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot()
print("Minimum validation loss: {}".format(history_df['val_loss'].min()))

In [None]:
# Larger network: two ReLU hidden layers (128 and 64 units) and a single
# linear output unit.
model = keras.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=input_shape))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))

model.compile(loss='mae', optimizer='adam')

# Same training setup as the baseline, but with default (per-epoch) logging.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=50,
    batch_size=512,
)

# Plot training vs. validation loss and report the lowest validation loss.
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot()
print("Minimum validation loss: {}".format(history_df['val_loss'].min()))

In [None]:
# Early stopping: halt training once the monitored loss stops improving.
from tensorflow.keras import callbacks

early_stopping = callbacks.EarlyStopping(
    restore_best_weights=True,  # roll back to the best epoch's weights
    patience=5,                 # epochs without improvement before stopping
    min_delta=0.001,            # smallest change that counts as improvement
)
    


In [None]:
# Two-hidden-layer network trained with early stopping.
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=input_shape),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])
model.compile(
    loss="mae",
    optimizer="adam",
)
history = model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    # Model.fit documents `callbacks` as a list of callback instances, so the
    # single callback is wrapped in a list rather than passed bare.
    callbacks=[early_stopping],
    epochs=50,  # upper bound; early stopping may end training sooner
    batch_size=512,
)

# Plot training vs. validation loss and report the lowest validation loss.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ["loss", "val_loss"]].plot()
print("Minimum Validation Loss: {:0.4f}".format(history_df['val_loss'].min()))



In [None]:
# Two-hidden-layer network with dropout after the first hidden layer.
model = keras.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=input_shape))
# rate=0.5: during training, randomly zero 50% of the activations that are
# fed to the next layer.
model.add(layers.Dropout(rate=0.5))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))

model.compile(optimizer="adam", loss="mae")

# Early stopping is not used in this run; all 50 epochs are trained.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    batch_size=512,
    epochs=50,
)

# Plot training vs. validation loss and report the lowest validation loss.
history_df = pd.DataFrame(history.history)
history_df[["loss", "val_loss"]].plot()
print("Minimum Validation Loss: {:0.4f}".format(history_df['val_loss'].min()))

In [None]:
# Two-hidden-layer network with dropout after each hidden layer.
# Each Dropout(rate=0.5) randomly zeroes 50% of the activations fed to the
# following layer while training.
model = keras.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=input_shape))
model.add(layers.Dropout(rate=0.5))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(rate=0.5))
model.add(layers.Dense(1))

model.compile(optimizer="adam", loss="mae")

# Early stopping is not used in this run; all 50 epochs are trained.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    batch_size=512,
    epochs=50,
)

# Plot training vs. validation loss and report the lowest validation loss.
history_df = pd.DataFrame(history.history)
history_df[["loss", "val_loss"]].plot()
print("Minimum Validation Loss: {:0.4f}".format(history_df['val_loss'].min()))

In [None]:
# Network with dropout on the inputs and after the first hidden layer, plus
# batch normalization before the output unit; trained with plain SGD.
model = keras.Sequential([
    # The input shape must be declared on the FIRST layer: Sequential ignores
    # an `input_shape` given on a later layer, which leaves the model unbuilt
    # until the first call to fit().
    layers.Dropout(rate=0.5, input_shape=input_shape),  # drop 50% of the input features
    layers.Dense(128, activation='relu'),
    layers.Dropout(rate=0.5),  # drop 50% of the activations fed to the next layer
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(1),
])
model.compile(
    loss="mae",
    optimizer="sgd",
)
# Early stopping is not used in this run; all 50 epochs are trained, without
# per-epoch console output (verbose=0).
history = model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    epochs=50,
    batch_size=512,
    verbose=0,
)

# Plot training vs. validation loss and report the lowest validation loss.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ["loss", "val_loss"]].plot()
print("Minimum Validation Loss: {:0.4f}".format(history_df['val_loss'].min()))