In [1]:
import pandas as pd
import numpy as np
import os
from PIL import Image
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.preprocessing.image import img_to_array
import cv2

In [3]:
import dataclasses
import zipfile as zf

dataset = !wget https://storage.googleapis.com/new_music_bucket/spectrograms.zip

dataset = zf.ZipFile(f'spectrograms.zip', 'r')
dataset.extractall()
dataset.close()

In [4]:
url = "https://drive.google.com/file/d/1HxubcoB0vIZ-ldFIBn87ll5H34WJ4xhc/view?usp=share_link"
path = "https://drive.google.com/uc?export=download&id="+url.split("/")[-2]
merged_df = pd.read_csv(path)

In [9]:
merged_df.drop(['Path', 'Danceability', 'Energy', 'Key',
       'Loudness', 'Mode', 'Speechiness', 'Acoutsicness', 'Instrumentalness',
       'Liveness', 'Valence', 'Tempo'], inplace=True, axis=1)

In [10]:
shuffled_df = merged_df.sample(frac=1)
train_df, val_df, test_df = shuffled_df[:7000], shuffled_df[7000:8550], shuffled_df[8550:]

In [11]:
def get_X_y(df):

  X_pic, X_stats = [], []
  y = []

  for name in df['NPZ_Path']:
    loaded_npz = np.load(name, allow_pickle=True)

    pic = loaded_npz['pic']
    X_pic.append(pic)

    stats = loaded_npz['stats']
    X_stats.append(stats)
    
    
    y.append(loaded_npz['track_popularity'])

  X_pic, X_stats = np.array(X_pic), np.array(X_stats)
  y = np.array(y)

  return (X_pic, X_stats), y



In [12]:
# Get the training data
(X_train_pic, X_train_stats), y_train = get_X_y(train_df)

In [15]:
X_train_pic.shape

(7000, 240, 320, 3)

In [16]:
# Get the validation data

(X_val_pic, X_val_stats), y_val = get_X_y(val_df)

(X_val_pic.shape, X_val_stats.shape), y_val.shape

(((1550, 240, 320, 3), (1550, 11)), (1550,))

In [17]:
# %% Get the test data
(X_test_pic, X_test_stats), y_test = get_X_y(test_df)

(X_test_pic.shape, X_test_stats.shape), y_test.shape

(((1605, 240, 320, 3), (1605, 11)), (1605,))

In [18]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_stats_scaled = scaler.fit_transform(X_train_stats)
X_test_stats_scaled = scaler.fit_transform(X_test_stats)
X_val_stats_scaled = scaler.fit_transform(X_val_stats)

In [None]:
# Define the Model

from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

# Define the Picture (CNN) Stream

input_pic = layers.Input(shape=(240, 320, 3))

x         = MobileNetV2(input_shape=((240, 320, 3)), include_top=False)(input_pic)
x         = layers.Conv2D(128, (3, 3), activation='relu', padding='same', input_shape=(240, 320, 3))(x)
x         = layers.Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(240, 320, 3))(x)
x         = layers.GlobalAveragePooling2D()(x)
x         = layers.Dense(10, activation = 'sigmoid')(x)
x         = layers.Dense(10, activation = 'sigmoid')(x)
x         = Model(inputs=input_pic, outputs=x)

# Define the Stats (Feed-Forward) Stream

input_stats = layers.Input(shape=(11,))
y = layers.Dense(64, kernel_regularizer='L1L2')(input_stats)
y = layers.Flatten()(y) 
y = layers.Dense(32, activation="relu", kernel_regularizer='L1L2')(y)
y = layers.Dense(10, activation="relu")(y)
y = Model(inputs=input_stats, outputs=y)


# Concatenate the two streams together
combined = layers.concatenate([x.output, y.output])

# Define joined Feed-Forward Layer
z = layers.Dense(4, activation="relu")(combined)

# Define output node of 1 linear neuron (regression task)
z = layers.Dense(1, activation="linear")(z)


# Define the final model
model = Model(inputs=[x.input, y.input], outputs=z)



In [None]:
from tensorflow.keras.optimizers import Adam

optimizer = Adam(learning_rate=0.01)

model.compile(loss='mse', optimizer=optimizer, metrics=['mean_absolute_error'])

In [None]:
evaluation_list = []

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

cp = ModelCheckpoint('model/', save_best_only=True)

model.fit(x=[X_train_pic, X_train_stats_scaled], y=y_train, validation_data=([X_val_pic, X_val_stats_scaled], y_val), epochs=5, callbacks=[cp])

Epoch 1/5



Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7feec09e0610>

In [None]:
evaluation_list.append(model.evaluate((X_test_pic, X_test_stats_scaled), y_test))



In [None]:
evaluation_list

[[267.32672119140625, 13.234389305114746],
 [267.3475646972656, 13.220735549926758]]