# In [None]:
import numpy as np

# Randomization
from random import shuffle, sample
from numpy.random import permutation

# Neural Network Stuff
from keras.models import Model
from keras.layers import Dense, Input, Concatenate
from keras.layers import Flatten, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping

# For Storing/Extracting NumPy Arrays in MongoDB
import numpy as np
from bson.binary import Binary
import pickle

# Database Stuff
from pymongo import MongoClient
import os
# MongoDB connection URI, read from the DATABASE_URI environment variable.
# os.getenv returns None when the variable is not set.
CONNECTION_STRING = os.getenv('DATABASE_URI')
# Module-level client, created at import time; it is the default `database`
# argument of get_training_data.
# NOTE(review): despite the name this is a MongoClient, not a database handle.
# Indexing a client by name selects a database, not a collection -- confirm
# that callers expect this.
DB = MongoClient(CONNECTION_STRING)

# Global Parameters
# Number of tracks (rows) in each playlist matrix fed to the model.
PLAYLIST_SIZE = 10
# Number of features per track (length of a track vector / matrix columns).
N_TRACK_FEATURES = 13


# Helper Functions =========================================================


def pickle_my_array(array):
    """Serialize ``array`` with pickle and wrap the bytes for MongoDB storage.

    The payload uses pickle protocol 2 and is wrapped in a BSON ``Binary``
    with subtype 128.
    """
    payload = pickle.dumps(array, protocol=2)
    wrapped = Binary(payload, subtype=128)
    return wrapped


def unpickle_my_array(pickled_array):
    """Restore the object stored in ``pickled_array`` (a pickle byte payload).

    NOTE: ``pickle.loads`` can execute arbitrary code while deserializing, so
    this must only be fed payloads from a trusted source.
    """
    restored = pickle.loads(pickled_array)
    return restored

# ==========================================================================


def _min_max_scale(values):
    """Rescale ``values`` to the range [0, 1] independently per feature.

    The minimum and range are taken along axis 0 (the sample axis), so every
    remaining position is scaled on its own.
    """
    lowest = values.min(axis=0, keepdims=True)
    # max - min replaces ndarray.ptp(), which NumPy 2.0 removed.
    spread = values.max(axis=0, keepdims=True) - lowest
    # A feature that is identical in every sample has zero spread; dividing by
    # 1 instead maps it to 0 rather than filling the column with NaN.
    spread = np.where(spread == 0, 1, spread)
    return (values - lowest) / spread


def get_training_data(database=DB):
    """Load and min-max scale the training set from ``track_playlist_pairings``.

    Args:
        database: Object whose ``['track_playlist_pairings']`` item exposes
            ``find``. A ``MongoClient`` is also accepted, in which case the
            default database named in its connection URI is used (indexing a
            client yields a database, which has no ``find`` method).

    Returns:
        Tuple ``(X_playlists, X_tracks, y)``:
            X_playlists: array of shape (n, PLAYLIST_SIZE, N_TRACK_FEATURES),
                scaled to [0, 1] per (position, feature).
            X_tracks: array of shape (n, N_TRACK_FEATURES), scaled to [0, 1]
                per feature.
            y: stacked ``target`` values, one row per sample.

    Raises:
        ValueError: if the collection contains no documents.
    """
    if isinstance(database, MongoClient):
        database = database.get_default_database()

    # Collect rows in lists and stack once; stacking inside the loop would
    # recopy the whole accumulated array for every document.
    playlists, tracks, targets = [], [], []
    for record in database['track_playlist_pairings'].find({}):
        playlists.append(unpickle_my_array(record['playlist_matrix']).reshape(
            PLAYLIST_SIZE, N_TRACK_FEATURES))
        tracks.append(unpickle_my_array(
            record['track_vector']).reshape(N_TRACK_FEATURES))
        targets.append(np.array([record['target']]))

    if not playlists:
        raise ValueError(
            "No training data found in collection 'track_playlist_pairings'")

    X_playlists = np.stack(playlists)
    X_tracks = np.stack(tracks)
    # vstack promotes each target to a row, so y is 2-D even for one sample.
    y = np.vstack(targets)

    print('Training Data Loaded')
    print(f'Playlist Shape: {X_playlists.shape}')
    print(f'Track Shape: {X_tracks.shape}')
    print(f'Target Shape: {y.shape}')

    # Standardize Audio Feature Scaling so that every feature is between 0 and 1
    X_playlists = _min_max_scale(X_playlists)
    X_tracks = _min_max_scale(X_tracks)

    return X_playlists, X_tracks, y

def create_model():
    """Build and compile the two-branch track recommender network.

    A playlist matrix of shape (PLAYLIST_SIZE, N_TRACK_FEATURES) and a track
    vector of length N_TRACK_FEATURES are each encoded by a stack of Dense
    layers into a ``latent_dim``-wide vector. The two encodings are
    concatenated and passed through further Dense layers to a single sigmoid
    output.

    Returns:
        A compiled Keras ``Model`` named 'Track_Recommender' taking
        ``[playlist_matrix, track_vector]`` as inputs, using the 'nadam'
        optimizer, binary cross-entropy loss and the accuracy metric.
    """
    # Model Parameters
    latent_dim = 64

    # INPUTS ===================================================================================

    # TRACK INPUTS
    # Each Track is a Vector of Features
    # The trailing comma matters: (N_TRACK_FEATURES) is just an int, and a
    # shape has to be a tuple.
    track_feature_inputs = Input(
        shape=(N_TRACK_FEATURES,), name='Audio_Feature_Inputs')

    # PLAYLIST INPUTS
    # Each Playlist is a Matrix of (Track, Audio Features)
    playlist_matrix_inputs = Input(shape=(PLAYLIST_SIZE, N_TRACK_FEATURES),
                                   name='Playlist_Inputs')
    # NOTE: the misspelled layer name is kept on purpose; layer names are
    # stored with saved models, so renaming could break loading by name.
    playlist_inputs = Flatten(name='Platlists_Flattened')(
        playlist_matrix_inputs)

    # PLAYLIST ENCODING =========================================================================

    playlists = playlist_inputs
    for index in (1, 2, 3):
        playlists = Dense(256,
                          activation='relu',
                          name=f'Playlists_Dense_{index}')(playlists)

    playlists_latent = Dense(latent_dim,
                             activation='relu',
                             name='Playlists_Latent')(playlists)

    # TRACK ENCODING =========================================================================

    tracks = Dense(128,
                   activation='relu',
                   name='Tracks_Dense_1')(track_feature_inputs)

    tracks_latent = Dense(latent_dim,
                          activation='relu',
                          name='Tracks_Latent')(tracks)

    # CONCATENATE ENCODINGS ===================================================================

    concat = Concatenate(axis=-1,
                         name='Concatenated_Encodings')([playlists_latent, tracks_latent])

    # PREDICTION ==============================================================================

    # Prediction Layers 1-4: one 256-unit layer followed by three 128-unit layers
    x = concat
    for index, units in enumerate((256, 128, 128, 128), start=1):
        x = Dense(units,
                  name=f'Prediction_{index}',
                  activation='relu')(x)

    # Prediction Output
    result = Dense(1,
                   activation='sigmoid',
                   name='Final_Prediction')(x)

    # =========================================================================================

    # Build Model
    model = Model(inputs=[playlist_matrix_inputs, track_feature_inputs],
                  outputs=result,
                  name='Track_Recommender')

    # Compile Model
    model.compile(optimizer='nadam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

def fit_model(model,
              X_playlists,
              X_tracks,
              y,
              patience=10,
              batch_size=32):
    """Train ``model`` with early stopping and save it to 'models/model1'.

    Args:
        model: Compiled model taking ``[X_playlists, X_tracks]`` as inputs.
        X_playlists: Playlist input array.
        X_tracks: Track input array.
        y: Target array.
        patience: Epochs without a ``val_loss`` improvement of at least 0.001
            before training stops.
        batch_size: Mini-batch size passed to ``model.fit``.

    Returns:
        The history object returned by ``model.fit``.
    """
    stop = EarlyStopping(
        monitor='val_loss',
        min_delta=0.001,
        patience=patience,
        restore_best_weights=True
    )

    # NOTE(review): Keras takes the validation split from the end of the
    # arrays before shuffling -- confirm the stored sample order is not
    # grouped by target.
    # `workers=1` was dropped: it equals the Keras 2 default for array inputs
    # and is not an accepted argument of fit() in Keras 3.
    history = model.fit(
        [X_playlists, X_tracks],
        y,
        epochs=1000,
        batch_size=batch_size,
        shuffle=True,
        validation_split=0.1,
        verbose=2,
        callbacks=[stop]
    )

    # exist_ok avoids a FileExistsError (and a lost training run) when the
    # directory is already there from a previous call.
    os.makedirs('models', exist_ok=True)
    # NOTE(review): newer Keras versions may require a '.keras' or '.h5'
    # suffix on this path -- confirm against the installed version.
    model.save('models/model1')

    return history