In [None]:
%matplotlib inline
import os
import pandas as pd
from glob import glob
import numpy as np

In [None]:
from keras import layers
from keras import models
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import Adam
import keras.backend as K
import librosa
import librosa.display
import pylab
import matplotlib.pyplot as plt
from matplotlib import figure
import gc
import pathlib

In [None]:
# Render a spectrogram PNG for the first 5 seconds of every track of the
# selected genres: dataset/genres/<genre>/<file> -> dataset/img_data/<genre>/<stem>.png
cmap = plt.get_cmap('inferno')

# Genres used in this experiment. The cell used to assign the ten-genre list
# 'blues classical country disco hiphop jazz metal pop reggae rock' first and
# overwrite it on the very next line, so only this subset ever took effect.
genres = 'classical jazz metal pop rock'.split()

# One figure is reused for every track and cleared after each save.
plt.figure(figsize=(10,10))
for g in genres:
    pathlib.Path(f'dataset/img_data/{g}').mkdir(parents=True, exist_ok=True)
    for filename in os.listdir(f'./dataset/genres/{g}'):
        songname = f'./dataset/genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=5)
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB')
        plt.axis('off')
        # Output stem: the file name without its last three characters and
        # with every remaining '.' removed.
        plt.savefig(f'dataset/img_data/{g}/{filename[:-3].replace(".", "")}.png')
        plt.clf()
# Release the (now empty) working figure so it is neither kept in memory nor
# rendered as a blank plot under the cell.
plt.close()
 

# Create a spectrogram

In [None]:
def create_spectrogram(filename, name, out_dir='working/train/'):
    """Save the mel spectrogram of one audio file as ``<out_dir>/<name>.jpg``.

    Args:
        filename: Path of the audio file. It is loaded with ``sr=None``.
        name: Base name of the output image; ``.jpg`` is appended.
        out_dir: Directory the image is written to; created when missing.
            Defaults to the training-image directory.

    Returns:
        None. The written image file is the only result.
    """
    plt.interactive(False)
    clip, sample_rate = librosa.load(filename, sr=None)

    target = pathlib.Path(out_dir) / (name + '.jpg')
    # Make sure the destination exists before saving, so a fresh checkout
    # does not fail on the first track.
    target.parent.mkdir(parents=True, exist_ok=True)

    fig = plt.figure(figsize=[0.72,0.72])
    try:
        # Axes without ticks or frame: the saved image holds only the spectrogram.
        ax = fig.add_subplot(111)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        # Power spectrogram converted to dB relative to its maximum.
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
        fig.savefig(str(target), dpi=400, bbox_inches='tight',pad_inches=0)
    finally:
        # Close only the figure created here (even when rendering fails);
        # figures opened elsewhere in the notebook are left untouched.
        plt.close(fig)

In [None]:
def create_spectrogram_test(filename, name, out_dir='working/test/'):
    """Save the mel spectrogram of one audio file as ``<out_dir>/<name>.jpg``.

    Test-set counterpart of ``create_spectrogram``; it differs only in the
    default output directory.

    Args:
        filename: Path of the audio file. It is loaded with ``sr=None``.
        name: Base name of the output image; ``.jpg`` is appended.
        out_dir: Directory the image is written to; created when missing.
            Defaults to the test-image directory.

    Returns:
        None. The written image file is the only result.
    """
    plt.interactive(False)
    clip, sample_rate = librosa.load(filename, sr=None)

    # The notebook imports the ``pathlib`` module, not the bare ``Path`` name,
    # so the path has to be built through the module.
    target = pathlib.Path(out_dir) / (name + '.jpg')
    # Make sure the destination exists before saving.
    target.parent.mkdir(parents=True, exist_ok=True)

    fig = plt.figure(figsize=[0.72,0.72])
    try:
        # Axes without ticks or frame: the saved image holds only the spectrogram.
        ax = fig.add_subplot(111)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        # Power spectrogram converted to dB relative to its maximum.
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
        fig.savefig(str(target), dpi=400, bbox_inches='tight',pad_inches=0)
    finally:
        # Close only the figure created here (even when rendering fails).
        plt.close(fig)

# Extracting features

In [None]:
import csv

In [None]:
# Parallel accumulators: image file names and their genre labels.
ids, labels = [], []

In [None]:
# Render one training spectrogram per track and record its image file name
# and genre label, both in the in-memory lists and in ./dataset/data.csv.
header = ["id","class"]

# Start from empty lists so re-running this cell does not append duplicates.
ids.clear()
labels.clear()

with open('./dataset/data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in genres:
        # Sorted so the row order does not depend on the file system.
        for filename in sorted(os.listdir(f'./dataset/genres/{g}')):
            songname = f'./dataset/genres/{g}/{filename}'
            create_spectrogram(songname,filename)

            # The text before the first '.' of the file name is the genre label.
            label = filename.split('.')[0]
            # Must match the name create_spectrogram saves under (name + '.jpg').
            image_id = filename + '.jpg'
            labels.append(label)
            ids.append(image_id)
            # Previously only the header was ever written to the CSV.
            writer.writerow([image_id, label])
        


In [None]:
# Label table consumed by the data generator. `ids`/`labels` are filled genre
# by genre, and flow_from_dataframe takes its training/validation subsets from
# contiguous row ranges, so the rows are shuffled (fixed seed, reproducible)
# to keep every genre represented in both subsets.
df = (
    pd.DataFrame({'ID': ids, 'Class': labels})
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [None]:
df.head()

# Creating a data generator

In [None]:
from keras_preprocessing.image import ImageDataGenerator
# Generator configuration: pixel values are rescaled by 1/255 and
# validation_split is set to 25%.
datagen = ImageDataGenerator(rescale=1/255, validation_split=.25)

In [None]:
# Training-subset iterator over the spectrogram JPEGs in working/train/.
# File names are read from the "ID" column and labels from the "Class" column
# of df; images are resized to 128x128 and served in shuffled batches of 32.
# No iterator for the "validation" subset is created in the cells shown here.
train_generator = datagen.flow_from_dataframe(
    dataframe = df,
    directory="working/train/",
    x_col = "ID",
    y_col = "Class",
    subset="training",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(128, 128)
)

In [None]:
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.models import Sequential, Model
from keras.backend import placeholder
from keras.layers import Conv2D, MaxPooling2D, InputLayer
from keras import regularizers, optimizers
import pandas as pd
import numpy as np

def get_model(num_classes=5, input_shape=(128,128,3)):
    """Build and compile the small CNN genre classifier.

    Architecture: Conv2D(32, 3x3, 'same') -> ReLU -> Conv2D(64, 3x3) -> ReLU
    -> MaxPooling(2x2) -> Flatten -> Dense(512) -> ReLU -> Dropout(0.5)
    -> Dense(num_classes, softmax). The two convolutions are named
    'conv2d_1' and 'conv2d_2' so they can be looked up by name later.

    Args:
        num_classes: Size of the softmax output layer. Defaults to 5, the
            number of genres selected in this notebook.
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to (128, 128, 3).

    Returns:
        A new Sequential model compiled with RMSprop (lr=0.0005, decay=1e-6),
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same',
                     input_shape=input_shape, name='conv2d_1'))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3), name='conv2d_2'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    # Use the canonical RMSprop class rather than the lowercase `rmsprop`
    # alias, which is not available in every Keras release.
    model.compile(optimizer=optimizers.RMSprop(lr=0.0005, decay=1e-6),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model

# Build the classifier that is trained below and print its layer summary.
model = get_model()
model.summary()

In [None]:
# Fit the classifier on the training generator only; no test generator is set up yet.
# Floor division: a final partial batch is not counted as a step of the epoch.
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
#STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    epochs=75
)


# Style Transfer Boilerplate

In [None]:
from keras.preprocessing.image import load_img, img_to_array

In [None]:
# Spectrogram images for the style-transfer experiment: SAMPLE_SONG_PATH is
# loaded as the content image; STYLE_SONG_PATH names the intended style image.
SAMPLE_SONG_PATH = "working/train/classical.00000.au.jpg"
STYLE_SONG_PATH = "working/train/jazz.00000.au.jpg"

In [None]:
# Image size in pixels; TARGET_SIZE is the (height, width) pair passed to
# load_img as target_size.
TARGET_HEIGHT = 128
TARGET_WIDTH = 128
TARGET_SIZE = (TARGET_HEIGHT, TARGET_WIDTH)

In [None]:
# Load the content spectrogram, convert it to an array, add a leading batch
# axis and wrap the result in a float32 backend variable.
cImage = load_img(path=SAMPLE_SONG_PATH, target_size=TARGET_SIZE)
cImArr = img_to_array(cImage)
cImArr = np.expand_dims(cImArr, axis=0)
cImArr = K.variable(cImArr, dtype='float32')

In [None]:
cImArr.shape

In [None]:

# Load the style spectrogram, convert it to an array, add a leading batch
# axis and wrap the result in a float32 backend variable. It has to come from
# STYLE_SONG_PATH: loading SAMPLE_SONG_PATH here (as the cell used to do)
# makes the style image identical to the content image.
sImage = load_img(path=STYLE_SONG_PATH, target_size=TARGET_SIZE)
sImArr = img_to_array(sImage)
sImArr = np.expand_dims(sImArr, axis=0)
sImArr = K.variable(sImArr, dtype='float32')

In [None]:
# Random starting image (integer values in [0, 256), stored as float64) and a
# backend placeholder of the same shape.
# NOTE(review): the shape is written (1, TARGET_WIDTH, TARGET_HEIGHT, 3) while
# the images above are loaded with target_size=(TARGET_HEIGHT, TARGET_WIDTH);
# the two only agree while height == width — confirm the intended axis order.
generatedImage = np.random.randint(
    256, size=(1, TARGET_WIDTH, TARGET_HEIGHT, 3)).astype('float64')
generatedImagePlaceholder = K.placeholder(shape=(1, TARGET_WIDTH, TARGET_HEIGHT, 3))

In [None]:
def getLayerIndexByName(model, layername):
    """Return the position in ``model.layers`` of the first layer named ``layername``.

    Returns None when no layer carries that name.
    """
    positions = (pos for pos, lyr in enumerate(model.layers) if lyr.name == layername)
    return next(positions, None)
        
def get_feature_reps(x, layer_names, model):
    """
    Get feature representations of input x for one or more layers in a given model.

    Args:
        x: Input fed to ``model.input``.
        layer_names: Names of the layers whose outputs are wanted.
        model: Model that owns those layers.

    Returns:
        A list with one entry per requested name, in the order given. Each
        entry is what the backend function returns for that layer's output
        (the function is built with a single output).

    Raises:
        ValueError: If ``model`` has no layer with one of the requested names.
    """
    inp = model.input
    featMatrices = []
    for ln in layer_names:
        i = getLayerIndexByName(model, ln)
        if i is None:
            raise ValueError(f"model has no layer named {ln!r}")
        # Build and run an evaluation function for the requested layer only,
        # instead of evaluating every layer of the model on each call.
        functor = K.function([inp, K.learning_phase()], [model.layers[i].output])
        # Learning phase 0 selects test (inference) mode, matching the intent
        # of feature extraction; 1 would select training mode.
        featMatrices.append(functor([x, 0]))
    return featMatrices

def get_content_loss(F, P):
    """Return half the sum of squared element-wise differences between F and P."""
    squared_diff = K.square(F - P)
    return 0.5*K.sum(squared_diff)

In [None]:
def get_Gram_matrix(F):
    """Return the backend dot product of F with its own transpose."""
    F_transposed = K.transpose(F)
    return K.dot(F, F_transposed)

def get_style_loss(ws, Gs, As):
    """Accumulate the weighted Gram-matrix mismatch over matching layers.

    For every (w, G, A) triple taken from ``ws``, ``Gs`` and ``As`` the term
    ``w * 0.25 * sum((gram(G) - gram(A))**2) / (N**2 * M**2)`` is added, where
    N and M are the first and second static dimensions of G.
    """
    total = K.variable(0.)
    for weight, gen_feat, style_feat in zip(ws, Gs, As):
        n_cols = K.int_shape(gen_feat)[1]
        n_rows = K.int_shape(gen_feat)[0]
        gram_gen = get_Gram_matrix(gen_feat)
        gram_style = get_Gram_matrix(style_feat)
        mismatch = K.sum(K.square(gram_gen - gram_style))
        total += weight*0.25*mismatch/ (n_rows**2 * n_cols**2)
    return total

In [None]:
def get_total_loss(gImPlaceholder, alpha=1.0, beta=10000.0):
    """Return ``alpha * content loss + beta * style loss`` for the generated image.

    Features of ``gImPlaceholder`` are taken from the global ``gModel`` at
    ``cLayerName`` (content) and ``sLayerNames`` (style) and compared with the
    global targets ``P`` and ``As``, using the global layer weights ``ws``.
    """
    content_feat = get_feature_reps(gImPlaceholder, layer_names=[cLayerName], model=gModel)[0]
    style_feats = get_feature_reps(gImPlaceholder, layer_names=sLayerNames, model=gModel)
    content_term = alpha*get_content_loss(content_feat, P)
    style_term = beta*get_style_loss(ws, style_feats, As)
    return content_term + style_term

In [None]:
def calculate_loss(gImArr):
    """
    Calculate total loss using K.function

    Args:
        gImArr: Generated image as an array; reshaped to
            (1, TARGET_WIDTH, TARGET_HEIGHT, 3) when it arrives in any other
            shape (e.g. flattened).

    Returns:
        The total loss evaluated at ``gImArr``, cast to float64.
    """
    # The guard must compare against the shape it reshapes to; it used to
    # test (1, TARGET_WIDTH, TARGET_WIDTH, 3).
    expected_shape = (1, TARGET_WIDTH, TARGET_HEIGHT, 3)
    if gImArr.shape != expected_shape:
        gImArr = gImArr.reshape(expected_shape)
    # NOTE: the loss graph and backend function are rebuilt on every call.
    loss_fcn = K.function([gModel.input], [get_total_loss(gModel.input)])
    return loss_fcn([gImArr])[0].astype('float64')

def get_grad(gImArr):
    """
    Calculate the gradient of the loss function with respect to the generated image

    Args:
        gImArr: Generated image as an array; reshaped to
            (1, TARGET_WIDTH, TARGET_HEIGHT, 3) when it arrives in any other
            shape (e.g. flattened).

    Returns:
        The gradient as a flat float64 array.
    """
    # The guard must compare against the shape it reshapes to; it used to
    # test (1, TARGET_WIDTH, TARGET_WIDTH, 3).
    expected_shape = (1, TARGET_WIDTH, TARGET_HEIGHT, 3)
    if gImArr.shape != expected_shape:
        gImArr = gImArr.reshape(expected_shape)
    # NOTE: the gradient graph and backend function are rebuilt on every call.
    grad_fcn = K.function([gModel.input],
                          K.gradients(get_total_loss(gModel.input), [gModel.input]))
    grad = grad_fcn([gImArr])[0].flatten().astype('float64')
    return grad

# Perform Style Transfer


In [None]:
model.input

In [None]:
# Grab the backend session and build one classifier instance each for the
# content image, the style image and the generated image.
# NOTE(review): every get_model() call constructs and compiles a new network
# and nothing here copies weights from the trained `model`, so these three
# appear to be untrained and independent of each other — confirm intended.
tf_session = K.get_session()
cModel = get_model()
sModel = get_model()
gModel = get_model()

In [None]:
# Layer used for the content loss and layers used for the style loss
# (the names assigned to the convolutions in get_model()).
cLayerName = 'conv2d_2'
sLayerNames = [
    'conv2d_1',
    'conv2d_2',
]

In [None]:
sModel.summary()

In [None]:
sModel.summary()

In [None]:
# Content target (P) from cModel, style targets (As) from sModel, and equal
# per-layer style weights (ws) that sum to 1.
P = get_feature_reps(x=cImArr, layer_names=[cLayerName], model=cModel)[0]
As = get_feature_reps(x=sImArr, layer_names=sLayerNames, model=sModel)
ws = np.ones(len(sLayerNames)) / float(len(sLayerNames))

In [None]:
# fmin_l_bfgs_b is otherwise only imported further down (in the Trial 2
# section), so import it here to keep this cell runnable on a fresh
# top-to-bottom run of the notebook.
from scipy.optimize import fmin_l_bfgs_b

# Run L-BFGS-B for at most `iterations` iterations, starting from the
# flattened random image, with calculate_loss as the objective and get_grad
# as its gradient.
iterations = 4
x_val = generatedImage.flatten()
xopt, f_val, info= fmin_l_bfgs_b(calculate_loss, x_val, fprime=get_grad,
                            maxiter=iterations, disp=True)

In [None]:
xopt.shape

In [None]:
# Fold the flat optimiser output back into a (128, 128, 3) image-shaped array.
xopt = xopt.reshape(128, 128, 3)

In [None]:
from PIL import Image

In [None]:
# xopt is the float64 array produced by the optimiser. An RGB image needs one
# unsigned byte per channel, so clamp to [0, 255] and cast to uint8 before
# wrapping; handing the float64 buffer over as 'RGB' reinterprets raw bytes.
# For a (height, width, 3) uint8 array the RGB mode is inferred.
img = Image.fromarray(np.clip(xopt, 0, 255).astype('uint8'))
img.show()

In [None]:
# Trial 2: style transfer with ImageNet-pretrained VGG16 features

In [None]:
from keras.applications import VGG16
from scipy.optimize import fmin_l_bfgs_b

In [None]:
tf_session = K.get_session()

In [None]:
# Three VGG16 networks (include_top=False, ImageNet weights), each wired to
# its own input tensor: the content image, the style image and the
# generated-image placeholder.
cModel = VGG16(include_top=False, weights='imagenet', input_tensor=cImArr)
sModel = VGG16(include_top=False, weights='imagenet', input_tensor=sImArr)
gModel = VGG16(include_top=False, weights='imagenet', input_tensor=generatedImagePlaceholder)

In [None]:
cModel.summary()

In [None]:
# VGG16 layer used for the content loss and layers used for the style loss.
# These rebind the names set for the first trial.
cLayerName = 'block4_conv2'
sLayerNames = [
                'block1_conv1',
                'block2_conv1',
                'block3_conv1',
                'block4_conv1',
                ]

In [None]:
# Content target (P) from cModel, style targets (As) from sModel, and equal
# per-layer style weights (ws) that sum to 1.
P = get_feature_reps(x=cImArr, layer_names=[cLayerName], model=cModel)[0]
As = get_feature_reps(x=sImArr, layer_names=sLayerNames, model=sModel)
ws = np.ones(len(sLayerNames))/float(len(sLayerNames))


In [None]:
# Run L-BFGS-B for at most `iterations` iterations, starting from the
# flattened random image, with calculate_loss as the objective and get_grad
# as its gradient.
iterations = 4
x_val = generatedImage.flatten()
xopt, f_val, info= fmin_l_bfgs_b(calculate_loss, x_val, fprime=get_grad,
                            maxiter=iterations, disp=True)

In [None]:
info

In [None]:
xopt.shape

In [None]:
# Fold the flat optimiser output back into a (128, 128, 3) image-shaped array.
xopt = xopt.reshape(128, 128, 3)

In [None]:
from PIL import Image

In [None]:
# xopt is the float64 array produced by the optimiser. An RGB image needs one
# unsigned byte per channel, so clamp to [0, 255] and cast to uint8 before
# wrapping; handing the float64 buffer over as 'RGB' reinterprets raw bytes.
# For a (height, width, 3) uint8 array the RGB mode is inferred.
img = Image.fromarray(np.clip(xopt, 0, 255).astype('uint8'))
img.show()