In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
import librosa
import soundfile as sf
import io
import IPython
from scipy import signal
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from util import *
from functools import partial


import tensorflow as tf
from tensorflow import keras

In [None]:
class ImageSplitter:
    """Cut a 2-D array ("image") into equally sized pieces.

    Parameters
    ----------
    ax : int
        Axis used by `split_along_one` (0 splits rows, anything else uses the
        column tile size).
    split_along : int
        Stored on the instance; none of the methods in this class read it.
    split_size_0 : int
        Height (extent along axis 0) of each resulting piece.
    split_size_1 : int
        Width (extent along axis 1) of each resulting piece.
    """

    def __init__(self,
                ax = 1,
                split_along = 1,
                split_size_0 = 2,
                split_size_1 = 2):
        self.ax = ax
        self.split_along = split_along
        self.split_size_0 = split_size_0
        self.split_size_1 = split_size_1

    def get_image_params(self, image):
        """Return `(rows, cols, n_sections_0, n_sections_1)` for `image`.

        `n_sections_k` is how many pieces of size `split_size_k` fit along
        axis k.

        Raises
        ------
        ValueError
            If `image` is not 2-D, or if either dimension is not an exact
            multiple of the corresponding split size.
        """
        shape = image.shape
        if len(shape) != 2:
            raise ValueError("Expected a 2-D image, got shape {}".format(tuple(shape)))
        _0, _1 = shape
        if (_0 % self.split_size_0 != 0) or (_1 % self.split_size_1 != 0):
            raise ValueError("Can't split image to desired size")
        # Floor division keeps the arithmetic in integers (no float round-trip).
        return _0, _1, int(_0 // self.split_size_0), int(_1 // self.split_size_1)

    def split_along_one(self, image):
        """Split `image` along `self.ax` only.

        Returns an array whose first axis enumerates the pieces.
        """
        _0, _1, s_0, s_1 = self.get_image_params(image)
        n_sections = s_0 if self.ax == 0 else s_1
        return np.array(np.split(image, n_sections, axis=self.ax))

    def split_along_two(self, image):
        """Split `image` along both axes into tiles.

        Returns an array of shape `(n_tiles, split_size_0, split_size_1)`.
        Tiles are ordered row-band by row-band, left to right within a band.
        """
        _0, _1, s_0, s_1 = self.get_image_params(image)
        tiles = [np.split(band, s_1, axis=1)
                 for band in np.split(image, s_0, axis=0)]
        return np.array(tiles).reshape(-1, self.split_size_0, self.split_size_1)

In [2]:
# Build library of all songs (10 genres, 100 songs each)
# NOTE(review): `build_catalogue` and `build_library` are not defined in this notebook;
# presumably they are provided by `from util import *` -- confirm.
# Later cells iterate `library` as genre -> track -> samples and call `catalogue.keys()`,
# so `catalogue` is expected to be a mapping and `library` a nested sequence.
catalogue = build_catalogue()
library = build_library(catalogue, c_type='list')

Sample rate is:22050


In [30]:
# Map integer class index (0-9) to the catalogue's keys, in the catalogue's key order.
categories = dict(zip(range(10), catalogue.keys()))

In [3]:
# Trim every track to its first N_SAMPLES samples so that all tracks share one length,
# then stack them into a single numpy array. Will have dimensions (10, 100, 660000).
N_SAMPLES = 660000

# Fail early with a clear message: a track shorter than N_SAMPLES would make the
# stacked array ragged instead of a regular 3-D array.
for genre_idx, genre_tracks in enumerate(library):
    for track_idx, track in enumerate(genre_tracks):
        if len(track) < N_SAMPLES:
            raise ValueError(
                "Track {} of genre {} has only {} samples, fewer than {}".format(
                    track_idx, genre_idx, len(track), N_SAMPLES))

# Build a fresh nested list rather than overwriting `library` element by element.
library = np.array([[track[:N_SAMPLES] for track in genre_tracks]
                    for genre_tracks in library])

In [4]:
# Some utility functions to select and downsample tunes:
def select_tune(genre, tune_n, library, genre_cat = genres):
    """Return one track from `library`, addressed by genre name and track index.

    genre: name looked up among the entries of `genre_cat`.
    tune_n: index of the track within the selected genre.
    library: indexed as `library[genre_index][tune_n]`.
    genre_cat: iterable of genre names; iteration order defines the genre index.
        Only the first 10 entries are considered. The default is the module-level
        `genres`, evaluated once when this function is defined.
        NOTE(review): `genres` is not defined in this notebook -- presumably it
        comes from `from util import *`; confirm.

    Raises KeyError when `genre` is not among the first 10 entries of `genre_cat`.
    """
    genre_index = dict(zip(genre_cat, range(10)))
    return library[genre_index[genre]][tune_n]

def down_sample_tune(tune, dsr=5):
    """Down-sample `tune` by averaging consecutive groups of `dsr` samples.

    tune: array whose total number of elements is a multiple of `dsr`
        (otherwise the reshape raises ValueError).
    dsr: number of consecutive samples averaged into one output sample.

    Returns a 1-D array with one value per group.
    """
    frames = tune.reshape(-1, dsr)      # one row per group of `dsr` samples
    averaged = frames.mean(axis=1)
    return averaged.flatten()

In [5]:
# Now use all available tracks for all genres:
# each track is down-sampled by a factor of 5 and converted to a log-power (dB) spectrogram.
songs = []
for i in library:
    for j in i:
        ds_tune = down_sample_tune(j, dsr=5)
        # 'hann' is SciPy's canonical name for this window; the legacy 'hanning'
        # alias is not accepted by recent SciPy releases.
        f, t, Sxx = signal.spectrogram(ds_tune, fs=22050 / 5, nperseg=1024, window='hann')
        # Avoid log10(0): bins with exactly zero power are mapped to 0 dB.
        Sxx[Sxx == 0] = 1
        s = (10 * np.log10(Sxx))
        songs.append(s)
songs = np.array(songs)

In [133]:
# Cut the leading 260 x 146 region of every spectrogram into 260 x 73 tiles
# (two tiles per spectrogram) and stack all tiles with a trailing axis of length 1.
im_s = ImageSplitter(split_size_0=260, split_size_1=73)
songs_mod = list(map(im_s.split_along_two, songs[:, :260, :146]))
songs_mod = np.array(songs_mod).reshape(-1, 260, 73)[..., np.newaxis]

In [134]:
# Integer class labels 0-9, each repeated 200 times in consecutive runs (0...0, 1...1, ..., 9...9).
labels = np.arange(10).repeat(200)

In [135]:
# Hold out 30% of the samples for testing, then carve a validation set out of the
# remaining training data: the first 1200 rows train, the rest validate.
X_train_FULL, X_test_FULL, y_train_FULL, y_test_FULL = train_test_split(songs_mod, labels, test_size=0.3, random_state=42)
X_train, X_valid = X_train_FULL[:1200], X_train_FULL[1200:]
y_train, y_valid = y_train_FULL[:1200], y_train_FULL[1200:]

# Standardize with scalar statistics computed on the training subset only,
# and apply the same transform to every split.
x_mean = X_train.mean()
x_std = X_train.std()
X_train = (X_train - x_mean) / x_std
X_valid = (X_valid - x_mean) / x_std
# The held-out split must be scaled identically before it is fed to the model;
# the raw X_test_FULL is on a different scale than what the model is trained on.
X_test = (X_test_FULL - x_mean) / x_std
y_test = y_test_FULL

In [140]:
# Shared convolution settings: 3x3 kernels, ReLU activation, "SAME" padding.
DefaultConv2D = partial(
    keras.layers.Conv2D,
    kernel_size=3,
    activation='relu',
    padding="SAME",
)

# CNN for 260 x 73 single-channel inputs, ending in a 10-way softmax.
model = keras.models.Sequential()

# Convolutional feature extractor: three conv stages, each followed by max pooling.
model.add(DefaultConv2D(filters=16, kernel_size=3, input_shape=[260, 73, 1]))
model.add(keras.layers.MaxPooling2D(pool_size=2))
model.add(DefaultConv2D(filters=32))
model.add(DefaultConv2D(filters=32))
model.add(keras.layers.MaxPooling2D(pool_size=2))
model.add(DefaultConv2D(filters=64))
model.add(DefaultConv2D(filters=64))
model.add(keras.layers.MaxPooling2D(pool_size=4))

# Dense classifier head with dropout after each hidden layer.
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(units=64, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(units=32, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(units=10, activation='softmax'))

In [141]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_60 (Conv2D)           (None, 260, 73, 16)       160       
_________________________________________________________________
max_pooling2d_36 (MaxPooling (None, 130, 36, 16)       0         
_________________________________________________________________
conv2d_61 (Conv2D)           (None, 130, 36, 32)       4640      
_________________________________________________________________
conv2d_62 (Conv2D)           (None, 130, 36, 32)       9248      
_________________________________________________________________
max_pooling2d_37 (MaxPooling (None, 65, 18, 32)        0         
_________________________________________________________________
conv2d_63 (Conv2D)           (None, 65, 18, 64)        18496     
_________________________________________________________________
conv2d_64 (Conv2D)           (None, 65, 18, 64)      

In [142]:
# Train for 10 epochs with the "sgd" optimizer on integer class labels,
# reporting accuracy on the validation split after every epoch.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="sgd",
    metrics=["accuracy"],
)
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [143]:
# Run the model on the training inputs; its last layer is a 10-unit softmax,
# so each row of the result holds 10 class scores.
y_pred = model.predict(X_train)

In [149]:
# Inspect the predicted score vector for training sample 10.
y_pred[10]

array([0.1320011 , 0.13955067, 0.10709064, 0.08561222, 0.06417766,
       0.16635914, 0.06488865, 0.06474456, 0.1046022 , 0.07097319],
      dtype=float32)

In [148]:
# Label of training sample 10, for comparison with the prediction at the same index.
y_train[10]

5

In [2]:
# Load Fashion-MNIST and hold out the last 5000 training samples for validation.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train, y_train = X_train_full[:-5000], y_train_full[:-5000]
X_valid, y_valid = X_train_full[-5000:], y_train_full[-5000:]

# Per-position statistics over the training samples only; the small epsilon
# guards against division by zero where the standard deviation is 0.
X_mean = X_train.mean(axis=0, keepdims=True)
X_std = X_train.std(axis=0, keepdims=True) + 1e-7

# Standardize every split with the training statistics, then append a
# trailing axis of length 1.
X_train, X_valid, X_test = [
    ((split - X_mean) / X_std)[..., np.newaxis]
    for split in (X_train, X_valid, X_test)
]

In [10]:
from functools import partial

# Shared convolution settings: 3x3 kernels, ReLU activation, "SAME" padding.
DefaultConv2D = partial(
    keras.layers.Conv2D,
    kernel_size=3,
    activation='relu',
    padding="SAME",
)

# CNN for 28 x 28 single-channel inputs, ending in a 10-way softmax.
model = keras.models.Sequential()

# Three conv + max-pool stages; the first conv overrides the default kernel size with 7.
model.add(DefaultConv2D(filters=32, kernel_size=7, input_shape=[28, 28, 1]))
model.add(keras.layers.MaxPooling2D(pool_size=2))
model.add(DefaultConv2D(filters=64))
model.add(keras.layers.MaxPooling2D(pool_size=2))
model.add(DefaultConv2D(filters=128))
model.add(keras.layers.MaxPooling2D(pool_size=2))

# Dense classifier head with dropout after each hidden layer.
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(units=64, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(units=32, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(units=10, activation='softmax'))

In [11]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 28, 28, 32)        1600      
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 7, 7, 128)         73856     
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 3, 3, 128)         0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 1152)             

In [12]:
# Train for 10 epochs with the "nadam" optimizer on integer class labels,
# reporting accuracy on the validation split after every epoch.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="nadam",
    metrics=["accuracy"],
)
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
