In [1]:
import glob
import pickle

import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.io as sio
import tensorflow as tf
from scipy import signal
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from tensorflow import keras
from tensorflow.keras import layers
from umap import UMAP

In [2]:
# List the spectra files in pure Python instead of shelling out with `!ls`:
# the cell then runs outside IPython, and no shell output (e.g. an `ls` error
# message for a missing directory) can be mistaken for a file name.
# The list is sorted so that the position of each file -- used as its integer
# label when the spectra are loaded -- is stable from run to run.
# NOTE(review): sorted() orders by code point, which can differ from the
# locale-aware order of `ls`; rebuild cached label arrays if the order changed.
SPECTRA_DIR = '/media/big/spectra'
files = sorted(glob.glob(f'{SPECTRA_DIR}/*'))
len(files)

916

In [3]:
# Frequency axis of a 2**16-point FFT with sample spacing 1/16000, cut down to
# the bins whose frequency lies in [10, 100].
freqs = np.fft.fftfreq(2 ** 16, 1 / 16000)
freqs_idxs = (freqs >= 10) & (freqs <= 100)
freqs = freqs[freqs_idxs]
# Recomputed on the already-filtered axis, so from here on the mask is all-True
# with one entry per retained bin.
# NOTE(review): presumably the spectra files already hold only these bins, so
# the mask merely has to match their column count -- TODO confirm.
freqs_idxs = (freqs >= 10) & (freqs <= 100)

try:
    # Fast path: reuse the standardised spectra cached by a previous run.
    X = np.load('X.npy')
    Y = np.load('Y.npy')
    mnX = np.load('mnX.npy')
    stX = np.load('stX.npy')

except (OSError, ValueError):
    # Cache missing or unreadable: rebuild it from the raw files.
    # (A bare `except:` would also swallow KeyboardInterrupt and start this
    # expensive rebuild when the user merely interrupted the load.)
    spectra = []
    labels = []
    for idx, f in enumerate(files):
        # Keep every 20th row of each file to limit memory use.
        rows = np.load(f)[::20, freqs_idxs]
        spectra.append(rows)
        # One label per kept row: the index of the source file.
        labels.append(np.full(len(rows), idx))

    X = np.concatenate(spectra)
    Y = np.concatenate(labels)

    # Per-bin statistics, kept as row vectors so they broadcast over samples.
    mnX = X.mean(0).reshape((1, -1))
    stX = X.std (0).reshape((1, -1))

    # Standardise in place to avoid a second full-size copy of X.
    X -= mnX
    X /= stX

    np.save('mnX', mnX)
    np.save('stX', stX)
    np.save('X', X)
    np.save('Y', Y)

print(X.shape, Y.shape)

(5078978, 369) (5078978,)


In [4]:
# Load the cached 30-component PCA, or fit and cache it when no usable cache
# exists. The fit uses every 10th row of X, which must still be the
# standardised spectra matrix at this point (a later cell rebinds X).
try:
    # NOTE(security): pickle.load can execute arbitrary code; only open
    # pca.pickle files that this notebook wrote itself.
    with open('pca.pickle', 'rb') as fp:
        pca = pickle.load(fp)['pca']

except (OSError, EOFError, KeyError, pickle.UnpicklingError):
    # Missing, truncated or malformed cache -> refit. Anything else (including
    # KeyboardInterrupt) propagates instead of silently triggering a refit.
    pca = PCA(30).fit(X[::10])
    with open('pca.pickle', 'wb') as fp:
        pickle.dump({
            'pca': pca
        }, fp)

# Cumulative fraction of variance explained by the first k components.
pca.explained_variance_ratio_.cumsum()

array([0.20581471, 0.2773456 , 0.33571487, 0.39136264, 0.44166363,
       0.48797191, 0.53130338, 0.57187992, 0.61165775, 0.64868533,
       0.68438381, 0.71804078, 0.75000287, 0.78040423, 0.80895596,
       0.83545723, 0.86062185, 0.88417961, 0.90585487, 0.92592705,
       0.94384902, 0.96002274, 0.97451584, 0.98328626, 0.98661327,
       0.98799482, 0.9891463 , 0.99016634, 0.99108563, 0.99192572])

In [5]:
# Build (input window -> following window) training pairs in PCA space.
# NOTE: this cell rebinds X and Y, replacing the spectra/label arrays created
# earlier in the notebook.
fs_raw = 1000                 # presumably rows per second in the spectra files -- TODO confirm
subsample = 10                # keep every 10th row of each file
fs = int(fs_raw / subsample)

t_in  = 1 * fs                # rows in each input window
t_out = int(0.1 * fs)         # rows in each target window, directly after the input

try:
    X = np.load('X_train.npy')
    Y = np.load('Y_train.npy')

except (OSError, ValueError):
    # Cache missing or unreadable: regenerate. The accumulators are created
    # here (not before the try) so that a partially successful load cannot
    # leave X bound to an ndarray, which has no .append().
    X = []
    Y = []
    window = t_in + t_out
    # Fixed seed so that the cached training set can be reproduced.
    rng = np.random.default_rng(0)

    for f in files:
        arr = np.load(f)[::subsample]
        # Apply the same per-bin standardisation as the data the PCA was fit on.
        arr -= mnX
        arr /= stX
        arr = pca.transform(arr)

        # Draw about three windows per window-length of signal.
        n_rand = int(len(arr) * 3 / window)

        # Valid start positions are 0 .. len(arr) - window inclusive.
        n_starts = max(len(arr) - window + 1, 0)
        starts = rng.permutation(n_starts)[:n_rand]

        for start in starts:
            X.append(arr[start : start + t_in])
            Y.append(arr[start + t_in : start + window])

    X = np.stack(X)
    Y = np.stack(Y)

    np.save('X_train', X)
    np.save('Y_train', Y)

X.shape, Y.shape

((276577, 100, 30), (276577, 10, 30))

In [6]:
n_dim = 30


def _conv_stage():
    """Return one convolutional stage: two 3x3 ReLU convolutions (64 then 32
    filters, stride 1), a default max-pooling layer and batch normalisation."""
    return [
        layers.Conv2D(filters=64, kernel_size=3, strides=1, activation='relu'),
        layers.Conv2D(filters=32, kernel_size=3, strides=1, activation='relu'),
        layers.MaxPooling2D(),
        layers.BatchNormalization(),
    ]


def _dense_stage(units):
    """Return one fully connected stage: a ReLU dense layer of `units` units,
    dropout with rate 0.3, and batch normalisation."""
    return [
        layers.Dense(units, activation='relu'),
        layers.Dropout(0.3),
        layers.BatchNormalization(),
    ]


# Layers are instantiated strictly in network order (left to right below), so
# Keras assigns the same automatic layer names as a single flat list would.
model = keras.Sequential(
    # Input window, with a trailing channel axis added for the 2-D convolutions
    [layers.Input((t_in, n_dim)), layers.Reshape((t_in, n_dim, 1))]
    # Two identical convolutional stages
    + _conv_stage()
    + _conv_stage()
    # Flatten the feature maps for the dense head
    + [layers.Flatten()]
    # Fully connected stages
    + _dense_stage(128)
    + _dense_stage(64)
    # Linear output, reshaped to (t_out, n_dim) to match the target windows
    + [layers.Dense(n_dim * t_out), layers.Reshape((t_out, n_dim))]
)

model.compile(optimizer='adam', loss='mse')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 100, 30, 1)        0         
_________________________________________________________________
conv2d (Conv2D)              (None, 98, 28, 64)        640       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 96, 26, 32)        18464     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 48, 13, 32)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 48, 13, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 46, 11, 64)        18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 44, 9, 32)         1

In [7]:
# Train on the windowed data; `hist` receives the History object returned by
# fit(). A fraction of 0.1 of the samples is held out for validation.
fit_options = dict(
    epochs=50,
    batch_size=512,
    validation_split=0.1,
)
hist = model.fit(X, Y, **fit_options)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50

KeyboardInterrupt: 