In [1]:
from datetime import datetime
import os
import sys
sys.path.insert(0, '../../RISCluster/')

import h5py
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchvision
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision.utils import save_image

from RISCluster.processing import cluster
from RISCluster.utils.utils import notify

# import importlib
# importlib.reload(cluster)

ModuleNotFoundError: No module named 'numpy'

In [None]:
# Location of the HDF5 file that holds the spectrogram dataset.
fname_dataset = '../../../Data/DetectionData.h5'

# Sample budget.  M_train and M_val are the sample counts requested for the
# training and validation sets (80 % / 20 % of M); the test set requests M.
M = 1000
M_train = int(0.8 * M)
M_val = int(0.2 * M)
M_test = M

# Optimisation hyper-parameters.
LR = 1e-4       # Learning rate
N_EPOCHS = 100  # Number of epochs
BATCH_SZ = 128  # Batch size

def set_loading_index(M, fname_dataset, reserve=0.02, split_pct=0.8, seed=None):
    """Draw disjoint random sample indices for training, validation and testing.

    The number of samples in the '/30sec/Spectrogram' dataset is read from the
    HDF5 file and ``int(M * (2 + reserve))`` distinct indices are drawn without
    replacement.  The first half of the draw is divided into training and
    validation indices according to ``split_pct``; the second half becomes the
    test indices.

    Parameters
    ----------
    M : int
        Nominal number of samples per half of the draw.
    fname_dataset : str
        Path to the HDF5 file.
    reserve : float, optional
        Extra fraction of ``M`` to draw on top of ``2 * M`` so that a few
        spare indices are available.
    split_pct : float, optional
        Fraction (0..1) of the first half assigned to training; the rest of
        that half is assigned to validation.
    seed : int or None, optional
        When given, the draw uses a private ``np.random.RandomState(seed)``
        and is reproducible.  When ``None`` the global NumPy random state is
        used, exactly as before.

    Returns
    -------
    index_train, index_val, index_test : numpy.ndarray
        Three mutually disjoint integer index arrays.

    Raises
    ------
    ValueError
        If ``split_pct`` is outside [0, 1] or more indices are requested than
        the dataset contains.
    """
    if not 0 <= split_pct <= 1:
        raise ValueError(f'split_pct must be within [0, 1], got {split_pct}.')

    with h5py.File(fname_dataset, 'r') as f:
        DataSpec = '/30sec/Spectrogram'
        m, _, _ = f[DataSpec].shape

    n_request = int(M * (2 + reserve))
    if n_request > m:
        # Sampling without replacement cannot exceed the population size.
        raise ValueError(
            f'Requested {n_request} unique indices but the dataset only '
            f'holds {m} samples.'
        )

    if seed is None:
        index = np.random.choice(m, size=n_request, replace=False)
    else:
        # RandomState is used (rather than default_rng) so this also runs on
        # older NumPy releases.
        index = np.random.RandomState(seed).choice(
            m, size=n_request, replace=False
        )

    half = len(index) // 2
    index_train_val = index[:half]
    index_test = index[half:]

    n_train = int(split_pct * len(index_train_val))
    index_train = index_train_val[:n_train]
    index_val = index_train_val[n_train:]
    return index_train, index_val, index_test

def load_data(fname_dataset, M, index):
    """Load and peak-normalise up to ``M`` spectrograms from an HDF5 file.

    Reads the '/30sec/Spectrogram' dataset.  For each sample index taken from
    ``index`` the spectrogram is cropped to ``[1:-1, 1:129]``, divided by its
    maximum value and stored as one single-channel image in the output
    tensor.  A spectrogram whose maximum is zero or non-finite cannot be
    normalised and is skipped; the next entry of ``index`` is tried instead,
    so passing more than ``M`` indices provides spare candidates.  If
    ``index`` is exhausted before ``M`` spectrograms were loaded, the outputs
    are truncated to the number actually loaded (no uninitialised rows are
    returned).

    Parameters
    ----------
    fname_dataset : str
        Path to the HDF5 file.
    M : int
        Number of spectrograms to load.
    index : sequence of int
        Candidate sample indices into the first axis of the dataset.

    Returns
    -------
    X : torch.Tensor
        Tensor of shape ``(count, 1, n_freq, n_time)`` with ``count <= M``.
    m, p, n, o : int
        The four dimensions of ``X``, in that order.
    idx_sample : numpy.ndarray
        Integer array of length ``count``; ``idx_sample[k]`` is the dataset
        index of ``X[k]``.
    """
    DataSpec = '/30sec/Spectrogram'
    with h5py.File(fname_dataset, 'r') as f:
        # Axes: samples, frequency bins, time bins.
        dset = f[DataSpec]
        m, n, o = dset.shape
        print('----------------------------------------------------------------')
        print(f'H5 file has {m} samples, {n} frequency bins, {o} time bins.')
        print(f'Loading {M} samples...')
        tic = datetime.now()

        # Size the output from the crop that is actually applied below instead
        # of a hard-coded offset, so the shapes always agree.
        n_freq = len(range(n)[1:-1])
        n_time = len(range(o)[1:129])
        X = torch.empty([M, 1, n_freq, n_time])
        idx_sample = np.empty([M,], dtype=int)

        count = 0
        for i in index:
            if count >= M:
                break
            dset_arr = np.asarray(dset[int(i), 1:-1, 1:129])
            peak = dset_arr.max()
            # Explicit check instead of relying on NumPy error state: an
            # all-zero spectrogram yields 0/0, which is an "invalid" (not a
            # "divide") floating-point error and would silently produce NaNs.
            if peak == 0 or not np.isfinite(peak):
                print('Spectrogram has a zero or non-finite maximum, '
                      'skipping spectrogram.')
                continue
            X[count, 0] = torch.from_numpy(dset_arr / peak)
            idx_sample[count] = int(i)
            count += 1
            print('%.2f' % (100 * count / M) + '% complete.', end='\r')
        toc = datetime.now()
        print(f'\nTime elapsed = {toc - tic}')

    # Drop rows that were never filled (only happens when `index` ran out).
    X = X[:count]
    idx_sample = idx_sample[:count]

    # Update dimensions of X:
    m, p, n, o = list(X.size())
    print(f'Shape of output is {(m, p, n, o)}')
    print('----------------------------------------------------------------')
    return X, m, p, n, o, idx_sample


# Draw disjoint random index sets for the three splits.
index_train, index_val, index_test = set_loading_index(M, fname_dataset, reserve=0.02)

# Load each split.  The dataset indices of the loaded samples are kept per
# split (idx_train / idx_val / idx_test) instead of overwriting one variable.
#
# NOTE: load_data returns (X, m, p, n, o, idx) where m, p, n, o are the four
# dimensions of X in order.  The target names m, n, o, p are kept as in the
# original cell for compatibility with later cells, so after unpacking `n` is
# X's second dimension, `o` its third and `p` its fourth.
X_train, m, n, o, p, idx_train = load_data(fname_dataset, M_train, index_train)
X_val, m, n, o, p, idx_val = load_data(fname_dataset, M_val, index_val)
X_test, m, n, o, p, idx_test = load_data(fname_dataset, M_test, index_test)

# Backward-compatible alias: previously `idx_sample` ended up holding the
# indices of the last (test) load.
idx_sample = idx_test

In [None]:
class ConvAEC(nn.Module):
    """Convolutional autoencoder with a 32-unit dense bottleneck.

    Data flow for a batch of N single-channel images:

    * ``encoder``    - four stride-2 convolutions (1 -> 8 -> 16 -> 32 -> 64
      channels), each followed by an in-place ReLU.
    * ``enc2latent`` - flattens the encoder output and maps its 2048 features
      to 32 latent units.  The encoder must therefore emit 64 x 4 x 8 feature
      maps, which holds for e.g. 64 x 128 inputs.
    * ``latent2dec`` - maps the 32 latent units back to 2048 features.
    * ``decoder``    - four stride-2 transposed convolutions (64 -> 32 -> 16
      -> 8 -> 1 channels) that each double the spatial size, turning the
      (N, 64, 4, 8) feature map into an (N, 1, 64, 128) output.  No
      activation follows the last layer.

    Any keyword arguments passed to the constructor are accepted and ignored.
    """

    def __init__(self, **kwargs):
        super().__init__()

        encoder_layers = [
            nn.Conv2d(1, 8, kernel_size=5, stride=2, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(8, 16, kernel_size=5, stride=2, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # (N, 64, 4, 8) feature map -> (N, 2048) vector -> (N, 32) latent code.
        self.enc2latent = nn.Sequential(
            nn.Flatten(),
            nn.Linear(2048, 32),
            nn.ReLU(inplace=True),
        )

        # (N, 32) latent code -> (N, 2048) vector, reshaped in forward().
        self.latent2dec = nn.Sequential(
            nn.Linear(32, 2048),
            nn.ReLU(inplace=True),
        )

        decoder_layers = [
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(32, 16, kernel_size=5, stride=2, padding=2, output_padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 8, kernel_size=5, stride=2, padding=2, output_padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(8, 1, kernel_size=5, stride=2, padding=2, output_padding=1),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the latent code and decode it back to an image."""
        latent = self.enc2latent(self.encoder(x))
        # Restore the (N, 64, 4, 8) layout expected by the first decoder layer.
        feature_map = self.latent2dec(latent).view(-1, 64, 4, 8)
        return self.decoder(feature_map)
    
# Build the autoencoder and work out which device is available.
model = ConvAEC()
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
if device.type == "cuda":
    print('CUDA device available, using GPU.')
else:
    print('CUDA device not available, using CPU.')
# NOTE: `device` is only computed and reported here; the model is not moved
# to it (the transfer below is left disabled, as in the original cell).
# model = ConvAEC().to(device)

In [None]:
# Shape sanity check: run the training set through the model and show the
# output size.  Gradients are not needed for this, so autograd is disabled to
# avoid building (and holding in memory) a graph for the whole batch.
with torch.no_grad():
    output_size = model(X_train).size()
output_size

In [None]:
# Print the model's string representation to inspect its layers.
print(model)

In [None]:
# NOTE(review): no module-level `f` is assigned in the visible cells (the only
# `f` bindings are local to set_loading_index / load_data), so this presumably
# raises NameError on a fresh kernel. Looks like a leftover; confirm and remove.
f