In [2]:
import os
import sys
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA

import cv2
from PIL import Image as im
from sklearn.metrics import jaccard_score

import collections
from typing import DefaultDict, Tuple, List, Dict
from functools import partial

sys.path.append('early-stopping-pytorch')
from pytorchtools import EarlyStopping

In [3]:
# Unique ID used to name a model when saving files related to it.
# The stored integer identifies the most recently trained model; training
# increments the value.
# The path is relative, so the counter file is resolved against the current
# working directory.
COUNTER_FILENAME = os.path.expanduser("model_counter.txt")

# Current model ID, kept as a string. A `global` statement has no effect at
# module level, so the name is declared with a bare annotation instead; the
# value itself is assigned by retrieve_model_id() / update_model_id().
MODEL_ID: str


def update_model_id():
    """Increment the model counter and persist it to COUNTER_FILENAME.

    Reads the current ID from the module-level MODEL_ID, adds one, writes the
    new value to the counter file and stores it back in MODEL_ID as a string.

    Raises:
        ValueError: if MODEL_ID does not hold an integer. The counter file is
            left untouched in that case.
    """
    global MODEL_ID
    # Compute the new value BEFORE opening the file: mode "w" truncates on
    # open, so a failing int() inside the `with` block would leave an empty
    # (corrupted) counter file behind.
    count = str(int(MODEL_ID) + 1)
    with open(COUNTER_FILENAME, "w") as f:
        f.write(count)
    # Only publish the new ID once it has been written successfully.
    MODEL_ID = count

        
def retrieve_model_id():
    """Load the model counter from COUNTER_FILENAME into MODEL_ID.

    The file contents are stripped of surrounding whitespace so a trailing
    newline (e.g. from editing the file by hand) does not end up in the ID.
    If the file does not exist, or exists but is empty, it is (re)created
    with the value '0'. MODEL_ID is always stored as a string.
    """
    global MODEL_ID
    try:
        with open(COUNTER_FILENAME, 'r') as f:
            count = f.read().strip()
    except FileNotFoundError:
        print('New counter file.')
        count = None

    if count == '':
        # An empty file carries no usable ID; start the counter over rather
        # than propagating '' (which int() cannot parse later on).
        print('Empty counter file; resetting to 0.')

    if not count:
        count = '0'
        with open(COUNTER_FILENAME, 'w') as f:
            f.write(count)

    MODEL_ID = count

            
# Widen the printing view: a threshold of sys.maxsize keeps arrays and tensors
# from being summarised, and each printed line may be up to 300 characters.
np.set_printoptions(
    linewidth=300,
    threshold=sys.maxsize,
)
torch.set_printoptions(
    profile='full',
    linewidth=300,
    threshold=sys.maxsize,
)

# Populate MODEL_ID from the counter file (the file is created on first use).
retrieve_model_id()

In [4]:
class VAE(nn.Module):
    """Convolutional variational autoencoder for single-channel 30x30 inputs.

    The encoder halves the spatial size twice (30 -> 15 -> 8), two linear
    heads produce the mean and log-variance of the latent Gaussian, and the
    decoder mirrors the encoder (8 -> 15 -> 30).

    Keyword Args:
        input_shape: stored as ``self.input_shape``. The layer sizes are fixed
            for 1x30x30 inputs and do not depend on this value.
        n_units: stored as ``self.n_units`` (number of hidden units in the
            first hidden layer); not used by the convolutional layers.
        latent_units: number of units in the latent space.

    Raises:
        KeyError: if any of the three keywords is missing.
    """

    def __init__(self, **kwargs):
        super().__init__()
        self.input_shape = kwargs["input_shape"]
        # number of hidden units in first hidden layer
        self.n_units = kwargs["n_units"]
        # number of hidden units in latent space
        self.latent_units = kwargs["latent_units"]

        # Channel counts along the decoder path, i.e. the encoder's in reverse.
        hidden_dims = [32, 16]
        # Shape of the encoder output for one sample: channels x height x width.
        self._encoded_shape = (32, 8, 8)
        # 32 * 8 * 8 = 2048
        n_encoded = 32 * 8 * 8

        self.encoder = nn.Sequential(
            # Initial tensor is batch_size x 1 x 30 x 30
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=3,
                stride=2,
                padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            # tensor is batch_size x 16 x 15 x 15 since:
            #   floor((30 + (2 * 1) - (1 * (3 - 1)) - 1) / 2) + 1
            # = floor(29 / 2) + 1 = 14 + 1 = 15
            nn.Conv2d(
                in_channels=16,
                out_channels=32,
                kernel_size=3,
                stride=2,
                padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU())
            # tensor is batch_size x 32 x 8 x 8 since:
            #   floor((15 + (2 * 1) - (1 * (3 - 1)) - 1) / 2) + 1
            # = floor(14 / 2) + 1 = 8

        self.mean = nn.Linear(n_encoded, self.latent_units)
        self.log_var = nn.Linear(n_encoded, self.latent_units)

        # Projects a latent vector back to the flattened encoder output so it
        # can be reshaped to 32 x 8 x 8 before the transposed convolutions.
        self.decoder_input = nn.Linear(self.latent_units, n_encoded)

        self.decoder = nn.Sequential(
            # 8 -> 15: (8 - 1) * 2 - 2 * 1 + (3 - 1) + output_padding + 1
            #        = 15 + output_padding, so output_padding must be 0.
            nn.ConvTranspose2d(
                hidden_dims[0],
                hidden_dims[1],
                kernel_size=3,
                stride=2,
                padding=1,
                output_padding=0),
            nn.BatchNorm2d(hidden_dims[1]),
            nn.ReLU())

        self.final_layer = nn.Sequential(
            # 15 -> 30: (15 - 1) * 2 - 2 * 1 + (3 - 1) + output_padding + 1
            #         = 29 + output_padding, so output_padding must be 1.
            nn.ConvTranspose2d(
                hidden_dims[-1],
                hidden_dims[-1],
                kernel_size=3,
                stride=2,
                padding=1,
                output_padding=1),
            nn.BatchNorm2d(hidden_dims[-1]),
            nn.LeakyReLU(),
            # One output channel so the reconstruction matches the
            # single-channel input.
            nn.Conv2d(
                hidden_dims[-1],
                out_channels=1,
                kernel_size=3,
                padding=1),
            # NOTE(review): Sigmoid bounds the reconstruction to (0, 1); this
            # presumes the input matrices are scaled to that range - confirm.
            nn.Sigmoid())

    def encode(self, X):
        """Return ``(mean, log_var)`` of the latent Gaussian for a batch.

        Args:
            X: tensor of shape (batch_size, 1, 30, 30).

        Returns:
            Two tensors of shape (batch_size, latent_units).
        """
        features = torch.flatten(self.encoder(X), start_dim=1)
        return self.mean(features), self.log_var(features)

    def reparameterize(self, mean, log_var):
        """Sample ``z = mean + eps * std`` with ``eps ~ N(0, I)``.

        Sampling this way keeps the operation differentiable with respect to
        ``mean`` and ``log_var``.
        """
        std = torch.exp(0.5 * log_var)
        return mean + torch.randn_like(std) * std

    def decode(self, z):
        """Map latent vectors (batch_size, latent_units) to (batch_size, 1, 30, 30)."""
        hidden = self.decoder_input(z).view(-1, *self._encoded_shape)
        return self.final_layer(self.decoder(hidden))

    def forward(self, X):
        """Encode, sample and decode a batch.

        Args:
            X: tensor of shape (batch_size, 1, 30, 30).

        Returns:
            Tuple ``(reconstruction, mean, log_var)`` where ``reconstruction``
            has the same shape as ``X`` and the other two have shape
            (batch_size, latent_units).
        """
        mean, log_var = self.encode(X)
        z = self.reparameterize(mean, log_var)
        return self.decode(z), mean, log_var

In [5]:
# Load the dataset: 10,000 samples, each a 30x30 matrix stored in its own
# text file (sample1.dat ... sample10000.dat).
data_count = 10000
data = np.empty((data_count, 30, 30))
# Number of scalar entries per sample (height * width).
n_features = int(np.prod(data.shape[1:]))


for i in range(data_count):
    # File names are 1-based while the array rows are 0-based.
    path = 'data/jet_matrices/sample{}.dat'.format(i + 1)
    sample = np.loadtxt(path)
    data[i] = sample

print("Done loading data.")

Done loading data.


In [6]:
# Load parameters corresponding to the 4 variables input into
# the Helmholtz Resonator function, where output is each sample in dataset.
# Row i of `params` holds the 4 comma-separated values from line i of the file.
params = np.empty((data_count, 4))

path = r'data/param_lhs.dat'
with open(path) as f:
    lines = f.readlines()

# Fail loudly on a short file: rows that would be left unfilled contain
# uninitialised memory, not zeros, and would silently pollute the data.
if len(lines) < data_count:
    raise ValueError(
        f"{path} has {len(lines)} lines but {data_count} parameter rows are required")

# Extra lines beyond data_count are ignored.
for i, line in enumerate(lines[:data_count]):
    param = np.fromstring(line, dtype=float, sep=',')
    # A line holding a single value would otherwise be broadcast across the
    # whole row without any error.
    if param.shape != (4,):
        raise ValueError(
            f"{path}, line {i + 1}: expected 4 comma-separated values, got {param.size}")
    params[i] = param

print("Done loading parameters.")

Done loading parameters.


In [14]:
# Turn the numpy array into a PyTorch tensor and cast its scalars to floats
# in a single step.
X = torch.from_numpy(data).float()
print(X.shape)

# Bundle every sample with its parameter row and its index:
# each entry is [matrix, parameters, index].
X_with_params = []
for i in range(data_count):
    X_with_params.append([X[i], params[i], i])

torch.Size([10000, 30, 30])
