In [1]:
# Importing modules
import torch as to
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk
import torch.utils.data as to_data
from torch.utils.tensorboard import SummaryWriter as sumwriter
import os as os

In [2]:
# Specify hardware for ML training (GPU default)
device = "cuda" if to.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
# Quickly generate list of strings for frequency numbers and ratios
def freq_name(no_freq, include_freq=True, include_ratio=True):
    """
    Creates an ordered list of string from inputted parameters:

    no_freq = (int) number of desired frequencies
    include_freq = (bool) include the individual frequencies or not (default True)
    include_ratio = (bool) include the non-trivial ratios between frequencies or not (default True)
    """
    names = []
    if include_freq:
        for i in range(no_freq):
            names.append('f'+str(i+1))
    if include_ratio:
        for i in range(no_freq):
            for j in range(i):
                names.append('f'+str(i+1)+'/f'+str(j+1))
    return names

In [4]:
# Create Pytorch dataset class for data batching during training
class FFNN_data(to_data.Dataset):
    def __init__(self, scaled_dataframe, X_names, Y_names):
        self.len = len(scaled_dataframe)
        self.X = to.from_numpy(scaled_dataframe[X_names].to_numpy().astype('float32')).to(device)
        self.Y = to.from_numpy(scaled_dataframe[Y_names].to_numpy().astype('float32')).to(device)

    def __len__(self):
        return self.len
  
    def __getitem__(self, idx):
        X_idx = self.X[idx,:]
        Y_idx = self.Y[idx,:]
        return X_idx, Y_idx

In [5]:
# Creates function that returns desired activation function
def activation(activ_name):
    if activ_name=='relu':
        return to.nn.ReLU()
    elif activ_name=='lrelu':
        return to.nn.LeakyReLU()
    elif activ_name=='prelu':
        return to.nn.PReLU()
    elif activ_name=='relu6':
        return to.nn.ReLU6()
    elif activ_name=='sigmoid':
        return to.nn.Sigmoid()
    elif activ_name=='tanh':
        return to.nn.Tanh()
    elif activ_name=='silu':
        return to.nn.SiLU()
    elif activ_name=='selu':
        return to.nn.SELU()
    elif activ_name=='celu':
        return to.nn.CELU()
    elif activ_name=='gelu':
        return to.nn.GELU()
    else:
        return to.nn.ReLU()

In [6]:
class dAE_Network(to.nn.Module):
    def __init__(self, num_X, num_Y, num_Z, henc_nodes, hdec_nodes, hdis_nodes, hactiv_type):
        super(dAE_Network, self).__init__()

        self.encoder = []
        self.encoder.append(to.nn.Linear(num_X, henc_nodes[0]))
        self.encoder.append(activation(hactiv_type))

        for i in range(len(henc_nodes)-1):
            self.encoder.append(to.nn.Linear(henc_nodes[i], henc_nodes[i+1]))
            self.encoder.append(activation(hactiv_type))

        self.encoder.append(to.nn.Linear(henc_nodes[-1], num_Z))

        self.encoder = to.nn.Sequential(*self.encoder).to(device)
        for i in self.encoder[::2]:
            to.nn.init.kaiming_uniform_(i.weight)
            to.nn.init.zeros_(i.bias)


        self.decoder = []
        self.decoder.append(to.nn.Linear(num_Z, hdec_nodes[0]))
        self.decoder.append(activation(hactiv_type))

        for i in range(len(hdec_nodes)-1):
            self.decoder.append(to.nn.Linear(hdec_nodes[i], hdec_nodes[i+1]))
            self.decoder.append(activation(hactiv_type))

        self.decoder.append(to.nn.Linear(hdec_nodes[-1], num_X))

        self.decoder = to.nn.Sequential(*self.decoder).to(device)
        for i in self.decoder[::2]:
            to.nn.init.kaiming_uniform_(i.weight)
            to.nn.init.zeros_(i.bias)


        self.disentangler = []
        self.disentangler.append(to.nn.Linear(num_Z, hdis_nodes[0]))
        self.disentangler.append(activation(hactiv_type))

        for i in range(len(hdis_nodes)-1):
            self.disentangler.append(to.nn.Linear(hdis_nodes[i], hdis_nodes[i+1]))
            self.disentangler.append(activation(hactiv_type))

        self.disentangler.append(to.nn.Linear(hdis_nodes[-1], num_Y))

        self.disentangler = to.nn.Sequential(*self.disentangler).to(device)
        for i in self.disentangler[::2]:
            to.nn.init.kaiming_uniform_(i.weight)
            to.nn.init.zeros_(i.bias)

    def forward(self, X):
        Z = self.encoder(X)
        Xr = self.decoder(Z)
        Y = self.disentangler(Z)
        return Xr, Y

In [7]:
def train_epoch(
    network,
    train_dataloader, beta,
    loss_function, optimizer,
    tb_writer, epoch_ind
    ):

    loss_list = []
    MAPE_list = []

    for i, data in enumerate(train_dataloader):
        X, Y = data

        if epoch_ind==0 and i==0:
            tb_writer.add_graph(network, X, verbose=False)

        optimizer.zero_grad()
        predictX, predictY = network(X)

        loss = beta*loss_function(predictY, Y) + loss_function(predictX, X)
        loss_list.append(loss.item())

        loss.backward()
        optimizer.step()

        MAPE = to.mean(to.abs((Y - predictY) / Y)*100)
        MAPE_list.append(MAPE.item())
    
    mean_loss = to.mean(to.tensor(loss_list, device=device)).item()
    mean_MAPE = to.mean(to.tensor(MAPE_list, device=device)).item()

    return mean_loss, mean_MAPE


def valid_epoch(
    network,
    valid_dataloader, beta,
    loss_function
    ):

    loss_list = []
    MAPE_list = []

    for i, data in enumerate(valid_dataloader):
        X, Y = data
        predictX, predictY = network(X)

        loss = beta*loss_function(predictY, Y) + loss_function(predictX, X)
        loss_list.append(loss.item())
 
        MAPE = to.mean(to.abs((Y - predictY) / Y)*100)
        MAPE_list.append(MAPE.item())
    
    mean_loss = to.mean(to.tensor(loss_list, device=device)).item()
    mean_MAPE = to.mean(to.tensor(MAPE_list, device=device)).item()
    return mean_loss, mean_MAPE

In [8]:
def train_dAE(
    network, beta,
    train_dataloader, valid_dataloader,
    loss_function, optimizer_type,
    epochs, learn_rate
    ):

    if optimizer_type=='adam':
        optimizer = to.optim.Adam(network.parameters(), lr=learn_rate)
    else:
        optimizer = to.optim.SGD(network.parameters(), lr=learn_rate)
    
    tb_writer = sumwriter('Current_ML_Results/Tensorboard')
    
    for i in range(epochs):
        network.train(True)
        mloss, mMAPE = train_epoch(network, train_dataloader,beta, loss_function, optimizer, tb_writer, i)

        network.eval()
        with to.no_grad():
            vmloss, vmMAPE = valid_epoch(network, valid_dataloader,beta, loss_function)
        

        print('-'*50)
        print('Epoch {} / {}'.format(i+1,epochs))
        print('-'*15)
        print('Average Train Loss : {}'.format(mloss))
        print('Average Validation Loss : {}'.format(vmloss))

        tb_writer.add_scalars("Batch Mean Loss",
                            {
                                'Train' : mloss,
                                'Validation' : vmloss
                            }, i+1)

        tb_writer.add_scalars("Batch MAPE",
                            {
                                'Train' : mMAPE,
                                'Validation' : vmMAPE
                            }, i+1)

    tb_writer.flush()
    tb_writer.close()

In [9]:
# Prepare Data
data = pd.read_csv('Data Workspace/FM_TV_Data.csv')
num_freq = 10

features = freq_name(num_freq,1,0)
labels = ['nu', 'a/b']

train_split = int(0.8*len(data))
valid_split = len(data)- train_split

scaled_data = data[features+labels].copy()
scaled_data[freq_name(num_freq,1,0)] = scaled_data[freq_name(num_freq,1,0)]*(100+np.random.normal(3.77,3.18,scaled_data[freq_name(num_freq,1,0)].shape))/100
scaled_data[freq_name(num_freq,1,0)] = np.log(scaled_data[freq_name(num_freq,1,0)])
# scaled_data['psi'] = np.log(scaled_data['psi'])

scaled_data = FFNN_data(scaled_data, features, labels)
train_set, valid_set = to_data.random_split(scaled_data, [train_split, valid_split])

In [10]:
# Train Model
# Parameters
num_X = len(features)
num_Y = len(labels)
henc_nodes = [10,20,40,80,160,80,40,20,10]
hdec_nodes = [10,20,40,80,160,80,40,20,10]
hdis_nodes = [10,20,40,80,160,80,40,20,10]
hactiv = 'silu'

beta = 1
num_Z = 10

batch_size_train = 200
batch_size_valid = 2000

epochs = 200
learn_rate = 1e-3

# Optim Selections
loss_function = to.nn.SmoothL1Loss()
optimizer_type = 'adam'

# Data loaders
train_loader = to.utils.data.DataLoader(train_set, batch_size=batch_size_train, shuffle=True)
valid_loader = to.utils.data.DataLoader(valid_set, batch_size=batch_size_valid, shuffle=True)

# Model
model = dAE_Network(num_X, num_Y, num_Z, henc_nodes, hdec_nodes, hdis_nodes, hactiv)

In [11]:
# Train
train_dAE(
    model, beta,
    train_loader, valid_loader,
    loss_function, optimizer_type,
    epochs, learn_rate)

to.save(model.state_dict(), 'Current_ML_Results/model.state')

--------------------------------------------------
Epoch 1 / 200
---------------
Average Train Loss : 2.9584429264068604
Average Validation Loss : 0.31804585456848145
--------------------------------------------------
Epoch 2 / 200
---------------
Average Train Loss : 0.268101304769516
Average Validation Loss : 0.2653183341026306
--------------------------------------------------
Epoch 3 / 200
---------------
Average Train Loss : 0.2580256164073944
Average Validation Loss : 0.26277413964271545
--------------------------------------------------
Epoch 4 / 200
---------------
Average Train Loss : 0.2585861384868622
Average Validation Loss : 0.2594972848892212
--------------------------------------------------
Epoch 5 / 200
---------------
Average Train Loss : 0.25726714730262756
Average Validation Loss : 0.2777474522590637
--------------------------------------------------
Epoch 6 / 200
---------------
Average Train Loss : 0.2552714943885803
Average Validation Loss : 0.2603759169578552
--

KeyboardInterrupt: 

In [12]:
# model.load_state_dict(to.load('Current_ML_Results/model.state'))

trainvalid_data = pd.read_csv('Data Workspace/FM_TV_Data.csv')
scaled_trainvalid_data = trainvalid_data[features+labels].copy()

# scaled_trainvalid_data['psi'] = np.log(scaled_trainvalid_data['psi'])
scaled_trainvalid_data[freq_name(num_freq,1,0)] = np.log(scaled_trainvalid_data[freq_name(num_freq,1,0)])

scaled_trainvalid_data = FFNN_data(scaled_trainvalid_data, features, labels)
test_loader = to.utils.data.DataLoader(scaled_trainvalid_data, batch_size=len(scaled_trainvalid_data), shuffle=False)

model.eval()
with to.no_grad():
    for i, data in enumerate(test_loader):
        X, Y = data
        predictX, predictY = model.forward(X)
            
        Y[:,0] = to.exp(Y[:,0])
        predictY[:,0] = to.exp(predictY[:,0])
        abs_perc_error = to.abs((Y- predictY)/Y)*100
        MAPE_per_dim = to.mean(abs_perc_error, 0)

        np.savetxt('Current_ML_Results/MAPE_trainvalid.txt', MAPE_per_dim.cpu().numpy())

        print('Absolute Percentage Errors: ')
        print('-'*20)
        print(MAPE_per_dim)

Absolute Percentage Errors: 
--------------------
tensor([  3.8314, 100.7179], device='cuda:0')


In [13]:
# model.load_state_dict(to.load('Current_ML_Results/model.state'))

test_data = pd.read_csv('Data Workspace/FM_Te_Data.csv')
scaled_test_data = test_data[features+labels].copy()

# scaled_test_data['psi'] = np.log(scaled_test_data['psi'])
scaled_test_data[freq_name(num_freq,1,0)] = np.log(scaled_test_data[freq_name(num_freq,1,0)])

scaled_test_data = FFNN_data(scaled_test_data, features, labels)
test_loader = to.utils.data.DataLoader(scaled_test_data, batch_size=len(scaled_test_data), shuffle=False)

model.eval()
with to.no_grad():
    for i, data in enumerate(test_loader):
        X, Y = data
        predictX, predictY = model.forward(X)
            
        Y[:,0] = to.exp(Y[:,0])
        predictY[:,0] = to.exp(predictY[:,0])
        abs_perc_error = to.abs((Y- predictY)/Y)*100
        MAPE_per_dim = to.mean(abs_perc_error, 0)

        np.savetxt('Current_ML_Results/MAPE_test.txt', MAPE_per_dim.cpu().numpy())

        print('Absolute Percentage Errors: ')
        print('-'*20)
        print(MAPE_per_dim)

Absolute Percentage Errors: 
--------------------
tensor([  3.8353, 101.0960], device='cuda:0')
