In [1]:
import os
import math
import torch
import random
import pickle
import pandas
import calendar
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
# Let the process continue when more than one OpenMP runtime gets loaded
# (e.g. one from torch and one from numpy/matplotlib) instead of aborting.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# New floating-point tensors default to float32. set_default_dtype replaces the
# deprecated torch.set_default_tensor_type(torch.FloatTensor) call.
torch.set_default_dtype(torch.float32)
import config

# Read and process data

In [2]:
# Read the X tensor back from disk and show its dimensions.
# torch.load unpickles the file, so only point it at trusted data.
X = torch.load('./data/neg_RT_ratio.data')
X.size()

torch.Size([9999, 12])

In [3]:
# Column-wise extrema of X: dim=0 reduces over the rows, and `.values` keeps
# the extreme values while dropping the row indices torch returns with them.
X_max = X.max(dim=0).values
X_min = X.min(dim=0).values

In [4]:
# Inspect the per-column maxima of X.
X_max

tensor([2.0498e+06, 3.1997e+04, 4.9998e+05, 1.0000e-04, 8.0000e-05, 5.0000e-04,
        4.0000e-05, 2.0000e-04, 1.5000e-04, 2.5000e+00, 1.6667e+01, 3.9800e+00])

In [5]:
# Inspect the per-column minima of X.
X_min

tensor([2.5000e+05, 6.0000e+03, 3.0000e+05, 8.0000e-05, 4.0000e-05, 4.8000e-04,
        3.0000e-05, 8.0000e-05, 5.0000e-05, 1.0000e+00, 1.2000e+01, 5.3333e-01])

In [6]:
# Rescale X with the project helper, passing the per-column minima and maxima.
# NOTE(review): config.Normalization is defined outside this notebook; presumably
# it is min-max scaling to [0, 1] -- confirm against the config module.
Xn = config.Normalization(X, X_min, X_max)
Xn

tensor([[0.7501, 0.2500, 0.2500,  ..., 0.1429, 0.7433, 0.2398],
        [0.2500, 0.7501, 0.7501,  ..., 0.6000, 0.1635, 0.2708],
        [0.3750, 0.3750, 0.6251,  ..., 0.5212, 0.4664, 0.2342],
        ...,
        [0.5671, 0.4215, 0.4828,  ..., 0.2705, 0.3631, 0.0735],
        [0.0671, 0.9216, 0.9828,  ..., 0.5170, 0.7673, 0.4087],
        [0.0000, 0.0000, 0.0000,  ..., 0.6667, 0.8571, 0.3095]])

In [7]:
# Sanity check: per-column max and min of the normalized X (values plus row indices).
Xn.max(0), Xn.min(0)

(torch.return_types.max(
 values=tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 indices=tensor([5459, 4367, 4175,   38,   87,   41,   22,  128,  179,  160,  640, 6532])),
 torch.return_types.min(
 values=tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 indices=tensor([9998, 9998, 9998,   53,   95,   57,   15,  220,  244, 2129,   57, 7102])))

In [9]:
# Read the Y tensor back from disk and show its dimensions.
# torch.load unpickles the file, so only point it at trusted data.
# NOTE(review): the statistics printed for Y report dtype=torch.float64 while X
# prints with the default dtype -- confirm downstream code casts as needed.
Y = torch.load('./data/neg_param.data')
Y.size()

torch.Size([9999, 4])

In [10]:
# Column-wise extrema of Y: dim=0 reduces over the rows, and `.values` keeps
# the extreme values while dropping the row indices torch returns with them.
Y_max = Y.max(dim=0).values
Y_min = Y.min(dim=0).values

In [11]:
# Inspect the per-column minima of Y.
Y_min

tensor([-1.2608e-01,  5.1942e-03, -4.7416e-01,  5.2967e+00],
       dtype=torch.float64)

In [12]:
# Inspect the per-column maxima of Y.
Y_max

tensor([9.6749e-01, 9.8678e-01, 1.5411e-01, 4.8234e+02], dtype=torch.float64)

In [13]:
# Rescale Y with the project helper, passing Y's own per-column minima and maxima.
# NOTE(review): config.Normalization is defined outside this notebook; presumably
# it is min-max scaling to [0, 1] -- confirm against the config module.
Yn = config.Normalization(Y, Y_min, Y_max)

In [15]:
# Sanity check: per-column max and min of the normalized Y (values plus row indices).
Yn.max(0), Yn.min(0)

(torch.return_types.max(
 values=tensor([1., 1., 1., 1.], dtype=torch.float64),
 indices=tensor([1126, 8469, 8701, 4335])),
 torch.return_types.min(
 values=tensor([0., 0., 0., 0.], dtype=torch.float64),
 indices=tensor([9939, 8792, 2472, 5118])))

# Make dataset

In [16]:
# Fractions of the rows used for training and validation; whatever is left
# after both becomes the test set.
TRAIN_FRAC = 0.7
VALID_FRAC = 0.2

n_samples = X.shape[0]
E_train = int(n_samples * TRAIN_FRAC)
E_valid = int(n_samples * VALID_FRAC)
E_test  = n_samples - E_train - E_valid

# NOTE(review): config.SetSeed is defined outside this notebook; presumably it
# seeds the RNGs so the permutation below is reproducible -- confirm.
config.SetSeed()
index = torch.randperm(n_samples)

# "learn" is train + valid together; train, valid and test are disjoint slices
# of the same permutation.
index_learn = index[:E_train + E_valid]
index_train = index[:E_train]
index_valid = index[E_train:E_train + E_valid]
# Slice from the explicit offset rather than index[-E_test:]: with E_test == 0
# the negative form becomes index[0:] and would return the whole permutation.
index_test  = index[E_train + E_valid:]

# The three slices must partition the dataset exactly.
assert len(index_train) + len(index_valid) + len(index_test) == n_samples
assert len(index_test) == E_test

# Raw (un-normalized) splits.
X_learn, Y_learn = X[index_learn, :], Y[index_learn, :]
X_train, Y_train = X[index_train, :], Y[index_train, :]
X_valid, Y_valid = X[index_valid, :], Y[index_valid, :]
X_test,  Y_test  = X[index_test, :],  Y[index_test, :]

# Normalized splits, using the same row indices as the raw ones.
# NOTE(review): Xn/Yn were scaled with bounds taken over ALL rows before this
# split, so validation/test rows influenced the normalization statistics.
Xn_learn, Yn_learn = Xn[index_learn, :], Yn[index_learn, :]
Xn_train, Yn_train = Xn[index_train, :], Yn[index_train, :]
Xn_valid, Yn_valid = Xn[index_valid, :], Yn[index_valid, :]
Xn_test,  Yn_test  = Xn[index_test, :],  Yn[index_test, :]

In [18]:
# Bundle the full tensors, every split (normalized and raw) and the
# normalization bounds into one dictionary, then serialize it to disk.
a = dict(
    # full tensors: normalized, then raw
    Xn=Xn, Yn=Yn,
    X=X, Y=Y,

    # normalized splits
    Xn_learn=Xn_learn, Yn_learn=Yn_learn,
    Xn_train=Xn_train, Yn_train=Yn_train,
    Xn_valid=Xn_valid, Yn_valid=Yn_valid,
    Xn_test=Xn_test, Yn_test=Yn_test,

    # raw splits
    X_learn=X_learn, Y_learn=Y_learn,
    X_train=X_train, Y_train=Y_train,
    X_valid=X_valid, Y_valid=Y_valid,
    X_test=X_test, Y_test=Y_test,

    # per-column bounds passed to config.Normalization
    X_max=X_max, X_min=X_min,
    Y_max=Y_max, Y_min=Y_min,
)

torch.save(a, './data/neg.ds')