In [1]:
import os
import math
import torch
import random
import pickle
import pandas
import calendar
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
# Workaround for the "duplicate OpenMP runtime" abort that can occur when
# several libraries each bundle their own copy of the OpenMP runtime.
# NOTE(review): this is set after the imports above have already run; confirm
# it still takes effect early enough on the target platform.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# Make float32 the default dtype for newly created floating-point tensors.
# torch.set_default_tensor_type() is deprecated in recent PyTorch releases;
# torch.set_default_dtype() is the supported way to set the default dtype.
torch.set_default_dtype(torch.float32)
import config

# Read and process data

In [2]:
# Read the feature tensor from disk and cast it to float32.
# torch.load unpickles the file, so only load data files from a trusted source.
X = torch.load('./data/hard_sigmoid_RT_ratio.data').to(torch.float32)
# Show the dimensions (the recorded run reports 9999 rows and 4 columns).
X.size()

torch.Size([9999, 4])

In [3]:
# Column-wise extrema over all rows; `.values` drops the argmax/argmin indices
# that the dim-wise reduction also returns.
X_max = X.max(dim=0).values
X_min = X.min(dim=0).values

In [4]:
# Inspect the per-column maxima. In the recorded output the columns differ by
# many orders of magnitude (about 1e7 for column 0 versus 5e-4 for column 2).
X_max

tensor([9.9988e+06, 1.0000e-03, 5.0000e-04, 2.4390e+01])

In [5]:
# Inspect the per-column minima (companion to the maxima shown in the previous cell).
X_min

tensor([2.0120e+05, 2.0000e-04, 4.0000e-05, 4.0080e-01])

In [6]:
# Rescale the features with the project helper, passing the per-column minima
# and maxima of X. The recorded range check shows every column of Xn spanning
# exactly [0, 1], so this is presumably min-max scaling; confirm in config.py.
# NOTE(review): X_min / X_max are computed over all rows, i.e. before the
# train/valid/test split; confirm that using test rows for the bounds is intended.
Xn = config.Normalization(X, X_min, X_max)
Xn

tensor([[0.5000, 0.5000, 0.5000, 0.0759],
        [0.7501, 0.2500, 0.2500, 0.0909],
        [0.2499, 0.7500, 0.7500, 0.0699],
        ...,
        [0.5671, 0.4212, 0.4826, 0.0687],
        [0.0670, 0.9212, 0.9826, 0.0627],
        [0.0546, 0.9300, 0.2261, 0.2566]])

In [7]:
# Sanity check: column-wise maximum and minimum (with their row indices) of the
# rescaled features.
torch.max(Xn, dim=0), torch.min(Xn, dim=0)

(torch.return_types.max(
 values=tensor([1., 1., 1., 1.]),
 indices=tensor([5459, 1283,  865, 3853])),
 torch.return_types.min(
 values=tensor([0., 0., 0., 0.]),
 indices=tensor([8189, 1540,  544, 2568])))

In [8]:
# Read the target tensor, reshape it into a single column and cast to float32.
# torch.load unpickles the file, so only load data files from a trusted source.
Y = torch.load('./data/hard_sigmoid_power.data').view(-1, 1).to(torch.float32)
# Show the dimensions (the recorded run reports 9999 rows and 1 column).
Y.size()

torch.Size([9999, 1])

In [9]:
# Extrema of the single target column over all rows; `.values` drops the
# argmax/argmin indices that the dim-wise reduction also returns.
Y_max = Y.max(dim=0).values
Y_min = Y.min(dim=0).values

In [10]:
# Rescale the target with the same project helper used for the features,
# passing the minimum and maximum of Y (presumably min-max scaling; confirm in
# config.py).
# NOTE(review): the bounds are taken over all rows, before the split; confirm
# that this is intended.
Yn = config.Normalization(Y, Y_min, Y_max)

In [11]:
# Sanity check: overall maximum and minimum of the rescaled target.
torch.max(Yn), torch.min(Yn)

(tensor(1.), tensor(0.))

# Make dataset

In [12]:
# Fractions of the data used for training and validation; whatever remains
# becomes the test set.
TRAIN_FRACTION = 0.7
VALID_FRACTION = 0.2

n_samples = X.shape[0]
E_train = int(n_samples * TRAIN_FRACTION)
E_valid = int(n_samples * VALID_FRACTION)
E_test  = n_samples - E_train - E_valid

# Project helper; presumably seeds the random number generators so that the
# permutation below is reproducible (confirm in config.py).
config.SetSeed()
index = torch.randperm(n_samples)

# Carve the shuffled indices into consecutive, non-overlapping ranges.
# "learn" is the union of the training and validation ranges.
index_learn = index[:E_train+E_valid]
index_train = index[:E_train]
index_valid = index[E_train:E_train+E_valid]
# Slice from the end of the validation range rather than with a negative
# offset: index[-E_test:] would return the *whole* permutation if E_test were 0.
index_test  = index[E_train+E_valid:]

# The three ranges must partition the dataset exactly.
assert len(index_train) + len(index_valid) + len(index_test) == n_samples
assert len(index_test) == E_test

# Pick the rows belonging to each split from the raw (X, Y) and the rescaled
# (Xn, Yn) tensors. All four tensors are indexed with the same row indices, so
# raw and rescaled versions of a split stay aligned.
X_learn, Y_learn, Xn_learn, Yn_learn = (t[index_learn] for t in (X, Y, Xn, Yn))
X_train, Y_train, Xn_train, Yn_train = (t[index_train] for t in (X, Y, Xn, Yn))
X_valid, Y_valid, Xn_valid, Yn_valid = (t[index_valid] for t in (X, Y, Xn, Yn))
X_test,  Y_test,  Xn_test,  Yn_test  = (t[index_test]  for t in (X, Y, Xn, Yn))

In [13]:
# Collect the full tensors, every split (rescaled and raw) and the scaling
# bounds into one dictionary, then write it to disk. The groups are merged in
# the order listed so the key order of the saved dictionary is unchanged.
whole_sets = {'Xn': Xn, 'Yn': Yn, 'X': X, 'Y': Y}

normalized_splits = {
    'Xn_learn': Xn_learn, 'Yn_learn': Yn_learn,
    'Xn_train': Xn_train, 'Yn_train': Yn_train,
    'Xn_valid': Xn_valid, 'Yn_valid': Yn_valid,
    'Xn_test': Xn_test, 'Yn_test': Yn_test,
}

raw_splits = {
    'X_learn': X_learn, 'Y_learn': Y_learn,
    'X_train': X_train, 'Y_train': Y_train,
    'X_valid': X_valid, 'Y_valid': Y_valid,
    'X_test': X_test, 'Y_test': Y_test,
}

# Bounds that were passed to the normalisation helper; kept alongside the data
# so the rescaling can be undone later.
scaling_bounds = {
    'X_max': X_max, 'X_min': X_min,
    'Y_max': Y_max, 'Y_min': Y_min,
}

a = {**whole_sets, **normalized_splits, **raw_splits, **scaling_bounds}

torch.save(a, './data/hard_sigmoid_power.ds')