In [1]:
import os
import math
import torch
import random
import pickle
import pandas
import calendar
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
# Set KMP_DUPLICATE_LIB_OK so the OpenMP runtime does not abort when more than
# one copy of it gets loaded into the process (a common clash between torch and
# other numeric/plotting packages).
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# Make float32 the default dtype for newly created floating-point tensors.
# torch.set_default_dtype is the supported replacement for the deprecated
# torch.set_default_tensor_type(torch.FloatTensor) call.
torch.set_default_dtype(torch.float32)
import config

# Read and process data

In [2]:
# Load the raw input table from CSV into a pandas DataFrame.
x = pandas.read_csv("./data/inv_RT_ratio.csv")

In [3]:
# Build the input tensor from the first `x_rows` rows and the first 7 columns
# of the DataFrame in a single vectorised conversion (no per-row Python loop).
# TODO(review): 9999 is hard-coded; if it is simply the number of rows in the
# CSV, replace it with len(x).
x_rows = 9999
if len(x) < x_rows:
    # Fail loudly instead of silently producing a shorter tensor.
    raise IndexError(f"expected at least {x_rows} rows in x, found {len(x)}")
X = torch.tensor(
    x.iloc[:x_rows, :7].to_numpy(dtype=np.float64),
    # Same dtype torch.tensor() picks for a nested list of Python floats.
    dtype=torch.get_default_dtype(),
)
X.shape

torch.Size([9999, 7])

In [4]:
# Column-wise extrema of X: reduce over dim 0 and keep only the values
# (the accompanying argmax/argmin indices are discarded).
X_max = X.max(dim=0).values
X_min = X.min(dim=0).values

In [5]:
# Display the per-column maxima of X.
X_max

tensor([5.0000e+02, 2.5000e+02, 5.0000e+05, 4.0000e+05, 5.0000e+05, 8.0000e-04,
        7.0000e-05])

In [6]:
# Display the per-column minima of X.
X_min

tensor([1.0000e+01, 5.0000e+00, 1.0000e+04, 8.0000e+03, 1.0000e+04, 2.0000e-04,
        1.0000e-05])

In [7]:
# Rescale X with the project helper config.Normalization, passing the
# per-column minima and maxima computed from X itself.
# NOTE(review): presumably min-max scaling to [0, 1] — confirm in config.py.
Xn = config.Normalization(X, X_min, X_max)
Xn

tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.5000, 0.5020, 0.5000,  ..., 0.5000, 0.5000, 0.5000],
        [0.7510, 0.2490, 0.2490,  ..., 0.7510, 0.7500, 0.2500],
        ...,
        [0.3918, 0.0367, 0.3490,  ..., 0.5531, 0.1767, 0.9833],
        [0.8918, 0.5388, 0.8490,  ..., 0.0531, 0.6767, 0.4833],
        [0.4755, 0.2000, 0.5204,  ..., 0.1816, 0.9950, 0.2833]])

In [8]:
# Global (max, min) over every entry of Xn, shown as a quick range check.
torch.max(Xn), torch.min(Xn)

(tensor(1.), tensor(0.))

In [9]:
# Load the target table from CSV into a pandas DataFrame and show its
# (rows, columns) shape.
y = pandas.read_csv("./data/inv_eta.csv")
y.shape

(9999, 4)

In [10]:
# Build the target tensor from the first `y_rows` rows of the DataFrame (all
# columns) in a single vectorised conversion (no per-row Python loop).
# TODO(review): 9999 is hard-coded; if it is simply the number of rows in the
# CSV, replace it with len(y).
y_rows = 9999
if len(y) < y_rows:
    # Fail loudly instead of silently producing a shorter tensor.
    raise IndexError(f"expected at least {y_rows} rows in y, found {len(y)}")
Y = torch.tensor(
    y.iloc[:y_rows, :].to_numpy(dtype=np.float64),
    # Same dtype torch.tensor() picks for a nested list of Python floats.
    dtype=torch.get_default_dtype(),
)
Y.shape

torch.Size([9999, 4])

In [11]:
# Column-wise extrema of Y: reduce over dim 0 and keep only the values
# (the accompanying argmax/argmin indices are discarded).
Y_max = Y.max(dim=0).values
Y_min = Y.min(dim=0).values

In [12]:
# Rescale Y with the project helper config.Normalization, passing the
# per-column minima and maxima computed from Y itself.
# NOTE(review): presumably min-max scaling to [0, 1] — confirm in config.py.
Yn = config.Normalization(Y, Y_min, Y_max)

In [13]:
# Global (max, min) over every entry of Yn, shown as a quick range check.
torch.max(Yn), torch.min(Yn)

(tensor(1.), tensor(0.))

# Make dataset

In [14]:
# Split sizes: 70 % of the rows for training, 20 % for validation, and
# whatever remains for testing.
n_samples = X.shape[0]
E_train = int(n_samples * 0.7)
E_valid = int(n_samples * 0.2)
E_test = int(n_samples - E_train - E_valid)
learn_end = E_train + E_valid  # first position past the train+valid range

# config.SetSeed() is called right before the permutation is drawn
# (presumably to fix the RNG state — confirm in config.py).
config.SetSeed()
index = torch.randperm(n_samples)
index_train = index[:E_train]
index_valid = index[E_train:learn_end]
index_learn = index[:learn_end]  # train and validation rows together
index_test = index[-E_test:]


def _select_rows(row_index):
    """Return the rows picked by row_index from X, Y, Xn and Yn, in that order."""
    return X[row_index, :], Y[row_index, :], Xn[row_index, :], Yn[row_index, :]


# Raw and normalised tensors are indexed with the same permutation slices, so
# every split stays row-aligned across X, Y, Xn and Yn.
X_learn, Y_learn, Xn_learn, Yn_learn = _select_rows(index_learn)
X_train, Y_train, Xn_train, Yn_train = _select_rows(index_train)
X_valid, Y_valid, Xn_valid, Yn_valid = _select_rows(index_valid)
X_test, Y_test, Xn_test, Yn_test = _select_rows(index_test)

In [15]:
# Bundle every tensor under its own name. Keys are inserted in a fixed order:
# full tensors, normalised splits, raw splits, then the column extrema.
a = {'Xn': Xn, 'Yn': Yn, 'X': X, 'Y': Y}
a.update({
    'Xn_learn': Xn_learn, 'Yn_learn': Yn_learn,
    'Xn_train': Xn_train, 'Yn_train': Yn_train,
    'Xn_valid': Xn_valid, 'Yn_valid': Yn_valid,
    'Xn_test': Xn_test, 'Yn_test': Yn_test,
})
a.update({
    'X_learn': X_learn, 'Y_learn': Y_learn,
    'X_train': X_train, 'Y_train': Y_train,
    'X_valid': X_valid, 'Y_valid': Y_valid,
    'X_test': X_test, 'Y_test': Y_test,
})
a.update({
    'X_max': X_max, 'X_min': X_min,
    'Y_max': Y_max, 'Y_min': Y_min,
})

# Write the same pickle to both destinations: the local data folder and the
# dataset folder of the sibling LNC directory.
for out_path in ('./data/inv_dataset_without_ratio.p',
                 '../../LNC/dataset/inv_dataset_without_ratio.p'):
    with open(out_path, 'wb') as file:
        pickle.dump(a, file)