In [1]:
# from tqdm import tqdm
import pandas as pd
import numpy as np
from random import shuffle
import pickle

In [2]:
import torch
from torch import optim
from torch.autograd import Variable,gradcheck
from torch.utils.data import DataLoader

# sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

In [3]:
sing = pd.read_csv('wooddata/singleton.csv')
pair = pd.read_csv('wooddata/pair.csv')
trip = pd.read_csv('wooddata/triplet.csv')

## Inspection

In [4]:
x_sing = sing[['[Sal]', '[Ery]', '[Cm]', '[Tmp]', '[Ofl]', '[Dox]', '[Linc]']].values
x_pair = pair[['[Sal]', '[Ery]', '[Cm]', '[Tmp]', '[Ofl]', '[Dox]', '[Linc]']].values
x_trip = trip[['[Sal]', '[Ery]', '[Cm]', '[Tmp]', '[Ofl]', '[Dox]', '[Linc]']].values
x = np.concatenate((x_sing, x_pair, x_trip), axis=0)
y_sing = sing['g'].values
y_pair = pair['g'].values
y_trip = trip['g'].values
y = np.concatenate((y_sing, y_pair, y_trip), axis=0)

In [None]:
with open('embeddings', 'rb') as handle:
    edge_vals = pickle.load(handle)

In [5]:
from DNN import DNN

# split dataset
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.50)
#X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.50)

len(X_train), len(X_test)

(1460, 1460)

In [6]:
#Percent of training set
# one_tenth_length = int(len(X_train) * 0.1)
# X_train = X_train[:one_tenth_length * 3]
# y_train = y_train[:one_tenth_length * 3]
# print(len(X_train))
# print(len(y_train))

438
438


In [7]:
# normalize on training set and apply to test set
std_scaler_x = StandardScaler()
std_scaler_y = StandardScaler()

# fit and transform on training set
X_train = std_scaler_x.fit_transform(X_train)
y_train = std_scaler_y.fit_transform(
    y_train[:, np.newaxis]
).ravel()

# apply transformation on validation set
# X_val = std_scaler_x.transform(X_val)
# y_val = std_scaler_y.transform(
#     y_val[:, np.newaxis]
# ).ravel()

# apply transformation on test set
X_test = std_scaler_x.transform(X_test)
y_test = std_scaler_y.transform(
    y_test[:, np.newaxis]
).ravel()

In [8]:
%%time
# CUDA support 
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# model settings
D_in  = X_train.shape[1]
D_out = 1
H = 50
Depth = 5
NUMEPOCHS = 500
Batch_size = 100

model = DNN(
    input_size=D_in,
    output_size=D_out,
    hidden_size=H,
    depth=Depth
).to(device)

# Loss Function
criterion = torch.nn.MSELoss()

# Optimizer
optimizer = optim.Adamax(model.parameters())

CPU times: user 4.54 ms, sys: 236 µs, total: 4.78 ms
Wall time: 3.98 ms


In [9]:
# Input Data
trainX = Variable(torch.from_numpy(X_train).float())
trainY = Variable(torch.from_numpy(y_train).float())
testX = Variable(torch.from_numpy(X_test).float())
testY = Variable(torch.from_numpy(y_test).float())

print(trainX.shape)
print(trainY.shape)
print(testX.shape)
print(testY.shape)

# For Supervising Losses
losses = list()

if torch.cuda.is_available():
    trainX = trainX.to(device)
    trainY = trainY.to(device)
    testX = testX.to(device)
    testY = testY.to(device)
    
data_train_loader = DataLoader(
    list(zip(trainX,trainY)), 
    batch_size=Batch_size, 
    shuffle=True
)

torch.Size([438, 7])
torch.Size([438])
torch.Size([1460, 7])
torch.Size([1460])


In [10]:
# Train the model
mse = []
r2 = []
for i in range(10):
    print("Epochs")
    for epoch in range(NUMEPOCHS):
        for batchX, batchY in data_train_loader: 
            # Forward pass
            outputs = model(batchX)
            loss = criterion(outputs.squeeze(), batchY)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()     
            optimizer.step()
            losses.append(loss)

        # Print Epochs and Losses to Monitor Convergence
        if epoch % 50 == 0:
            print("{}".format(epoch),end = ", ")
            print(loss)

    print('\nTraining Complete')
    pred = model(testX)
    predictions = pred.detach().cpu().numpy().ravel()
    testvals = testY.cpu().numpy()
    predictions = std_scaler_y.inverse_transform(predictions[:, np.newaxis]).ravel()
    testvals = std_scaler_y.inverse_transform(testvals[:, np.newaxis]).ravel()
    mse.append(np.mean((predictions - testvals) ** 2))
    r2.append(r2_score(testvals,predictions))

Epochs
0, tensor(1.0671, grad_fn=<MseLossBackward>)
50, tensor(0.0842, grad_fn=<MseLossBackward>)
100, tensor(0.0820, grad_fn=<MseLossBackward>)
150, tensor(0.0349, grad_fn=<MseLossBackward>)
200, tensor(0.0649, grad_fn=<MseLossBackward>)
250, tensor(0.0711, grad_fn=<MseLossBackward>)
300, tensor(0.0570, grad_fn=<MseLossBackward>)
350, tensor(0.0627, grad_fn=<MseLossBackward>)
400, tensor(0.1775, grad_fn=<MseLossBackward>)
450, tensor(0.0458, grad_fn=<MseLossBackward>)

Training Complete
Epochs
0, tensor(0.0304, grad_fn=<MseLossBackward>)
50, tensor(0.0414, grad_fn=<MseLossBackward>)
100, tensor(0.0621, grad_fn=<MseLossBackward>)
150, tensor(0.0784, grad_fn=<MseLossBackward>)
200, tensor(0.0322, grad_fn=<MseLossBackward>)
250, tensor(0.0402, grad_fn=<MseLossBackward>)
300, tensor(0.0352, grad_fn=<MseLossBackward>)
350, tensor(0.0678, grad_fn=<MseLossBackward>)
400, tensor(0.0451, grad_fn=<MseLossBackward>)
450, tensor(0.0280, grad_fn=<MseLossBackward>)

Training Complete
Epochs
0, tens

In [11]:
np.mean(r2)

0.9006357863794557

In [12]:
np.mean(mse)

0.004929027