In [None]:
import sys
sys.path.insert(0, '../')
from functions import *

# Reproducibility: seed NumPy's global generator and PyTorch's global generator.
np.random.seed(42)
torch.manual_seed(42)
# Seed reserved for dataset generation. Because this call comes last, it
# replaces the torch seed set just above: torch ends up seeded with 15.
sampling_seed = 15
torch.manual_seed(sampling_seed)

# Variables to CF

In [None]:
# Read the space-delimited training table and keep it as a float NumPy array.
data = np.array(
    pd.read_table('TrainingData_401.txt', delimiter=' ', dtype='float', header=None)
)

# `perc` is the percentage of rows (taken from the end of the table) held out
# for validation; with perc = 0 the validation arrays are empty.
perc = 0
validation_size = int(perc*data.shape[0]/100)
training_size = data.shape[0] - validation_size

# Per-column statistics and standardized copies, one list entry per column.
# Every standardized entry is reshaped to a column vector of shape (n, 1).
# The mean/std are computed over ALL rows (training and validation alike).
data_mean, data_std = [], []
data_norm, data_train, data_val = [], [], []
for col in range(data.shape[1]):
    mu = data[:, col].mean()
    sigma = data[:, col].std()
    data_mean.append(mu)
    data_std.append(sigma)
    data_norm.append(((data[:, col] - mu)/sigma).reshape(-1, 1))
    data_train.append(((data[:training_size, col] - mu)/sigma).reshape(-1, 1))
    data_val.append(((data[training_size:, col] - mu)/sigma).reshape(-1, 1))

# Hyper-parameter grid: each key maps to the list of values to try.
# The batch size equals the number of rows, i.e. full-batch training.
network_properties = {
    "hidden_layers": [4, 10, 20],
    "neurons": [20, 50, 100],
    "regularization_exp": [2],
    "regularization_param": [0],
    "batch_size": [data_norm[0].shape[0]],
    "epochs": [1000],
    "optimizer": ["LBFGS"],
    "init_weight_seed": [np.random.randint(0,100)],
}

# Cartesian product of all value lists; each tuple follows the key order above.
settings = list(itertools.product(*network_properties.values()))

i = 0

train_err_conf, val_err_conf, test_err_conf = [], [], []

for set_num, setup in enumerate(settings):
    print("###################################", set_num, "###################################")
    # Pair every hyper-parameter name with the value chosen for this run.
    setup_properties = dict(zip(network_properties, setup))

    # Inputs: first eight standardized columns side by side; target: ninth column.
    inputs_train = torch.tensor(np.concatenate(data_train[:8], axis=1)).float()
    target_train = torch.tensor(data_train[8]).float()

    relative_error_train_, relative_error_val_ = run_configuration(
        setup_properties, inputs_train, target_train, 8, 1
    )
    train_err_conf.append(relative_error_train_)
    val_err_conf.append(relative_error_val_)

print(train_err_conf, val_err_conf)

# Convert the collected per-configuration errors to arrays for plotting.
train_err_conf = np.array(train_err_conf)
val_err_conf = np.array(val_err_conf)

# Scatter of log10(training error) against log10(validation error),
# one star per hyper-parameter configuration.
fig, ax = plt.subplots(figsize=(16, 8))
ax.grid(True, which="both", ls=":")
ax.scatter(np.log10(train_err_conf), np.log10(val_err_conf), marker="*")
ax.set_xlabel("Selection Criterion")
ax.set_ylabel("Generalization Error")
ax.set_title(r'Validation - Training Error VS Generalization error ($\sigma=0.0$)')
#plt.savefig("sigma.png", dpi=400)
plt.show()

In [None]:
# Train the network mapping the eight standardized input variables to the
# standardized ninth column.
batch_size = data_norm[0].shape[0]  # full-batch: one batch holds every row
retrain = 128                       # base value for the weight-initialisation seed
n_epochs_1 = 300

# The standardized arrays are float64, so torch.tensor(...) would produce
# float64 tensors. Cast them to float32 so the training data has the same
# dtype as the inputs this network receives everywhere else in the notebook
# (the grid search and the evaluation cells all pass `.float()` tensors).
train_inputs = torch.tensor(np.concatenate(data_train[:8], axis=1)).float()
train_target = torch.tensor(data_train[8]).float()
training = DataLoader(torch.utils.data.TensorDataset(train_inputs, train_target), batch_size=batch_size, shuffle=True)

my_network_1 = NeuralNet_Seq(input_dimension=8, output_dimension=1, n_hidden_layers=4, neurons=20)
# The seed passed to init_xavier is drawn at random around `retrain`.
init_xavier(my_network_1, retrain + np.random.randint(-retrain,retrain))
optimizer = optim.LBFGS(my_network_1.parameters(), lr=0.1, max_iter=1, max_eval=50000, tolerance_change=1.0 * np.finfo(float).eps)

if perc == 0:
    # No validation split: train on the whole table.
    history_1 = fit(my_network_1, training, n_epochs_1, optimizer, p=2, reg_param=0.0, verbose=False)
else:
    # Validation split available: pass it to fit_k (same float32 dtype as training).
    val_inputs = torch.tensor(np.concatenate(data_val[:8], axis=1)).float()
    val_target = torch.tensor(data_val[8]).float()
    history_1 = fit_k(my_network_1, training, val_inputs, val_target, n_epochs_1, optimizer, p=2, reg_param=0.0, verbose=False)

In [None]:
# Loss curves of the CF network on log-log axes.
epochs_axis = np.arange(1, n_epochs_1 + 1)

# Without a validation split `history_1` is a single curve; otherwise it is
# indexed as [0] = training loss, [1] = validation loss.
if perc == 0:
    curves = [(history_1, 'Training loss')]
else:
    curves = [(history_1[0], 'Training loss'), (history_1[1], 'Validation loss')]

plt.figure()
plt.grid(True, which="both", ls=":")
for losses, curve_label in curves:
    plt.plot(epochs_axis, losses, label=curve_label)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("epochs")
plt.ylabel("loss")
plt.legend()
plt.show()

In [None]:
def _relative_error(pred, truth):
    """Return mean((pred - truth)^2) / mean(truth^2) as a torch scalar.

    `pred` is a 1-D torch tensor of predictions in physical units and `truth`
    is the matching 1-D NumPy array of reference values.
    """
    truth = torch.tensor(truth)
    return torch.mean((pred - truth)**2)/torch.mean(truth**2)


# Held-out table used as a test set (same column layout as the training table).
data_test = pd.read_table('TrainingData_101.txt', delimiter=' ', dtype='float', header=None)
data_test = np.array(data_test.iloc[:,:])

# Standardize with the TRAINING statistics: the network was fitted on data
# scaled by data_mean/data_std, so unseen data must go through the same
# transform. Using the test set's own statistics would feed the network a
# differently scaled input.
data_test_norm = [
    ((data_test[:, col] - data_mean[col])/data_std[col]).reshape(-1, 1)
    for col in range(data_test.shape[1])
]

# Predictions are mapped back to physical units with the training statistics
# of the target column. De-normalizing with the mean/std of the test labels
# would leak ground truth into the prediction and make the test error
# optimistic; it would also differ from how the final prediction is rescaled.
cf_test = my_network_1(torch.tensor(np.concatenate(data_test_norm[:8], axis=1)).float()).reshape(-1,).detach()
cf_test = cf_test*data_std[8] + data_mean[8]

cf_train = my_network_1(torch.tensor(np.concatenate(data_train[:8], axis=1)).float()).reshape(-1,).detach()
cf_train = cf_train*data_std[8] + data_mean[8]

relative_error_train = _relative_error(cf_train, data[:training_size,8])
print("Relative Training Error: ", relative_error_train.detach().numpy()**0.5*100, "%")

if perc != 0:
    cf_val = my_network_1(torch.tensor(np.concatenate(data_val[:8], axis=1)).float()).reshape(-1,).detach()
    cf_val = cf_val*data_std[8] + data_mean[8]
    # Stored under its own name so the training error above is not overwritten.
    relative_error_val = _relative_error(cf_val, data[training_size:,8])
    print("Relative Validation Error: ", relative_error_val.detach().numpy()**0.5*100, "%")

relative_error_test = _relative_error(cf_test, data_test[:,8])
print("Relative Testing Error: ", relative_error_test.detach().numpy()**0.5*100, "%")

In [None]:
# Overlay the distributions of the ninth column (reference values) and of the
# network predictions, for both the training and the test table.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases —
# consider histplot/kdeplot once the installed version is confirmed.
distributions = [
    (data[:,8], 'train'),
    (cf_train.detach(), 'pred on train'),
    (data_test[:,8], 'test'),
    (cf_test.detach(), 'pred on test'),
]

plt.figure()
for values, series_label in distributions:
    sns.distplot(values, label=series_label)
plt.legend()
plt.show()

# Sobol points to variables

In [None]:
# Sobol samples: keep only as many rows as the training table has and
# standardize every column with its own mean / std.
sobol_pts = pd.read_table('samples_sobol.txt', delimiter=' ', dtype='float', header=None)
n_rows = len(data)
temp = np.zeros((n_rows, sobol_pts.shape[1]))

y, y_mean, y_std = [], [], []
for j in range(temp.shape[1]):
    temp[:, j] = sobol_pts.iloc[:n_rows, j]
    column = temp[:, j]
    mu, sigma = column.mean(), column.std()
    y_mean.append(mu)
    y_std.append(sigma)
    # Each entry of `y` is a standardized column tensor of shape (n_rows, 1).
    y.append(torch.tensor((column - mu)/sigma).reshape(-1, 1))

In [None]:
# Train one small network per Sobol column: network t maps the standardized
# Sobol column t to the standardized training column t.
# Relies on `retrain` defined in the CF-network training cell.
batch_size = y[0].shape[0]  # full-batch training
n_epochs_2 = 500
my_network_2, history_2 = [], []
for t in range(temp.shape[1]):
    # y[t] and data_norm[t] are float64; cast both to float32 so the training
    # data matches the dtype these networks are later called with
    # (the final-prediction cell feeds them `.float()` inputs).
    sobol_input = y[t].float()
    variable_target = torch.tensor(data_norm[t]).float()
    training = DataLoader(torch.utils.data.TensorDataset(sobol_input, variable_target), batch_size=batch_size, shuffle=True)

    my_network_2.append(NeuralNet_Seq(input_dimension=y[t].shape[1], output_dimension=data_norm[t].shape[1], n_hidden_layers=1, neurons=10))
    # The seed passed to init_xavier is drawn at random around `retrain`.
    init_xavier(my_network_2[t], retrain + np.random.randint(-retrain,retrain))
    optimizer = optim.LBFGS(my_network_2[t].parameters(), lr=0.1, max_iter=1, max_eval=50000, tolerance_change=1.0 * np.finfo(float).eps)
    history_2.append(fit(my_network_2[t], training, n_epochs_2, optimizer, p=2, reg_param=0.00, verbose=False))
    print('Training variable ', t+1, 'done.')

# Final prediction

In [None]:
# Final pipeline: test points -> per-variable networks -> CF network -> file.
y_test = pd.read_table('TestingData.txt', delimiter=' ', dtype='float', header=None)
y_test = np.array(y_test.iloc[:,:])

y_test_norm, var_pred = [], []
for k in range(y_test.shape[1]):
    # NOTE(review): each test column is standardized with its OWN mean/std,
    # not with the statistics of the Sobol training columns — confirm intended.
    column = y_test[:, k]
    standardized = torch.tensor((column - column.mean())/column.std()).reshape(-1, 1)
    y_test_norm.append(standardized)

    # Predict variable k, then re-standardize the prediction itself
    # (torch mean/std of the predicted values) before it is fed to the CF network.
    predicted = my_network_2[k](standardized.float()).reshape(-1,).detach()
    var_pred.append(((predicted - predicted.mean())/predicted.std()).reshape(-1, 1))

# Stack the predicted variables column-wise and evaluate the CF network.
cf_inputs = torch.tensor(np.concatenate(var_pred, axis=1))
cf = my_network_1(cf_inputs).detach().numpy()
# Back to physical units using the training statistics of the ninth column.
cf = cf*data_std[8] + data_mean[8]

# One prediction per line, no header and no index column.
df = pd.DataFrame(np.array(cf.reshape(-1,)))
df.to_csv('Task2.txt', header = False, index = None)

In [None]:
# Compare the distribution of the final predictions with the reference
# values of the training table and of the earlier test table.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases —
# consider histplot/kdeplot once the installed version is confirmed.
series_to_plot = [
    (data[:,8], 'train'),
    (data_test[:,8], 'previous test'),
    (cf, 'final pred'),
]

plt.figure()
for values, series_label in series_to_plot:
    sns.distplot(values, label=series_label)
plt.legend()
plt.show()