In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import time
from tqdm import tqdm
import os

import polyRBM
import analysisUtility as anaU

## Training routine

### ring chains

In [None]:
# Configuration for the ring-chain training runs.
# chainLengths and fnames are paired by index in the training loop below, and
# every entry of networkSizes is used as a hidden-layer size for each chain length.
# The three ###...### placeholders must be replaced by string literals before
# this cell can run (as written the assignments have no right-hand side).
# savepath is string-concatenated with file names below, so it has to end
# with a path separator.
chainLengths = [8, 32]
networkSizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500]
parent = os.getcwd()
savepath = ###directory to save training results in###
fname_8 = ###file path of N=8 simulation data###
fname_32 = ###file path of N=32 simulation data###
fnames = [fname_8, fname_32]

# Train one RBM per (chain length, hidden-layer size) combination on ring
# conformations and store loss, loop counts and generated samples per run.
for ind, l in enumerate(chainLengths):
    fname = fnames[ind]
    dataset = polyRBM.CubicChainDataset(fname, 2)
    div = int(0.7*len(dataset.data))  # index separating the 70 % training part from the test part

    # Close every chain into a ring: append one extra bond that is the
    # negative sum of all existing bonds, so the bond vectors add up to zero.
    n_conf, n_bonds, n_dim = dataset.bonds.shape
    ringbonds = np.zeros((n_conf, n_bonds+1, n_dim))
    ringbonds[:, :-1, :] = dataset.bonds
    ringbonds[:, -1, :] = -np.sum(dataset.bonds, axis=1)
    dataset.bonds = ringbonds
    dataset.bits = polyRBM.bondsToBits2DYu(dataset.bonds)
    dataset.data = torch.from_numpy(dataset.bits).float()

    # Hold out the last 30 % as test set and keep only the first 70 % in the dataset.
    testset = dataset.bits[div:]
    dataset.bits = dataset.bits[:div]
    dataset.bonds = dataset.bonds[:div]
    dataset.data = dataset.data[:div]
    testset_tensor = torch.from_numpy(testset).float()  # identical for every network size

    # Hyperparameters that do not depend on the hidden-layer size.
    batchsize = 10
    n_vis = 2*(l+1-2)
    k = 2
    rbm_lr = 10**-2
    # NOTE(review): the linear-chain cell trains for 200 epochs; confirm that
    # a single epoch is intended here.
    rbm_epochs = 1
    rbm_verbose = False

    for n in networkSizes:
        n_hidden = int(n)
        run = f'rw_exclVol_rings_N{l}_h{n_hidden}' #unique identifier is automatically created
        results_dir = savepath+f'repr_results[model={run}]'
        # makedirs(exist_ok=True) lets the cell be re-run (existing results are
        # overwritten) and os.path.join also copes with an absolute savepath.
        os.makedirs(os.path.join(parent, results_dir), exist_ok=True)

        train_loader = polyRBM.DataLoader(dataset, batch_size=batchsize, shuffle=True)
        rbm = polyRBM.RBM(n_vis=n_vis, n_hin=n_hidden, k=k, oneHot=False, chainlength=l)

        print(f"START training of RBM [N={l}, n_hidden={n_hidden}]")
        print(os.getcwd())
        loss, recon_error, countList = polyRBM.train_RBM(rbm=rbm, train_loader=train_loader, epochs=rbm_epochs,
                                                 lr=rbm_lr, verbose=rbm_verbose, ninputs=n_vis,
                                                 save_model=True, save_path=savepath+f'{run}_rbm',
                                                 testset=testset_tensor, momentum=0.5, calc_loops=True)
        np.save(f'{results_dir}/{run}_loss.npy', loss)
        np.save(f'{results_dir}/{run}_loops.npy', countList)

        # Freeze the trained parameters before generating samples.
        for param in rbm.parameters():
            param.requires_grad = False

        # Generate 10 batches of 1000 samples, each started from random bits.
        # rbm.k is raised to 800 for generation (presumably the number of
        # Gibbs steps per forward pass - confirm in polyRBM.RBM).
        rbm.k = 800
        all_samples = []
        for _ in range(10):
            initialize = torch.from_numpy(np.random.binomial(1, 0.5, (1000, n_vis))).float()
            all_samples.append(rbm(initialize)[2].detach().numpy())
        samples = np.concatenate(all_samples)
        np.save(f'{results_dir}/{run}_samples.npy', samples)


### linear chains

In [None]:
# Configuration for the linear-chain training runs.
# chainLengths and fnames are paired by index in the training loop below, and
# every entry of networkSizes is used as a hidden-layer size for each chain length.
# The three ###...### placeholders must be replaced by string literals before
# this cell can run (as written the assignments have no right-hand side).
# savepath is string-concatenated with file names below, so it has to end
# with a path separator.
chainLengths = [8, 32]
networkSizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500]
parent = os.getcwd()
savepath = ###directory to save training results in###
fname_8 = ###file path of N=8 simulation data###
fname_32 = ###file path of N=32 simulation data###
fnames = [fname_8, fname_32]

# Train one RBM per (chain length, hidden-layer size) combination on linear
# chain conformations and store loss, loop counts and generated samples per run.
for ind, l in enumerate(chainLengths):
    fname = fnames[ind]
    dataset = polyRBM.CubicChainDataset(fname, 2)
    div = int(0.7*len(dataset.data))  # index separating the 70 % training part from the test part
    testset = dataset.bits[div:]
    # NOTE(review): only dataset.data is cut to the training part here, while
    # the ring-chain cell also truncates dataset.bits and dataset.bonds.
    # Confirm which attribute CubicChainDataset uses for __len__/__getitem__.
    dataset.data = torch.from_numpy(dataset.bits[:div]).float()
    testset_tensor = torch.from_numpy(testset).float()  # identical for every network size

    # Hyperparameters that do not depend on the hidden-layer size.
    batchsize = 10
    n_vis = 2*(l-2)
    k = 2
    rbm_lr = 10**-2
    rbm_epochs = 200
    rbm_verbose = False

    for n in tqdm(networkSizes):
        n_hidden = int(n)
        run = f'rw_exclVol_N{l}_h{n_hidden}' #unique identifier is automatically created
        results_dir = savepath+f'repr_results[model={run}]'
        # makedirs(exist_ok=True) lets the cell be re-run (existing results are
        # overwritten) and os.path.join also copes with an absolute savepath.
        os.makedirs(os.path.join(parent, results_dir), exist_ok=True)

        train_loader = polyRBM.DataLoader(dataset, batch_size=batchsize, shuffle=True)
        rbm = polyRBM.RBM(n_vis=n_vis, n_hin=n_hidden, k=k, oneHot=False, chainlength=l)

        print(f"START training of RBM [N={l}, n_hidden={n_hidden}]")
        print(os.getcwd())
        loss, recon_error, countList = polyRBM.train_RBM(rbm=rbm, train_loader=train_loader, epochs=rbm_epochs,
                                                 lr=rbm_lr, verbose=rbm_verbose, ninputs=n_vis,
                                                 save_model=True, save_path=savepath+f'{run}_rbm',
                                                 testset=testset_tensor, momentum=0.5, calc_loops=True)
        np.save(f'{results_dir}/{run}_loss.npy', loss)
        np.save(f'{results_dir}/{run}_loops.npy', countList)

        # Freeze the trained parameters before generating samples.
        for param in rbm.parameters():
            param.requires_grad = False

        # Generate 10 batches of 1000 samples, each started from random bits.
        # rbm.k is raised to 800 for generation (presumably the number of
        # Gibbs steps per forward pass - confirm in polyRBM.RBM).
        rbm.k = 800
        all_samples = []
        for _ in tqdm(range(10)):
            initialize = torch.from_numpy(np.random.binomial(1, 0.5, (1000, n_vis))).float()
            all_samples.append(rbm(initialize)[2].detach().numpy())
        samples = np.concatenate(all_samples)
        np.save(f'{results_dir}/{run}_samples.npy', samples)

## Quick in-line evaluation of training results

In [None]:
# Selects which training run the evaluation cells below operate on; both
# values are passed to anaU.loader. Replace the ###...### placeholders with
# string literals before running (as written the assignments have no right-hand side).
savepath = ###directory to look for training results and data###
run = ###unique identifier of run to be examined###

In [None]:
# Plot the training loss and the distributions of the learned RBM parameters
# (weights, visible biases, hidden biases) for the selected run.

rbm = anaU.loader(savepath, run, rbm=True)
loss = anaU.loader(savepath, run, loss=True)
anaU.plot_loss(loss, 0)

weights = rbm.W.detach().numpy().flatten()
v_bias = rbm.v_bias.detach().numpy()
h_bias = rbm.h_bias.detach().numpy()

# One shared set of bin edges so the three histograms are directly comparable;
# values outside [-5, 5] are not shown.
bins = np.linspace(-5, 5, 100)

# Single axes: the second subplot requested originally was never drawn on.
fig, ax = plt.subplots()
for values, label in ((weights, 'W'), (v_bias, 'b_vis'), (h_bias, 'b_hid')):
    ax.hist(values, bins=bins, histtype='step', label=label, density=True)
ax.set_xlabel('parameter value')
ax.set_ylabel('probability density')
ax.set_title('distribution of RBM parameters')
ax.legend();

In [None]:
# Plot the loop counts recorded during training: one curve per column that is
# non-zero at least once (top) and the raw count matrix as an image (bottom).

countList = anaU.loader(savepath, run, loops=True)

fig, ax = plt.subplots(2)
not_zero_loops = np.logical_not((countList==0).all(axis=0))
loop_lengths = np.flatnonzero(not_zero_loops)  # column indices used as curve labels

# Boolean indexing returns a copy, so countList itself stays untouched. The
# cast to float is required because integer arrays cannot hold NaN; zeros are
# replaced by NaN so that matplotlib leaves gaps instead of drawing them.
loops = countList[:, not_zero_loops].astype(float)
loops[loops == 0] = np.nan

# Take the number of epochs from the loaded data instead of relying on a
# variable left over from the training cell.
n_epochs = loops.shape[0]
epoch_axis = np.linspace(0, n_epochs, n_epochs)

for i, length in enumerate(loop_lengths):
    ax[0].plot(epoch_axis, loops[:, i], label=f'l={length}')
ax[0].set_title('non-zero loop counts')
ax[0].set_xlabel('epoch')
ax[0].set_ylabel('count')
# Only the first 33 columns are displayed (hard-coded 32+1).
ax[1].imshow(countList[:, :32+1], interpolation='nearest', aspect='auto')
ax[1].set_title('loop counts per epoch as rows')

fig.legend()
fig.set_size_inches(10, 6)
fig.tight_layout()

In [None]:
# Create reconstructions from a trained RBM and convert both the generated
# samples and the reference data to bond vectors for the analysis cells below.
# NOTE: `dataset` (and `testset` in the commented alternative) are not loaded
# here; they must still be in memory from the matching training cell.

rbm = anaU.loader(savepath, run, rbm=True)

# Take the width of the visible layer from the loaded model so the cell works
# for every chain length and for ring as well as linear runs, instead of the
# previously hard-coded (32+1-2)*2.
n_visible = rbm.v_bias.numel()
initialize = torch.from_numpy(np.random.binomial(1, 0.5, (100, n_visible))).float() #initialize Gibbs-sampling randomly
#initialize = torch.from_numpy(testset).float() #initialize Gibbs-sampling from valid conformations

rbm.k = 800
for param in rbm.parameters():
    param.requires_grad = False

samples = rbm(initialize)[2].detach().numpy()
samplesTF = polyRBM.makeVecs(polyRBM.bitsToBonds2DYu(samples))
samples_positions = np.cumsum(samplesTF, axis=1)  # running sum of the bond vectors along the chain
data = polyRBM.makeVecs(dataset.bonds)

In [None]:
# Calculate and plot the angle distribution.
# Passes the reference conformations (`data`) and the RBM-generated ones
# (`samplesTF`); both are created in the reconstruction cell above.

anaU.plot_angleDistribution(data, samplesTF)

In [None]:
# Calculate and plot the bond vector correlation over distance.
# Passes the reference conformations (`data`) and the RBM-generated ones
# (`samplesTF`); both are created in the reconstruction cell above.

anaU.plot_corr(data, samplesTF)

In [None]:
# Calculate and plot the squared radius of gyration for generated samples and
# reference data.
# The second argument of rgx_calc is presumably the number of conformations to
# evaluate (all samples vs. a fixed 10000 for the data) - TODO confirm in
# analysisUtility, and check that `data` holds at least 10000 conformations.

rg2_samples = anaU.rgx_calc(samplesTF, len(samplesTF))
rg2_data = anaU.rgx_calc(data, 10000)
anaU.plot_Rg2(rg2_data, rg2_samples)

In [None]:
# Calculate and plot the distribution of the squared end-to-end distance for
# generated samples and reference data.
# NOTE(review): the radius-of-gyration cell passes len(samplesTF) as second
# argument for the samples, whereas 10000 is used here although the
# reconstruction cell starts from only 100 initial states - confirm what
# endToend_calc expects and whether 10000 is valid for samplesTF.

re2_samples = anaU.endToend_calc(samplesTF, 10000)
re2_data = anaU.endToend_calc(data, 10000)
mean_data = np.mean(re2_data)
mean_samples = np.mean(re2_samples)
# Prints the mean for the data, the mean for the samples, and their ratio (samples / data).
print(mean_data, mean_samples, mean_samples/mean_data)
anaU.plot_Re2(re2_data, re2_samples)