In [2]:
import numpy as np
import pandas as pd
from chromatography import *
from separation_utility import *
import torch
from torch import optim, tensor
import torch.nn as nn

import matplotlib.pyplot as plt
import time

In [3]:
# Analyte tables are read in a fixed order because later cells address them
# by position (alists[0], alists[1], alists[2], alists[3:]).
_analyte_csvs = (
    '../data/GilarSample.csv',    # GilarSample - 8 analytes
    '../data/Alizarin.csv',       # Alizarin - 16 analytes
    '../data/Peterpeptides.csv',  # Peterpeptides - 32 analytes
    '../data/Roca.csv',           # Roca - 14 analytes
    '../data/Peter32.csv',        # Peter32 - 32 analytes
    '../data/Eosin.csv',          # Eosin - 20 analytes
    '../data/Controlmix2.csv',    # Controlmix2 - 17 analytes
    '../data/Gooding.csv',        # Gooding - 872 analytes
)
alists = [pd.read_csv(csv_path) for csv_path in _analyte_csvs]

### Experiment: Performance vs n_steps

In [4]:
# Experiment: training results as a function of the number of program steps.
# For each step count n + 1 (n = 0 .. N - 1) the inner loop runs M repetitions.
# Every repetition trains two freshly constructed policies:
#   pol_50_50 - random_alist is a random half of the pooled analytes and the
#               remaining half is passed as test_alist;
#   pol_100   - random_alist is the complete pool and test_alist is None.
all_analytes = pd.concat(alists, sort=True).reset_index()[['k0', 'S', 'lnk0']]

kwargs = {
    'num_episodes': 25_000,
    'sample_size': 10,
    'batch_size': 1,
    'lr': 0.05,
    'optim': torch.optim.SGD,
    'lr_decay_factor': 0.75,
    'lr_milestones': 5000,
    'print_every': 25_001,
    'baseline': 0.55,
    'max_norm': 1.5,
    'max_rand_analytes': 40,
    'min_rand_analytes': 8,
    'rand_prob': 1.0,
    'h': 0.001,
    'run_time': 1.0,
}
N = 7
M = 15

# One slot of length num_episodes per (step count, repetition).
curve_shape = (N, M, kwargs['num_episodes'])
losses_50_50 = np.zeros(curve_shape)
test_losses_50_50 = np.zeros(curve_shape)
losses_100 = np.zeros(curve_shape)


def _build_step_policy(n_steps):
    """Construct a new PolicyGeneral whose Rho head has `n_steps` outputs.

    phi stacks three PermEqui2_max layers (2 -> 5 -> 5 -> 5), each followed by
    an in-place ELU; rho is two Linear(5, 5) + ELU pairs followed by Rho.
    """
    phi_layers = []
    for in_features, out_features in ((2, 5), (5, 5), (5, 5)):
        phi_layers.append(PermEqui2_max(in_features, out_features))
        phi_layers.append(nn.ELU(inplace=True))
    phi_net = nn.Sequential(*phi_layers)
    rho_net = nn.Sequential(
        nn.Linear(5, 5),
        nn.ELU(inplace=True),
        nn.Linear(5, 5),
        nn.ELU(inplace=True),
        Rho(n_steps=n_steps, hidden=5, in_dim=5, sigma_max=.3, sigma_min=.01),
    )
    return PolicyGeneral(phi=phi_net, rho=rho_net)


for n in range(N):
    print(n)
    # n + 1 equally sized segments whose lengths sum to 1.
    delta_taus = np.ones(n + 1) * 1/(n + 1)

    for i in range(M):
        # Random 50/50 split of the pooled analytes (rows matched by index).
        alist_train = all_analytes.sample(frac=0.5)
        held_out_mask = ~all_analytes.index.isin(alist_train.index.values)
        alist_test = all_analytes.loc[held_out_mask]
        print(f"  {i}")

        # Policies (both built before either training run starts).
        pol_50_50 = _build_step_policy(len(delta_taus))
        pol_100 = _build_step_policy(len(delta_taus))

        # Run Exp
        # NOTE(review): unlike the later experiment cells, no `alists=` argument
        # is passed here - confirm this matches the reinforce_gen signature.
        loss, loss_test = reinforce_gen(
            random_alist=alist_train,
            test_alist=alist_test,
            policy=pol_50_50,
            delta_taus=delta_taus,
            **kwargs
        )
        loss_100, _ = reinforce_gen(
            random_alist=all_analytes,
            test_alist=None,
            policy=pol_100,
            delta_taus=delta_taus,
            **kwargs
        )

        losses_50_50[n, i] = loss
        test_losses_50_50[n, i] = loss_test
        losses_100[n, i] = loss_100


0
  0


  return delta_tau_phi * (1 + self.k(phi)) / self.k(phi)


KeyboardInterrupt: 

In [None]:
# Each result array of the n_steps experiment is written to its own
# compressed archive, keyed by the array's variable name.
np.savez_compressed(
    "../results/general_perf_vs_n_steps_losses_50",
    losses_50_50=losses_50_50,
)
np.savez_compressed(
    "../results/general_perf_vs_n_steps_test_losses_50_50",
    test_losses_50_50=test_losses_50_50,
)
np.savez_compressed(
    "../results/general_perf_vs_n_steps_losses_100",
    losses_100=losses_100,
)

### Performance vs number of analytes

In [None]:
# Experiment: training results as a function of the sampled set size.
# For outer index n, min_rand_analytes and max_rand_analytes are both set to
# 8 * (n + 1); the program always has 10 equal segments. Each of the M
# repetitions trains one policy on a random half of the pooled analytes
# (other half passed as test_alist) and one on the complete pool.
all_analytes = pd.concat(alists, sort=True).reset_index()[['k0', 'S', 'lnk0']]

kwargs = {
    'num_episodes': 25_000,
    'sample_size': 10,
    'batch_size': 1,
    'lr': 0.05,
    'optim': torch.optim.SGD,
    'lr_decay_factor': 0.75,
    'lr_milestones': 5000,
    'print_every': 25_001,
    'baseline': 0.55,
    'max_norm': 1.5,
    'rand_prob': 1.0,
    'h': 0.001,
    'run_time': 1.0,
}
N = 5
M = 30

# One slot of length num_episodes per (set-size index, repetition).
curve_shape = (N, M, kwargs['num_episodes'])
losses_50_50 = np.zeros(curve_shape)
test_losses_50_50 = np.zeros(curve_shape)
losses_100 = np.zeros(curve_shape)


def _build_ten_step_policy(n_steps):
    """Construct a new PolicyGeneral (width 5, in-place ELU) with `n_steps` Rho outputs."""
    phi_layers = []
    for in_features, out_features in ((2, 5), (5, 5), (5, 5)):
        phi_layers.append(PermEqui2_max(in_features, out_features))
        phi_layers.append(nn.ELU(inplace=True))
    phi_net = nn.Sequential(*phi_layers)
    rho_net = nn.Sequential(
        nn.Linear(5, 5),
        nn.ELU(inplace=True),
        nn.Linear(5, 5),
        nn.ELU(inplace=True),
        Rho(n_steps=n_steps, hidden=5, in_dim=5, sigma_max=.3, sigma_min=.01),
    )
    return PolicyGeneral(phi=phi_net, rho=rho_net)


delta_taus = np.ones(10) * 1/(10)
for n in range(N):
    # Fixed set size for this outer iteration: 8, 16, 24, ...
    n_rand_analytes = 8 * (n + 1)

    for i in range(M):
        # Random 50/50 split of the pooled analytes (rows matched by index).
        alist_train = all_analytes.sample(frac=0.5)
        held_out_mask = ~all_analytes.index.isin(alist_train.index.values)
        alist_test = all_analytes.loc[held_out_mask]
        print(f"  {i}")

        # Policies (both built before either training run starts).
        pol_50_50 = _build_ten_step_policy(len(delta_taus))
        pol_100 = _build_ten_step_policy(len(delta_taus))

        # Run Exp
        loss, loss_test = reinforce_gen(
            alists=[alist_train],
            test_alist=alist_test,
            policy=pol_50_50,
            delta_taus=delta_taus,
            min_rand_analytes=n_rand_analytes,
            max_rand_analytes=n_rand_analytes,
            **kwargs
        )
        loss_100, _ = reinforce_gen(
            alists=[all_analytes],
            test_alist=None,
            policy=pol_100,
            delta_taus=delta_taus,
            min_rand_analytes=n_rand_analytes,
            max_rand_analytes=n_rand_analytes,
            **kwargs
        )

        losses_50_50[n, i] = loss
        test_losses_50_50[n, i] = loss_test
        losses_100[n, i] = loss_100


In [None]:
# Each result array of the number-of-analytes experiment goes to its own
# compressed archive, keyed by the array's variable name.
np.savez_compressed(
    "../results/general_perf_vs_nr_analytes_losses_50",
    losses_50_50=losses_50_50,
)
np.savez_compressed(
    "../results/general_perf_vs_nr_analytes_test_losses_50_50",
    test_losses_50_50=test_losses_50_50,
)
np.savez_compressed(
    "../results/general_perf_vs_nr_analytes_losses_100",
    losses_100=losses_100,
)

### Performance vs architecture DeepSet(phi)

In [None]:
# Experiment: training results as a function of the DeepSet (phi) architecture.
# Nine variants are compared: variant i uses activations[i // 3] as the phi
# non-linearity and width[i % 3] as the phi layer width. The rho part is kept
# fixed (width 5, in-place ELU) apart from its first Linear, whose input size
# follows the phi width. Each variant is trained M times on a random half of
# the pooled analytes, with the other half passed as test_alist.
all_analytes = pd.concat(alists, sort=True).reset_index()[['k0', 'S', 'lnk0']]
activations = [nn.ELU, nn.ReLU, nn.Tanh]
width = [5, 10, 20]
kwargs = {
    # Restored from a leftover debugging override (`1, #25_000`): with a single
    # episode the saved arrays would not contain a training curve, and every
    # other experiment cell in this notebook trains for 25_000 episodes.
    'num_episodes' : 25_000,
    'sample_size' : 10,
    'batch_size' : 1,
    'lr' : .05,
    'optim' : torch.optim.SGD,
    'lr_decay_factor' : 0.75,
    'lr_milestones' : 5000,
    'print_every' : 25_001,
    'baseline' : .55,
    'max_norm' : 1.5,
    'max_rand_analytes' : 40,
    'min_rand_analytes' : 8,
    'rand_prob' : 1.,
    'h' : 0.001,
    'run_time' : 1.
}
N = 9
M = 20

# One slot of length num_episodes per (architecture variant, repetition).
losses_deep_set = np.zeros((N, M, kwargs['num_episodes']))
test_losses_deep_set = np.zeros((N, M, kwargs['num_episodes']))


def _build_phi_variant(phi_width, phi_activation, n_steps):
    """Construct a new PolicyGeneral with the requested phi width/activation.

    Parameters
    ----------
    phi_width: int
        Output size of every PermEqui2_max layer (and input size of rho).
    phi_activation: callable
        Zero-argument factory (e.g. nn.Tanh) called once per phi layer.
    n_steps: int
        Number of outputs of the Rho head.
    """
    phi_layers = []
    for in_features in (2, phi_width, phi_width):
        phi_layers.append(PermEqui2_max(in_features, phi_width))
        phi_layers.append(phi_activation())
    rho_net = nn.Sequential(
        nn.Linear(phi_width, 5),
        nn.ELU(inplace=True),
        nn.Linear(5, 5),
        nn.ELU(inplace=True),
        Rho(n_steps=n_steps, hidden=5, in_dim=5, sigma_max=.3, sigma_min=.01),
    )
    return PolicyGeneral(phi=nn.Sequential(*phi_layers), rho=rho_net)


delta_taus = np.ones(10) * 1/(10)
for i in range(N):
    print(f"{i}")
    for m in range(M):
        # Random 50/50 split of the pooled analytes (rows matched by index).
        alist_train = all_analytes.sample(frac=0.5)
        alist_test = all_analytes.loc[lambda a: ~a.index.isin(alist_train.index.values)]
        print(f"  {m}")
        #Policies
        pol_50_50 = _build_phi_variant(width[i % 3], activations[i // 3], len(delta_taus))

        # Run Exp
        loss, loss_test = reinforce_gen(
            alists = [alist_train],
            test_alist = alist_test,
            policy = pol_50_50,
            delta_taus = delta_taus,
            **kwargs
        )

        losses_deep_set[i, m] = loss
        test_losses_deep_set[i, m] = loss_test

In [None]:
# Results of the phi-architecture experiment; the archives reuse the
# losses_50_50 / test_losses_50_50 keys of the other experiments.
np.savez_compressed(
    "../results/general_perf_deep_set_arch_loss_50_50",
    losses_50_50=losses_deep_set,
)
np.savez_compressed(
    "../results/general_perf_deep_set_arch_test_losses_50_50",
    test_losses_50_50=test_losses_deep_set,
)

### Performance vs architecture Program(rho)

In [None]:
import torch 
import torch.nn.functional as F

In [None]:
class RhoReLU(nn.Module):
    """
    Output head with two independent two-layer branches (mean and standard
    deviation) that use ReLU as the hidden non-linearity.
    """

    def __init__(self, n_steps: int, hidden: int, in_dim: int = 2,
                 sigma_max: float = .3, sigma_min: float = .1) -> None:
        """
        Build the mu and sigma branches.

        Parameters
        ----------
        n_steps: int
            Number of steps of the piece-wise constant solvent strength
            program; it is the output size of both branches.
        hidden: int
            Number of nodes in the hidden layer of each branch.
        in_dim: int
            Length of the encoded analyte set (embedding) fed to this
            network. Default value 2.
        sigma_max: float
            Upper end of the range sigma is scaled into. Default value .3.
        sigma_min: float
            Lower end of the range sigma is scaled into. Default value .1.
        """
        super().__init__()

        self.sigma_max = sigma_max
        self.sigma_min = sigma_min
        self.hidden = hidden
        self.n_steps = n_steps

        # Layers are created in a fixed order (mu branch first) so that seeded
        # initialisation stays reproducible.
        self.sig = nn.Sigmoid()
        self.fc_mu_1 = nn.Linear(in_dim, hidden)
        self.fc_mu_2 = nn.Linear(hidden, n_steps)
        self.fc_sig_1 = nn.Linear(in_dim, hidden)
        self.fc_sig_2 = nn.Linear(hidden, n_steps)

    def forward(self, x):
        """Return (mu, sigma): mu is a sigmoid output, sigma lies between sigma_min and sigma_max."""
        hidden_mu = F.relu(self.fc_mu_1(x))
        hidden_sigma = F.relu(self.fc_sig_1(x))

        # squeeze(0) removes the leading dimension when it has size 1.
        mu = self.sig(self.fc_mu_2(hidden_mu)).squeeze(0)
        unit_sigma = self.sig(self.fc_sig_2(hidden_sigma)).squeeze(0)
        # Affine map of the sigmoid output onto (sigma_min; sigma_max).
        sigma = unit_sigma * (self.sigma_max - self.sigma_min) + self.sigma_min
        return mu, sigma

In [None]:
class RhoELU(nn.Module):
    """
    Output head with two independent two-layer branches (mean and standard
    deviation) that use ELU as the hidden non-linearity.
    """

    def __init__(self, n_steps: int, hidden: int, in_dim: int = 2,
                 sigma_max: float = .3, sigma_min: float = .1) -> None:
        """
        Build the mu and sigma branches.

        Parameters
        ----------
        n_steps: int
            Number of steps of the piece-wise constant solvent strength
            program; it is the output size of both branches.
        hidden: int
            Number of nodes in the hidden layer of each branch.
        in_dim: int
            Length of the encoded analyte set (embedding) fed to this
            network. Default value 2.
        sigma_max: float
            Upper end of the range sigma is scaled into. Default value .3.
        sigma_min: float
            Lower end of the range sigma is scaled into. Default value .1.
        """
        super().__init__()

        self.sigma_max = sigma_max
        self.sigma_min = sigma_min
        self.hidden = hidden
        self.n_steps = n_steps

        # Layers are created in a fixed order (mu branch first) so that seeded
        # initialisation stays reproducible.
        self.sig = nn.Sigmoid()
        self.fc_mu_1 = nn.Linear(in_dim, hidden)
        self.fc_mu_2 = nn.Linear(hidden, n_steps)
        self.fc_sig_1 = nn.Linear(in_dim, hidden)
        self.fc_sig_2 = nn.Linear(hidden, n_steps)

    def forward(self, x):
        """Return (mu, sigma): mu is a sigmoid output, sigma lies between sigma_min and sigma_max."""
        hidden_mu = F.elu(self.fc_mu_1(x))
        hidden_sigma = F.elu(self.fc_sig_1(x))

        # squeeze(0) removes the leading dimension when it has size 1.
        mu = self.sig(self.fc_mu_2(hidden_mu)).squeeze(0)
        unit_sigma = self.sig(self.fc_sig_2(hidden_sigma)).squeeze(0)
        # Affine map of the sigmoid output onto (sigma_min; sigma_max).
        sigma = unit_sigma * (self.sigma_max - self.sigma_min) + self.sigma_min
        return mu, sigma

In [None]:
class RhoTanh(nn.Module):
    """
    Output head with two independent two-layer branches (mean and standard
    deviation) that use tanh as the hidden non-linearity.
    """

    def __init__(self, n_steps: int, hidden: int, in_dim: int = 2,
                 sigma_max: float = .3, sigma_min: float = .1) -> None:
        """
        Build the mu and sigma branches.

        Parameters
        ----------
        n_steps: int
            Number of steps of the piece-wise constant solvent strength
            program; it is the output size of both branches.
        hidden: int
            Number of nodes in the hidden layer of each branch.
        in_dim: int
            Length of the encoded analyte set (embedding) fed to this
            network. Default value 2.
        sigma_max: float
            Upper end of the range sigma is scaled into. Default value .3.
        sigma_min: float
            Lower end of the range sigma is scaled into. Default value .1.
        """
        super().__init__()

        self.sigma_max = sigma_max
        self.sigma_min = sigma_min
        self.hidden = hidden
        self.n_steps = n_steps

        # Layers are created in a fixed order (mu branch first) so that seeded
        # initialisation stays reproducible.
        self.sig = nn.Sigmoid()
        self.fc_mu_1 = nn.Linear(in_dim, hidden)
        self.fc_mu_2 = nn.Linear(hidden, n_steps)
        self.fc_sig_1 = nn.Linear(in_dim, hidden)
        self.fc_sig_2 = nn.Linear(hidden, n_steps)

    def forward(self, x):
        """Return (mu, sigma): mu is a sigmoid output, sigma lies between sigma_min and sigma_max."""
        hidden_mu = torch.tanh(self.fc_mu_1(x))
        hidden_sigma = torch.tanh(self.fc_sig_1(x))

        # squeeze(0) removes the leading dimension when it has size 1.
        mu = self.sig(self.fc_mu_2(hidden_mu)).squeeze(0)
        unit_sigma = self.sig(self.fc_sig_2(hidden_sigma)).squeeze(0)
        # Affine map of the sigmoid output onto (sigma_min; sigma_max).
        sigma = unit_sigma * (self.sigma_max - self.sigma_min) + self.sigma_min
        return mu, sigma

In [None]:
# Experiment: training results as a function of the rho architecture.
# Nine variants are compared: variant i uses activations[i // 3] together with
# the matching head class Rhos[i // 3], and width[i % 3] as the rho width.
# The phi part is kept fixed (width 5, in-place ELU). Each variant is trained
# M times on a random half of the pooled analytes, with the other half passed
# as test_alist.
all_analytes = pd.concat(alists, sort=True).reset_index()[['k0', 'S', 'lnk0']]
activations = [nn.ELU, nn.ReLU, nn.Tanh]
width = [5, 10, 20]
Rhos = [RhoELU, RhoReLU, RhoTanh]
kwargs = {
    'num_episodes': 25_000,
    'sample_size': 10,
    'batch_size': 1,
    'lr': 0.05,
    'optim': torch.optim.SGD,
    'lr_decay_factor': 0.75,
    'lr_milestones': 5000,
    'print_every': 25_001,
    'baseline': 0.55,
    'max_norm': 1.5,
    'max_rand_analytes': 40,
    'min_rand_analytes': 8,
    'rand_prob': 1.0,
    'h': 0.001,
    'run_time': 1.0,
}
N = 9
M = 20

# One slot of length num_episodes per (architecture variant, repetition).
curve_shape = (N, M, kwargs['num_episodes'])
losses_rho = np.zeros(curve_shape)
test_losses_rho = np.zeros(curve_shape)


def _build_rho_variant(rho_width, rho_activation, rho_head, n_steps):
    """Construct a new PolicyGeneral with the requested rho width, activation and head class."""
    phi_layers = []
    for in_features, out_features in ((2, 5), (5, 5), (5, 5)):
        phi_layers.append(PermEqui2_max(in_features, out_features))
        phi_layers.append(nn.ELU(inplace=True))
    phi_net = nn.Sequential(*phi_layers)
    rho_net = nn.Sequential(
        nn.Linear(5, rho_width),
        rho_activation(),
        nn.Linear(rho_width, rho_width),
        rho_activation(),
        rho_head(n_steps=n_steps, hidden=rho_width, in_dim=rho_width, sigma_max=.3, sigma_min=.01),
    )
    return PolicyGeneral(phi=phi_net, rho=rho_net)


delta_taus = np.ones(10) * 1/(10)
for i in range(N):
    print(f"{i}")
    family, size = i // 3, i % 3
    for m in range(M):
        # Random 50/50 split of the pooled analytes (rows matched by index).
        alist_train = all_analytes.sample(frac=0.5)
        held_out_mask = ~all_analytes.index.isin(alist_train.index.values)
        alist_test = all_analytes.loc[held_out_mask]
        print(f"  {m}")

        # Policies
        pol_50_50 = _build_rho_variant(
            width[size], activations[family], Rhos[family], len(delta_taus)
        )

        # Run Exp
        loss, loss_test = reinforce_gen(
            alists=[alist_train],
            test_alist=alist_test,
            policy=pol_50_50,
            delta_taus=delta_taus,
            **kwargs
        )

        losses_rho[i, m] = loss
        test_losses_rho[i, m] = loss_test

In [None]:
# Results of the rho-architecture experiment; the archives reuse the
# losses_50_50 / test_losses_50_50 keys of the other experiments.
np.savez_compressed(
    "../results/general_perf_rho_arch_loss_50_50",
    losses_50_50=losses_rho,
)
np.savez_compressed(
    "../results/general_perf_rho_arch_test_losses_50_50",
    test_losses_50_50=test_losses_rho,
)

### Distribution of results + Fine Tuning (not in training)

In [1]:
# Experiment: distribution of final losses when the evaluation sets are NOT
# used for training. In each of the N runs a new policy is trained with
# alists=[] and random_alist built from alists[3:], then scored on
# alists[0], alists[1] and alists[2] - first directly (dist_*), then after
# reinforce_single_from_gen has been run on each set (dist_ft_*).
all_analytes = pd.concat(alists[3:], sort=True).reset_index()[['k0', 'S', 'lnk0']]

kwargs = {
    'num_episodes': 25_000,
    'sample_size': 10,
    'batch_size': 1,
    'lr': 0.05,
    'optim': torch.optim.SGD,
    'lr_decay_factor': 0.75,
    'lr_milestones': 5000,
    'print_every': 25_001,
    'baseline': 0.55,
    'max_norm': 1.5,
    'max_rand_analytes': 40,
    'min_rand_analytes': 8,
    'rand_prob': 1.0,
    'h': 0.001,
    'run_time': 1.0,
}
kwargs_ft = {
    'num_episodes': 6000,
    'sample_size': 10,
    'lr': 0.05,
    'optim': torch.optim.SGD,
    'lr_decay_factor': 0.75,
    'lr_milestones': 1000,
    'print_every': 6001,
    'baseline': 0.55,
    'max_norm': 1.5,
    'beta': 0.0,
    'weights': [1., 1.],
    'h': 0.001,
    'run_time': 1.0,
}
N = 300


def _build_tanh_policy(n_steps):
    """Construct a new PolicyGeneral (width 5, tanh everywhere) with `n_steps` Rho outputs."""
    phi_layers = []
    for in_features, out_features in ((2, 5), (5, 5), (5, 5)):
        phi_layers.append(PermEqui2_max(in_features, out_features))
        phi_layers.append(nn.Tanh())
    phi_net = nn.Sequential(*phi_layers)
    rho_net = nn.Sequential(
        nn.Linear(5, 5),
        nn.Tanh(),
        nn.Linear(5, 5),
        nn.Tanh(),
        Rho(n_steps=n_steps, hidden=5, in_dim=5, sigma_max=.3, sigma_min=.01, non_linearity=torch.tanh),
    )
    return PolicyGeneral(phi=phi_net, rho=rho_net)


def _policy_input(alist):
    """Return the 'S' and 'lnk0' columns of `alist` as a float32 tensor."""
    return torch.tensor(alist[['S', 'lnk0']].values, dtype=torch.float32)


def _program_loss(exp, program, delta_taus):
    """Reset `exp`, run `program` over the `delta_taus` segments and return exp.loss()."""
    exp.reset()
    exp.run_all(program, delta_taus)
    return exp.loss()


# Experiments (one per evaluation set: alists[0], alists[1], alists[2]).
exp_8, exp_16, exp_32 = (
    ExperimentAnalytes(k0=alist.k0.values, S=alist.S.values, h=0.001, run_time=1.0)
    for alist in alists[:3]
)

# Final results: one loss per run, before (dist_*) and after (dist_ft_*)
# the reinforce_single_from_gen stage.
dist_8, dist_16, dist_32 = (np.zeros((N,)) for _ in range(3))
dist_ft_8, dist_ft_16, dist_ft_32 = (np.zeros((N,)) for _ in range(3))

for n in range(N):
    delta_taus = np.ones(10) * 1/(10)
    print(f"{n}")

    # Policies
    pol = _build_tanh_policy(len(delta_taus))

    # Run Exp
    reinforce_gen(
        alists=[],
        random_alist=all_analytes,
        test_alist=None,
        policy=pol,
        delta_taus=delta_taus,
        **kwargs
    )

    # Score the mu output of the trained policy on each evaluation set.
    mu_8, _ = pol.forward(_policy_input(alists[0]))
    mu_16, _ = pol.forward(_policy_input(alists[1]))
    mu_32, _ = pol.forward(_policy_input(alists[2]))

    dist_8[n] = _program_loss(exp_8, mu_8.tolist(), delta_taus)
    dist_16[n] = _program_loss(exp_16, mu_16.tolist(), delta_taus)
    dist_32[n] = _program_loss(exp_32, mu_32.tolist(), delta_taus)

    # NOTE(review): the same `pol` object is handed to all three calls below,
    # in sequence. If reinforce_single_from_gen updates the policy in place,
    # the later runs start from an already fine-tuned policy - confirm it
    # works on a copy.
    _, _, mu_8, _, _ = reinforce_single_from_gen(
        alist=alists[0],
        policy=pol,
        delta_taus=delta_taus,
        **kwargs_ft
    )
    _, _, mu_16, _, _ = reinforce_single_from_gen(
        alist=alists[1],
        policy=pol,
        delta_taus=delta_taus,
        **kwargs_ft
    )
    _, _, mu_32, _, _ = reinforce_single_from_gen(
        alist=alists[2],
        policy=pol,
        delta_taus=delta_taus,
        **kwargs_ft
    )

    # Only the last entry of each returned mu sequence is scored.
    dist_ft_8[n] = _program_loss(exp_8, mu_8[-1], delta_taus)
    dist_ft_16[n] = _program_loss(exp_16, mu_16[-1], delta_taus)
    dist_ft_32[n] = _program_loss(exp_32, mu_32[-1], delta_taus)

np.savez_compressed(
    "../results/general_dist_not_in_train",
    dist_8=dist_8,
    dist_16=dist_16,
    dist_32=dist_32,
    dist_ft_8=dist_ft_8,
    dist_ft_16=dist_ft_16,
    dist_ft_32=dist_ft_32,
)

NameError: name 'pd' is not defined

### Distribution of results + Fine Tuning (in training)

In [None]:
# Experiment: distribution of final losses when the evaluation sets ARE part
# of training. In each of the N runs a new policy is trained with
# alists=alists[0:3] and random_alist built from all tables (rand_prob .8),
# then scored on alists[0], alists[1] and alists[2] - first directly
# (dist_*), then after reinforce_single_from_gen has been run on each set
# (dist_ft_*).
all_analytes = pd.concat(alists, sort=True).reset_index()[['k0', 'S', 'lnk0']]

kwargs = {
    'num_episodes': 25_000,
    'sample_size': 10,
    'batch_size': 1,
    'lr': 0.05,
    'optim': torch.optim.SGD,
    'lr_decay_factor': 0.75,
    'lr_milestones': 5000,
    'print_every': 25_001,
    'baseline': 0.55,
    'max_norm': 1.5,
    'max_rand_analytes': 40,
    'min_rand_analytes': 8,
    'rand_prob': 0.8,
    'h': 0.001,
    'run_time': 1.0,
}
kwargs_ft = {
    'num_episodes': 6000,
    'sample_size': 10,
    'lr': 0.05,
    'optim': torch.optim.SGD,
    'lr_decay_factor': 0.75,
    'lr_milestones': 1000,
    'print_every': 6001,
    'baseline': 0.55,
    'max_norm': 1.5,
    'beta': 0.0,
    'weights': [1., 1.],
    'h': 0.001,
    'run_time': 1.0,
}
N = 300


def _build_tanh_policy(n_steps):
    """Construct a new PolicyGeneral (width 5, tanh everywhere) with `n_steps` Rho outputs."""
    phi_layers = []
    for in_features, out_features in ((2, 5), (5, 5), (5, 5)):
        phi_layers.append(PermEqui2_max(in_features, out_features))
        phi_layers.append(nn.Tanh())
    phi_net = nn.Sequential(*phi_layers)
    rho_net = nn.Sequential(
        nn.Linear(5, 5),
        nn.Tanh(),
        nn.Linear(5, 5),
        nn.Tanh(),
        Rho(n_steps=n_steps, hidden=5, in_dim=5, sigma_max=.3, sigma_min=.01, non_linearity=torch.tanh),
    )
    return PolicyGeneral(phi=phi_net, rho=rho_net)


def _policy_input(alist):
    """Return the 'S' and 'lnk0' columns of `alist` as a float32 tensor."""
    return torch.tensor(alist[['S', 'lnk0']].values, dtype=torch.float32)


def _program_loss(exp, program, delta_taus):
    """Reset `exp`, run `program` over the `delta_taus` segments and return exp.loss()."""
    exp.reset()
    exp.run_all(program, delta_taus)
    return exp.loss()


# Experiments (one per evaluation set: alists[0], alists[1], alists[2]).
exp_8, exp_16, exp_32 = (
    ExperimentAnalytes(k0=alist.k0.values, S=alist.S.values, h=0.001, run_time=1.0)
    for alist in alists[:3]
)

# Final results: one loss per run, before (dist_*) and after (dist_ft_*)
# the reinforce_single_from_gen stage.
dist_8, dist_16, dist_32 = (np.zeros((N,)) for _ in range(3))
dist_ft_8, dist_ft_16, dist_ft_32 = (np.zeros((N,)) for _ in range(3))

for n in range(N):
    delta_taus = np.ones(10) * 1/(10)
    print(f"{n}")

    # Policies
    pol = _build_tanh_policy(len(delta_taus))

    # Run Exp
    reinforce_gen(
        alists=alists[0:3],
        random_alist=all_analytes,
        test_alist=None,
        policy=pol,
        delta_taus=delta_taus,
        **kwargs
    )

    # Score the mu output of the trained policy on each evaluation set.
    mu_8, _ = pol.forward(_policy_input(alists[0]))
    mu_16, _ = pol.forward(_policy_input(alists[1]))
    mu_32, _ = pol.forward(_policy_input(alists[2]))

    dist_8[n] = _program_loss(exp_8, mu_8.tolist(), delta_taus)
    dist_16[n] = _program_loss(exp_16, mu_16.tolist(), delta_taus)
    dist_32[n] = _program_loss(exp_32, mu_32.tolist(), delta_taus)

    # NOTE(review): the same `pol` object is handed to all three calls below,
    # in sequence. If reinforce_single_from_gen updates the policy in place,
    # the later runs start from an already fine-tuned policy - confirm it
    # works on a copy.
    _, _, mu_8, _, _ = reinforce_single_from_gen(
        alist=alists[0],
        policy=pol,
        delta_taus=delta_taus,
        **kwargs_ft
    )
    _, _, mu_16, _, _ = reinforce_single_from_gen(
        alist=alists[1],
        policy=pol,
        delta_taus=delta_taus,
        **kwargs_ft
    )
    _, _, mu_32, _, _ = reinforce_single_from_gen(
        alist=alists[2],
        policy=pol,
        delta_taus=delta_taus,
        **kwargs_ft
    )

    # Only the last entry of each returned mu sequence is scored.
    dist_ft_8[n] = _program_loss(exp_8, mu_8[-1], delta_taus)
    dist_ft_16[n] = _program_loss(exp_16, mu_16[-1], delta_taus)
    dist_ft_32[n] = _program_loss(exp_32, mu_32[-1], delta_taus)

np.savez_compressed(
    "../results/general_dist_in_train",
    dist_8=dist_8,
    dist_16=dist_16,
    dist_32=dist_32,
    dist_ft_8=dist_ft_8,
    dist_ft_16=dist_ft_16,
    dist_ft_32=dist_ft_32,
)