# DNA RL: Crossover-based optimization algorithm

Importing Libraries

In [1]:
import dna_env
import a2c
import torch
import variable as v
from tqdm import tqdm
import utils
import matplotlib.pyplot as plt
from copy import deepcopy

Initializing Environment

In [2]:
# Environment configuration. `sequence_length` drives both the environment
# size and the start-point bounds below.
sequence_length = 36
render = True  # not read anywhere in this cell

# The start-point upper bound was hard-coded to 35 (= sequence_length - 1).
# Deriving it keeps the constraint in sync when `sequence_length` is changed.
# NOTE(review): presumably an inclusive last index into the sequence - confirm
# against dna_env.DnaRLEnv.
max_start_point = sequence_length - 1
action_constraints = {'co_length': (10, 20),
                      'opt_start_point': (0, max_start_point),
                      'co_start_point': (0, max_start_point)}
early_stop_reward = {'reward_low_threshold': -1,
                     'reward_high_threshold': .8,
                     'patience': 200}
env = dna_env.DnaRLEnv(action_constraints, early_stop_reward, sequence_length=sequence_length)
env.reset()
# Clearing the session here makes the later `if not train_session:` cell
# build a fresh TrainSession whenever this cell is re-run.
train_session = None

In [3]:
# Seed shared by the agent configuration and the training session below.
seed = 23
# Ask the environment for its state / action dimensions (state first, then
# action, as before) and report both.
state_dim, action_dim = env.get_state_dim(), env.get_action_dim()
print(f"state dim: {state_dim}", f"action dim: {action_dim}", sep="\n")

state dim: (400,)
action dim: (3, 36)


Creating our agent

In [4]:
# Hyper-parameter dictionaries for the critic, the actor, and the A2C agent
# that wraps both. Keys, key order, and values are the ones the agent
# constructors receive; only the literal form differs.
critic_init = dict(
    seed=seed,
    action_dim=action_dim[0],
    network=dict(
        i_size=state_dim[0],
        l1_size=200,
        l2_size=300,
        l3_size=100,
    ),
)
actor_init = dict(
    entropy_learning_rate=0.1,
    seed=seed,
    action_dim=action_dim[0],
    network=dict(
        i_size=state_dim[0],
        l1_size=200,
        hidden_size=150,
        o_size=sequence_length,
    ),
)
# NOTE(review): 'action_space' is a literal 3, which matches action_dim[0] in
# the recorded output - confirm whether the two are meant to be the same value.
a2c_init = dict(
    seed=seed,
    discount_factor=.99,
    state_dim=state_dim,
    action_space=3,
    actor=actor_init,
    critic=critic_init,
)

In [5]:
# Create the training session only when none exists yet, so re-running this
# cell keeps an existing session (and the agents already appended to it).
if not train_session:
    print('Train Session reset')
    # Start with no agents; they are appended in a later cell.
    train_session = utils.TrainSession(dict(), env, seed)

Train Session reset


In [6]:
# Instantiate the A2C agent from its config and register it with the session.
# The value returned by `append_agents` is kept and later passed as
# `agent_subset` to train / plot.
agents_created = dict(a2c_agent=a2c.A2C(a2c_init))
manual_agent_names = train_session.append_agents(agents_created)

In [7]:
# Print every agent currently registered in the session as a bullet list.
all_agent_names = train_session.agents.keys()
s = str.join('\n- ', all_agent_names)
print(f"All Agents: \n- {s}")

All Agents: 
- a2c_agent


In [8]:
# Settings forwarded to `train_session.train` in the next cell.
n_episode = 10               # passed as `n_episode`
t_max_per_episode = 1_000    # passed as `t_max_per_episode`; presumably a per-episode step cap - confirm
graphical = True             # passed as `graphical`

In [9]:
# Train only the agents registered above.
# NOTE(review): the saved output of this cell is a RuntimeError from autograd
# ("one of the variables needed for gradient computation has been modified by
# an inplace operation"). The failure is raised inside the training code, not
# by this call; the error message suggests
# torch.autograd.set_detect_anomaly(True) to locate the offending operation.
train_session.train(
    n_episode=n_episode,
    t_max_per_episode=t_max_per_episode,
    graphical=graphical,
    agent_subset=manual_agent_names,
)

  0%|          | 0/10 [00:00<?, ?it/s]

opt seq	:	 ['C' 'G' 'A' 'T' 'G' 'A' 'C' 'T' 'A' 'C' 'C' 'G' 'T' 'C' 'G' 'C' 'T' 'A'
 'T' 'G' 'C' 'T' 'T' 'A' 'A' 'T' 'A' 'T' 'C' 'G' 'G' 'T' 'C' 'T' 'C' 'C']
co seq	:	 ['C' 'A' 'T' 'T' 'A' 'C' 'C' 'G' 'C' 'C' 'A' 'G' 'G' 'T' 'G' 'G' 'A' 'T'
 'C' 'A' 'A' 'T' 'A' 'G' 'G' 'C' 'G' 'G' 'A' 'T' 'G' 'A' 'G' 'A' 'A' 'G']





RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [100, 1]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

In [36]:
# Plot the training results for the selected agents with window=200.
# NOTE(review): the saved output is "AttributeError: 'Critic' object has no
# attribute 'loss_history'" - possibly because the training cell aborted
# before any history was recorded; confirm in the Critic implementation.
train_session.plot_results(window=200, agent_subset=manual_agent_names)

AttributeError: 'Critic' object has no attribute 'loss_history'

In [11]:
# Scratch cell: reset the environment and keep the returned state for inspection.
state = env.reset()

In [13]:
# Scratch cell: the state's shape with a leading dimension of size 1 prepended.
(1, *state.shape)

(1, 400)

In [10]:
# Scratch cell: small integer tensor created on the project's configured device.
a = torch.tensor(data=[11, 23, 3], device=v.device)

In [13]:
# Scratch cell: move the tensor to host memory and view it as a NumPy array.
a.to('cpu').numpy()

array([11, 23,  3])

In [17]:
# Scratch cell: product of all elements of the tensor.
torch.prod(a)

tensor(759, device='cuda:0')