In [2]:
import matplotlib.pyplot as plt
import gymnasium as gym
import seaborn as sns
import numpy as np
import torch
from torch import nn
from torch.distributions.categorical import Categorical
from torch import optim



In [3]:
DEVICE = "cpu"

In [4]:
class ActorCriticNetwork(nn.Module):
    def __init__(self, obs_space_size, action_space_size):
        super().__init__()
        
        self.shared_layers = nn.Sequential(
            nn.Linear(obs_space_size, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
        )
        
        self.policy_layers = nn.Sequential(
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, action_space_size),
        )
        
        self.value_layers = nn.Sequential(
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        )
        
        def value(self, obs):
            z = self.shared_layers(obs)
            value = self.value_layers(z)
            return value
        
        def policy(self, obs):
            z = self.shared_layers(obs)
            policy_logits = self.policy_layers(z)
            return policy_logits
        
        def forward(self, obs):
            z = self.shared_layers(obs)
            policy_logits = self.policy_layers(z)
            value = self.value_layers(z)
            return policy_logits, value

In [4]:
def rollout(model, env, max_steps=1000):
    
    train_data = []
    obs = env.reset()
    
    ep_reward = 0
    
    for _ in range(max_steps):
        logits, val = model(torch.tensor([obs], dtype=torch.float32,
                                         device=DEVICE))
        act_distribution = Categorical(logits=logits)
        act = act_distribution.sample()
        act_log_prob = act_distribution.log_prob(act).item()
        
        next_obs, reward, done, _ = env.step(act.item())
        
        obs = next_obs
        ep_reward += reward
        if done:
            break
            
    return train_data, ep_reward

In [5]:
env = gym.make('CartPole-v1')
model = ActorCriticNetwork(env.observation_space.shape[0], env.action_space.n)
model = model.to(DEVICE)
train_data, reward = rollout(model, env)

  logits, val = model(torch.tensor([obs], dtype=torch.float32,


ValueError: expected sequence of length 4 at dim 2 (got 0)

In [5]:
def one_hot_encode_sequence(sequence):
    # Initialize a matrix of zeros with shape (sequence_length, 20)
    one_hot_matrix = np.zeros((len(sequence), 20))

    for idx, aa in enumerate(sequence):
        if aa in aa_to_index:
            # Set the appropriate element to 1
            one_hot_matrix[idx, aa_to_index[aa]] = 1
        else:
            # Handle unknown or non-standard amino acids if necessary
            print(f"Unknown amino acid: {aa}")
    return one_hot_matrix

amino_acids = 'ACDEFGHIKLMNPQRSTVWY'
aa_to_index = {aa: idx for idx, aa in enumerate(amino_acids)}

sequence = "ACGJQRTYE"
one_hot_encoded_sequence = one_hot_encode_sequence(sequence)

print("One-hot encoded sequence:")
print(one_hot_encoded_sequence)


Unknown amino acid: J
One-hot encoded sequence:
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
