# Setup

In [None]:
# Install dependencies
# This cell installs gym, clones the jelly-bean-world repository, re-adds two of
# its submodules from explicit GitHub URLs, installs the Python package found in
# api/python/, and finally imports jbw and gym.
# NOTE: clear_output() wipes everything the cell has printed so far, including
# any error text from the preceding shell command; comment the calls out when
# debugging a failed install.
from IPython.display import clear_output

!pip3 install gym
clear_output()

# Remove any previous checkout so the clone below starts from a clean directory.
!rm -rf jelly-bean-world/ || true
!git clone https://github.com/NishanthVAnand/jelly-bean-world.git
clear_output()

%cd jelly-bean-world/
clear_output()

# Replace the jbw/deps/core entry: delete the directory, drop it from the git
# index, then register it again as a submodule from the URL below.
# NOTE(review): presumably needed because the submodule URL recorded in the
# cloned repository cannot be fetched from this runtime; confirm.
!rm -rf jbw/deps/core
!git rm jbw/deps/core
!git submodule add https://github.com/asaparov/core.git jbw/deps/core
clear_output()

# Same replacement procedure for jbw/deps/math.
!rm -rf jbw/deps/math
!git rm jbw/deps/math
!git submodule add https://github.com/asaparov/math.git jbw/deps/math
clear_output()

# Fetch the contents of every registered submodule (including nested ones).
!git submodule update --init --recursive

# Run the installer located in api/python/.
%cd api/python/
!python setup.py install
clear_output()

# Go back up three levels, out of jelly-bean-world/api/python/.
%cd ../../..

# NOTE(review): jbw is presumably the package installed by setup.py above; confirm.
import jbw
import gym

/content


In [None]:
# Mount drive and create shortcut
# Mounts Google Drive, points the /content/project shortcut at the project
# folder, and puts that shortcut on sys.path so modules stored in the folder
# can be imported.

from google.colab import drive
import sys
import os
import shutil
import warnings

drive.mount('/content/gdrive')
folder = "/content/gdrive/MyDrive/COMP579/Final_Project/" #@param {type:"string"}

# Create the shortcut in pure Python instead of `!ln -Ts $folder ... 2> /dev/null`:
# this handles folder names containing spaces, refreshes a stale link when
# `folder` is edited, and reports problems instead of silently discarding them.
project_link = '/content/project'
if not os.path.isdir(folder):
    warnings.warn(f"Project folder {folder!r} does not exist (yet); the shortcut will dangle.")

if os.path.islink(project_link):
    # Re-point the link only when it targets a different folder.
    if os.readlink(project_link) != folder:
        os.remove(project_link)
        os.symlink(folder, project_link)
elif os.path.exists(project_link):
    # A real file/directory is in the way; leave it untouched, as `ln -T` would.
    warnings.warn(f"{project_link} exists and is not a symlink; shortcut not created.")
else:
    os.symlink(folder, project_link)

# Add the assignment folder to Python path
if project_link not in sys.path:
    sys.path.insert(0, project_link)

Mounted at /content/gdrive


In [None]:
%cd project/

/content/gdrive/MyDrive/COMP579/Final_Project


In [None]:
import numpy as np
import sys
# Report the NumPy version and the Python interpreter version, one per line.
print(np.__version__, sys.version, sep="\n")

1.21.5
3.7.13 (default, Mar 16 2022, 17:37:17) 
[GCC 7.5.0]


# Training Code

In [None]:
from dataclasses import dataclass
from train_agent import evaluate_agent, get_environment, train_agent
from os import listdir
from os.path import isfile, join
import importlib
import random
import numpy as np
import tensorflow as tf
import torch

@dataclass
class Arguments:
  """Settings for a training run.

  Both fields have defaults, so ``Arguments()`` yields a ready-to-use
  configuration.
  """
  # Name of the group folder (also used as the package the agent is imported from).
  group: str = "GROUP_011"
  # Seed value handed to the random number generators and environments.
  seed: int = 0

args = Arguments()
seed = args.seed

# Ask cuDNN for deterministic kernels and turn off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed every random number source used in this notebook with the same value:
# Python's `random`, NumPy, PyTorch and TensorFlow.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
tf.random.set_seed(seed)


# Locate the group folder and make sure it holds the two required files.
path = './'+args.group+'/'
files = [f for f in listdir(path) if isfile(join(path, f))]
missing_files = [name for name in ('agent.py', 'env_info.txt') if name not in files]
if missing_files:
  # Raise instead of print + exit(): an exception stops this cell right here,
  # whereas exit() in a notebook acts on the kernel rather than on the cell.
  raise FileNotFoundError(
      "Your GROUP folder does not contain agent.py or env_info.txt! "
      f"Missing from {path}: {', '.join(missing_files)}")

# The first line of env_info.txt names the environment type.
with open(path+'env_info.txt') as f:
  lines = f.readlines()
if not lines or not lines[0].strip():
  raise ValueError(f"{path}env_info.txt is empty; its first line must name the environment type.")
# Strip the trailing newline/whitespace so only the bare name is passed on.
env_type = lines[0].strip().lower()

# Separate environment instances for training and for evaluation.
env = get_environment(env_type)
env_eval = get_environment(env_type)

env.seed(seed)
env_eval.seed(seed)

# Collect the spaces the agent needs for the selected environment type.
# 'mujoco' is tested first so that, should both keywords appear, the result
# matches the previous behaviour (where the mujoco assignment came last).
if 'mujoco' in env_type:
  env_specs = {'observation_space': env.observation_space, 'action_space': env.action_space}
elif 'jellybean' in env_type:
  env_specs = {'scent_space': env.scent_space, 'vision_space': env.vision_space, 'feature_space': env.feature_space, 'action_space': env.action_space}
else:
  # Without this branch an unknown type surfaced later as a NameError on env_specs.
  raise ValueError(f"Unsupported environment type {env_type!r}: expected it to contain 'jellybean' or 'mujoco'.")

# Import <group>.agent and build its Agent from the collected specs.
agent_module = importlib.import_module(args.group+'.agent')
agent = agent_module.Agent(env_specs)

# Note these can be environment specific and you are free to experiment with what works best for you
total_timesteps = 2_000_000
evaluation_freq = 5_000
n_episodes_to_evaluate = 2

# Training is split into batches; evaluation_freq is passed to train_agent as
# both its fourth and fifth argument, and the weights file is overwritten
# after every batch.
n_batches = int(total_timesteps / evaluation_freq)
learning_curve = []
for batch in range(n_batches):
  print()
  print(f"Batch {batch + 1} out of {n_batches}:")
  batch_curve = train_agent(agent, env, env_eval, evaluation_freq, evaluation_freq, n_episodes_to_evaluate)
  learning_curve += batch_curve
  torch.save(agent.model.state_dict(), "weights.pth")






Batch 1 out of 400:
timestep: 5000, acc_reward: 26.60

Batch 2 out of 400:
timestep: 5000, acc_reward: 32.00

Batch 3 out of 400:
timestep: 5000, acc_reward: 20.60

Batch 4 out of 400:
timestep: 5000, acc_reward: 2.00

Batch 5 out of 400:
timestep: 5000, acc_reward: 26.60

Batch 6 out of 400:
timestep: 5000, acc_reward: 0.10

Batch 7 out of 400:
timestep: 5000, acc_reward: 32.10

Batch 8 out of 400:
timestep: 5000, acc_reward: 63.50

Batch 9 out of 400:
timestep: 5000, acc_reward: 26.60

Batch 10 out of 400:
timestep: 5000, acc_reward: 79.80

Batch 11 out of 400:
timestep: 5000, acc_reward: 33.60

Batch 12 out of 400:
timestep: 5000, acc_reward: 26.60

Batch 13 out of 400:
timestep: 5000, acc_reward: 4.00

Batch 14 out of 400:
timestep: 5000, acc_reward: 20.60

Batch 15 out of 400:
timestep: 5000, acc_reward: 23.40

Batch 16 out of 400:
timestep: 5000, acc_reward: 20.60

Batch 17 out of 400:
timestep: 5000, acc_reward: 26.60

Batch 18 out of 400:
timestep: 5000, acc_reward: 33.60

Bat

KeyboardInterrupt: ignored