In [2]:
import os
os.environ['OPENBLAS_NUM_THREADS'] = '1'       #Disactivate multiprocessing for numpy
import numpy as np
import matplotlib.pyplot as plt
import json
import gymnasium as gym
import yaml
from datetime import datetime

import stable_baselines3

from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecNormalize, SubprocVecEnv
from stable_baselines3 import PPO, A2C, SAC, TD3
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
from stable_baselines3.common.callbacks import EvalCallback, CallbackList, CheckpointCallback, StopTrainingOnNoModelImprovement

from sogym.mmc_optim import run_mmc
from sogym.env import sogym
from sogym.expert_generation import generate_expert_dataset, generate_mmc_solutions, generate_dataset
from sogym.utils import profile_and_analyze,ImageDictExtractor, CustomBoxDense
from sogym.callbacks import FigureRecorderCallback, MaxRewardCallback, GradientNormCallback, GradientClippingCallback
from sogym.pretraining import pretrain_agent, ExpertDataSet

import torch
import torch as th
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import random_split, Dataset
from IPython.display import display

%load_ext autoreload
%autoreload 2

# Report which Stable-Baselines3 release is installed.
print('SB3 version:', stable_baselines3.__version__)

# Device-agnostic setup: use the GPU when PyTorch reports one, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Using device:', device)

SB3 version: 2.2.1
Using device: cuda


In [3]:
# Keyword arguments shared by the training and the evaluation environment
params = dict(
    observation_type='topopt_game',
    vol_constraint_type='hard',
    resolution=50,
)

# Training environment: 'train' mode, connectivity check switched on
train_env = sogym(mode='train', check_connectivity=True, **params)

# Evaluation environment: 'test' mode, connectivity check switched off
eval_env = sogym(mode='test', check_connectivity=False, **params)

## To generate expert trajectories using the conventional optimization process:

In [None]:
# NOTE(review): presumably reset() is what populates train_env.dx / train_env.dy
# read below -- confirm before reordering.
obs = train_env.reset()

# Shortest side of the design domain; several upper bounds below scale with it.
shortest_side = min(train_env.dx, train_env.dy)

# Settings for the MMC optimizer (only consumed by the optional run_mmc call below).
cfg = {
    'optimizer': 'hybrid',            # optimizer choice
    'xInt': 0.25,                     # initial interval of components in x
    'yInt': 0.25,                     # initial interval of components in y
    'E': 1.0,                         # Young's modulus
    'nu': 0.3,                        # Poisson ratio
    'h': 1,                           # thickness
    'dgt0': 5,                        # significant digits of the sensitivities
    'scl': 1,                         # scale factor for the objective
    'p': 6,                           # power of the super-ellipsoid
    'lmd': 100,                       # power of the KS aggregation
    'maxiter': 1000,                  # maximum number of outer iterations
    'alpha': 1e-9,                    # threshold level in the Heaviside function
    'epsilon': 0.2,                   # regularization term in the Heaviside function
    'maxinnerinit': 1,                # maximum number of inner iterations for GCMMA
    'switch': -0.000002,              # switch criterion for the hybrid optimizer
    'convergence_threshold': 2e-4,    # threshold on the relative change of the objective
    # Lower / upper bounds of the six design variables of a component.
    # NOTE(review): presumably (x, y, length, thickness_1, thickness_2, angle) -- confirm.
    'xmin': (0.0, 0.0, 0.0, 0.00, 0.00, -np.pi),
    'xmax': (
        train_env.dx,
        train_env.dy,
        0.7 * shortest_side,
        0.05 * shortest_side,
        0.05 * shortest_side,
        np.pi,
    ),
}

# Optional: solve only the currently sampled problem with the settings above.
#run_mmc(train_env.conditions,train_env.nelx,train_env.nely,train_env.dx,train_env.dy,plotting='contour',verbose=0,cfg=cfg)

# Folder that receives the generated solutions
dataset_folder = "./dataset/topologies/mmc"
#generate_mmc_solutions(key=0, dataset_folder=dataset_folder)
generate_dataset(dataset_folder=dataset_folder, num_threads=32, num_samples=20000)

## To visualize a random expert trajectory:

In [None]:
from sogym.utils import visualize_expert_trajectory
import os
import random

# Pick one entry of the dataset folder at random and hand it, together with
# the training environment, to the visualization helper.
candidate_names = os.listdir(dataset_folder)
chosen_name = random.choice(candidate_names)
file_path = os.path.join(dataset_folder, chosen_name)

visualize_expert_trajectory(train_env, file_path)

In [None]:
from sogym.expert_generation import check_duplicates, copy_unique_files

# Check whether any boundary conditions are duplicated in the expert trajectories.
# `percentage` can be adjusted as needed, e.g. 50 for 50%.
check_duplicates(dataset_folder, percentage=100)

# Path to the unique_files.txt file.
# NOTE(review): presumably written by check_duplicates above -- confirm.
unique_files_file = 'unique_files.txt'

# Destination folder for the unique files. It is derived from `dataset_folder`
# (a sibling directory named 'unique_combined') instead of a machine-specific
# absolute path, so the cell works on any checkout of the repository.
destination_folder = os.path.join(
    os.path.dirname(os.path.normpath(dataset_folder)),
    'unique_combined',
)

# Copy the unique files to the destination folder
copy_unique_files(unique_files_file, destination_folder)


In [None]:
from sogym.expert_generation import generate_expert_dataset
import pickle

# Number of permutations to generate; None is forwarded unchanged to
# generate_expert_dataset.
num_permutations = None
observation_type = "topopt_game"

# Configuration of the environment used while building the dataset
env_kwargs = {
    'mode': 'train',
    'observation_type': observation_type,
    'vol_constraint_type': 'hard',
    'seed': 42,
    'resolution': 50,
    'check_connectivity': True,
}

# Folder holding the de-duplicated solutions. Given relative to the working
# directory, like the other paths in this notebook ("./dataset/topologies/mmc",
# "algorithms.yaml"), rather than as a user-specific absolute path.
directory_path = "./dataset/topologies/unique_combined"

generate_expert_dataset(
    directory_path,
    env_kwargs,
    observation_type=observation_type,
    plot_terminated=False,
    num_permutations=num_permutations,
    file_fraction=1.0,
)

In [None]:
from sogym.pretraining import load_expert_dataset

# Directory of a previously generated expert dataset. Given relative to the
# working directory, like the other paths in this notebook, rather than as a
# user-specific absolute path. Update the timestamped folder name to match
# the run you want to load.
chunk_dir = './dataset/expert/unique_combined_topopt_game_20240515_071024'
expert_dataset = load_expert_dataset(chunk_dir, train_env)

# Prints the dataset length divided by 8.
# NOTE(review): the meaning of the factor 8 is not visible here (samples per
# trajectory? number of permutations?) -- confirm and name the constant.
print(len(expert_dataset)/8)

In [None]:
# The environments were built from `params`, so take the observation type from
# there. This keeps the cell runnable on a fresh kernel even when the
# (expensive) dataset-generation cell that also defines `observation_type`
# has been skipped.
observation_type = params['observation_type']

# 'box_dense' observations use the MLP policy; every other observation type
# uses the multi-input policy.
chosen_policy = "MlpPolicy" if observation_type == 'box_dense' else "MultiInputPolicy"

# 'image' and 'topopt_game' observations go through the image/dict extractor,
# anything else through the dense-box extractor.
feature_extractor = ImageDictExtractor if observation_type in ('image', 'topopt_game') else CustomBoxDense

env = train_env

# Load the algorithm hyper-parameters from the YAML file
with open("algorithms.yaml", "r") as file:
    config = yaml.safe_load(file)

# Extract the parameters for the desired algorithm
algorithm_name = "PPO"  # or "SAC" / "TD3"
# Shallow copy so that popping entries below does not alter `config`.
algorithm_params = dict(config[algorithm_name])

policy_kwargs = dict(
    features_extractor_class=feature_extractor,
    net_arch=config['common']['net_arch'],
    share_features_extractor=False,
)

# Create the model based on the algorithm name and parameters
if algorithm_name == "SAC":
    model = SAC(env=env,
                policy=chosen_policy,
                policy_kwargs=policy_kwargs,
                device=device,
                **algorithm_params)

elif algorithm_name == "PPO":
    model = PPO(env=env,
                policy=chosen_policy,
                policy_kwargs=policy_kwargs,
                device=device,
                **algorithm_params)

elif algorithm_name == "TD3":
    # Build the action-noise object from the "action_noise" entry of the
    # YAML parameters; the entry is removed before the rest is forwarded.
    n_actions = env.action_space.shape[-1]
    action_noise_params = algorithm_params.pop("action_noise")
    action_noise = NormalActionNoise(mean=action_noise_params["mean"] * np.ones(n_actions),
                                     sigma=action_noise_params["sigma"] * np.ones(n_actions))
    model = TD3(env=env,
                policy=chosen_policy,
                policy_kwargs=policy_kwargs,
                action_noise=action_noise,
                device=device,
                **algorithm_params)

else:
    # Fail loudly instead of silently leaving `model` undefined (or stale
    # from an earlier run of this cell).
    raise ValueError(
        f"Unsupported algorithm_name {algorithm_name!r}; expected 'SAC', 'PPO' or 'TD3'."
    )

# Get the current date and time
current_datetime = datetime.now().strftime("%Y%m%d_%H%M%S")

# Create the tb_log_name string
tb_log_name = f"{algorithm_name}_{current_datetime}"

In [None]:
from torchinfo import summary

# Total number of parameters held by the policy network
total_params = sum(p.numel() for p in model.policy.parameters())
print(f"Total number of parameters: {total_params:,}")

# Layer-by-layer overview of the policy. No example input is passed, so the
# previous dict built from an undefined `observation` variable (which raised
# NameError and was never used) has been dropped.
summary(model.policy)


In [None]:
# Date stamp shared by the log directory, checkpoint directory and experiment name
todays_date = datetime.now().strftime("%Y%m%d")
run_name = "PPO_{}".format(todays_date)

# Pre-train the policy on the expert dataset.
pretrain_agent(
    model,
    expert_dataset,
    env,
    test_env=eval_env,
    batch_size=4096,
    epochs=2000,
    scheduler_gamma=1.0,
    learning_rate=3e-2,
    log_interval=5,
    no_cuda=False,
    seed=1,
    verbose=True,
    test_batch_size=1024,
    early_stopping_patience=100,
    plot_curves=True,
    tensorboard_log_dir="tb_logs/imitation/{}".format(run_name),
    checkpoint_dir="checkpoints/imitation/{}".format(run_name),
    load_checkpoint=None,
    # Never commit credentials: the Comet ML key is read from the environment.
    # The key that used to be hard-coded here should be treated as leaked and
    # rotated.
    # NOTE(review): when COMET_API_KEY is unset, None is passed -- confirm
    # that pretrain_agent treats this as "Comet logging disabled".
    comet_ml_api_key=os.environ.get("COMET_API_KEY"),
    comet_ml_project_name="pretraining_rl",
    comet_ml_experiment_name=run_name,
    eval_freq=5,
    l2_reg_strength=0.001,
    max_grad_norm=10.0,
)