In [None]:
import tensorflow as tf
print(tf.__version__)
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from stable_baselines3 import PPO , DDPG, SAC #,RecurrentPPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
import matplotlib.pyplot as plt
import joblib
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import torch

# --- Hardware report --------------------------------------------------------
# Show what PyTorch can see: CUDA availability, device count, device names.
cuda_ready = torch.cuda.is_available()
print(f"Is CUDA available? {cuda_ready}")

num_gpus = torch.cuda.device_count()
print(f"Number of GPUs available: {num_gpus}")

for gpu_index in range(num_gpus):
    gpu_name = torch.cuda.get_device_name(gpu_index)
    print(f"GPU {gpu_index}: {gpu_name}")

# --- Pre-trained Keras estimators used by the environment -------------------
# Crop-parameter model (file name suggests an LSTM).
crop_parameters_estimator = load_model(
    '/media/ai-ws2/8f2a1bf6-3409-412b-abae-522c4615e68f/ImanHindi/AGHC/AutonomousGreenHouseChallenge/Code/Training/wcp_LSTM_model_fs_model.h5'
)
# Resource-consumption model (file name suggests an LSTM).
resource_consumption_estimator = load_model(
    '/media/ai-ws2/8f2a1bf6-3409-412b-abae-522c4615e68f/ImanHindi/AGHC/AutonomousGreenHouseChallenge/Code/Training/rc_LSTM_model_fs_model.h5'
)
# Greenhouse-climate model (file name suggests an MLP).
gh_climate_estimator = load_model(
    '/media/ai-ws2/8f2a1bf6-3409-412b-abae-522c4615e68f/ImanHindi/AGHC/AutonomousGreenHouseChallenge/Code/Training/ghc_mlp_model_fs_model.h5'
)

# Module-level list kept for ad-hoc reward bookkeeping.
rewards = []

# --- Column selections ------------------------------------------------------
# Input columns of the crop-parameter estimator (16 names).
CP_important_feature = [
    'Tair', 'pH_drain_PC', 'Cum_irr', 't_heat_vip',
    'water_sup', 'Tot_PAR', 'water_sup_intervals_vip_min', 'PipeGrow',
    'EC_drain_PC', 'BlackScr', 'co2_dos', 'Tot_PAR_Lamps',
    'scr_enrg_vip', 'Rhair', 'HumDef', 'days',
]

# Input columns of the greenhouse-climate estimator (12 names).
GH_C_important_feature = [
    'PARout', 'Tout', 'Iglob', 'RadSum',
    'scr_enrg_vip', 't_heat_vip', 'int_white_vip', 'scr_blck_vip',
    'pH_drain_PC', 'co2_vip', 't_ventlee_vip', 'days',
]

# Input columns of the resource-consumption estimator (11 names).
RC_important_feature = [
    'Cum_irr', 'BlackScr', 'water_sup_intervals_vip_min', 'EC_drain_PC',
    'pH_drain_PC', 'CO2air', 'water_sup', 'HumDef',
    'Rhair', 'days', 'Tot_PAR',
]

# Column labels given to the control-setpoint frame: the nine agent-controlled
# setpoints followed by the 'days' column that the environment appends.
actions_sp = [
    'co2_vip', 'int_white_vip', 'pH_drain_PC', 'scr_blck_vip', 'scr_enrg_vip',
    't_heat_vip', 't_ventlee_vip', 'water_sup', 'water_sup_intervals_vip_min',
    'days',
]

# Greenhouse-climate columns exposed to the agent as the 'gh_climate' observation.
important_ghc = [
    'BlackScr', 'CO2air', 'Cum_irr', 'EC_drain_PC', 'PipeGrow',
    'HumDef', 'Rhair', 'Tair', 'Tot_PAR', 'Tot_PAR_Lamps',
]

# Weather column names.
weather_sp = ['PARout', 'Tout', 'Iglob', 'RadSum']

# Labels for the 17 outputs of the greenhouse-climate estimator.
GH_C_Out_columns = [
    'AssimLight', 'BlackScr', 'CO2air', 'Cum_irr', 'EC_drain_PC', 'EnScr',
    'HumDef', 'PipeGrow', 'PipeLow', 'Rhair', 'Tair', 'Tot_PAR',
    'Tot_PAR_Lamps', 'VentLee', 'Ventwind', 'assim_vip', 'co2_dos',
]
class GreenhouseEnv(gym.Env):
    """Greenhouse control environment backed by three pre-trained estimators.

    Each call to :meth:`step` consumes one flat action holding
    ``SAMPLES_PER_STEP * N_SETPOINTS`` values in ``[0, 1]`` (2016 samples of
    9 control setpoints).  The action is combined with the current weather
    window to predict the greenhouse climate, which in turn feeds the
    crop-parameter and resource-consumption estimators.  The reward trades
    predicted crop parameters against predicted resource consumption.

    A step is reshaped as 7 blocks of 288 samples for the resource model, so
    one step presumably represents one week of 5-minute samples (TODO confirm
    the sampling interval against the training data).

    The module-level lists ``actions_sp``, ``GH_C_important_feature``,
    ``GH_C_Out_columns``, ``CP_important_feature``, ``RC_important_feature``
    and ``important_ghc`` define the column layout used in :meth:`step`.

    Args:
        crop_parameters_estimator: model with ``predict``; fed a
            ``(1, 2016, 16)`` array and expected to yield 3 values.
        resource_consumption_estimator: model with ``predict``; fed a
            ``(7, 288, 11)`` array; its output summed over axis 0 must hold
            5 values.
        gh_climate_estimator: model with ``predict``; fed a ``(2016, 12)``
            array and expected to yield ``2016 * 17`` values.
        weather_data: row-sliceable weather series with 10 columns.
        weather_columns: optional labels for the weather columns.  When
            omitted, the module-level ``w_columns`` is looked up at step time
            (the original behaviour).
        verbose: when true, print the diagnostic messages the environment
            emits while stepping and computing rewards.
    """

    SAMPLES_PER_DAY = 288
    DAYS_PER_STEP = 7
    SAMPLES_PER_STEP = SAMPLES_PER_DAY * DAYS_PER_STEP  # 2016
    N_SETPOINTS = 9
    # Divisor applied to the running day index to build the 'days' feature.
    DAY_SCALE = 166
    # Floor for the mean per-sample action spread; keeps the inverse-spread
    # penalty (and hence the reward) finite when all setpoints are equal.
    MIN_ACTION_SPREAD = 1e-6

    def __init__(self, crop_parameters_estimator, resource_consumption_estimator, gh_climate_estimator,
                 weather_data, weather_columns=None, verbose=False):
        super().__init__()
        self.crop_parameters_estimator = crop_parameters_estimator
        self.resource_consumption_estimator = resource_consumption_estimator
        self.gh_climate_estimator = gh_climate_estimator
        self.weather_columns = weather_columns
        self.verbose = verbose

        # Flattened action: 2016 samples x 9 control setpoints, each in [0, 1].
        self.action_space = spaces.Box(
            low=0, high=1,
            shape=(self.SAMPLES_PER_STEP * self.N_SETPOINTS,),
            dtype=np.float64,
        )

        # Observation: current weather window, last predicted crop parameters,
        # last summed resource consumption and last predicted climate subset.
        self.observation_space = spaces.Dict({
            'weather': spaces.Box(low=0, high=1, shape=(self.SAMPLES_PER_STEP, 10), dtype=np.float64),
            'crop_params': spaces.Box(low=0, high=1, shape=(1, 3), dtype=np.float64),
            'resource_consumption': spaces.Box(low=0, high=7, shape=(1, 5), dtype=np.float64),
            'gh_climate': spaces.Box(low=-10, high=10, shape=(self.SAMPLES_PER_STEP, 10), dtype=np.float64)
        })

        self.weather_data = weather_data
        self.current_step = 0  # kept for backward compatibility; not read by the environment
        self.steps = 0
        self.max_steps = 23
        self.days = self._build_days()

    def _log(self, *args):
        """Print diagnostics only when the environment was created verbose."""
        if getattr(self, 'verbose', False):
            print(*args)

    def _build_days(self):
        """Return the ``(2016 * max_steps, 1)`` column of scaled day indices."""
        n_samples = self.SAMPLES_PER_STEP * self.max_steps
        day_index = np.arange(n_samples) // self.SAMPLES_PER_DAY
        return (day_index / self.DAY_SCALE).reshape(n_samples, 1)

    def _weather_window(self, index):
        """Return weather rows of window ``index`` as a float64 array."""
        start = index * self.SAMPLES_PER_STEP
        stop = start + self.SAMPLES_PER_STEP
        return np.asarray(self.weather_data[start:stop], dtype=np.float64)

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: optional seed forwarded to ``gym.Env.reset`` so the
                environment's own ``np_random`` generator is reseeded.
            options: accepted for Gymnasium API compatibility; unused.
        """
        # Seeds self.np_random; avoids touching the global NumPy RNG.
        super().reset(seed=seed)
        self.steps = 0
        self.days = self._build_days()

        rng = self.np_random
        # Small random values in [0, 0.1) stand in for the unknown initial
        # crop state, resource usage and greenhouse climate.
        self.state = {
            'weather': self._weather_window(self.steps),
            'crop_params': rng.uniform(0, .1, size=(1, 3)),
            'resource_consumption': rng.uniform(0, .1, size=(1, 5)),
            'gh_climate': rng.uniform(0, .1, size=(self.SAMPLES_PER_STEP, 10)),
        }
        self._log(self.state['weather'].shape)
        self.daily_res_cons = rng.uniform(0, .1, size=(self.DAYS_PER_STEP, 1, 5))

        return self.state, {}

    def step(self, action):
        """Apply one block of setpoints and advance the simulation.

        Args:
            action: flat array of ``2016 * 9`` setpoint values.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` becomes true once ``max_steps`` steps were taken;
            ``truncated`` is always false.
        """
        n_rows = self.SAMPLES_PER_STEP
        action = np.asarray(action, dtype=np.float64).reshape((n_rows, self.N_SETPOINTS))
        day = np.array(self.days[self.steps * n_rows:(self.steps + 1) * n_rows]).reshape(n_rows, 1)
        self.steps += 1
        self._log('day', day)

        # Setpoints plus the day column, labelled with the global action names.
        control_setpoints = pd.DataFrame(np.concatenate([action, day], axis=1), columns=actions_sp)

        # Fall back to the module-level weather labels when none were injected.
        weather_labels = self.weather_columns if self.weather_columns is not None else w_columns
        weather = pd.DataFrame(self.state['weather'], columns=weather_labels)

        # 1) Greenhouse climate from weather + setpoints.
        w_sp_data = pd.concat([weather, control_setpoints], axis=1)
        GH_C_Estimator_Input = np.array(w_sp_data[GH_C_important_feature])
        self._log('w_sp_data', w_sp_data.shape)
        ghclimate = self.gh_climate_estimator.predict(GH_C_Estimator_Input).reshape(n_rows, 17)
        gh_climate = pd.DataFrame(ghclimate, columns=GH_C_Out_columns)

        # Climate + setpoints feed both downstream estimators; build it once.
        gh_sp_data = pd.concat([gh_climate, control_setpoints], axis=1)

        # 2) Crop parameters for the whole step.
        CP_Estimator_Input = np.array(gh_sp_data[CP_important_feature]).reshape(1, n_rows, 16)
        weekly_crop_params = np.asarray(
            self.crop_parameters_estimator.predict(CP_Estimator_Input), dtype=np.float64
        ).reshape(1, 3)

        # 3) Resource consumption, one sequence per day; predicted once and reused.
        RC_Estimator_Input = np.array(gh_sp_data[RC_important_feature]).reshape(
            self.DAYS_PER_STEP, self.SAMPLES_PER_DAY, 11
        )
        daily_resource_consumption = self.resource_consumption_estimator.predict(RC_Estimator_Input)
        self._log(RC_Estimator_Input.shape)
        self._log(daily_resource_consumption.shape)
        self.daily_res_cons = daily_resource_consumption
        weekly_resource_consumption = np.asarray(
            daily_resource_consumption, dtype=np.float64
        ).sum(axis=0).reshape(1, 5)
        self._log(weekly_resource_consumption.shape)

        # True when any daily prediction reaches 1; always defined, even for
        # an empty prediction.
        high_rc = bool(np.any(daily_resource_consumption >= 1))

        # Next weather window.  At the end of the series the slice may be
        # short; reuse the current window so the observation keeps its shape.
        next_weather = self._weather_window(self.steps)
        if next_weather.shape[0] != n_rows:
            next_weather = np.asarray(self.state['weather'], dtype=np.float64)

        # Cast to float64 so observations match the declared space dtype
        # (the estimators may return float32).
        self.state = {
            'weather': next_weather,
            'crop_params': weekly_crop_params,
            'resource_consumption': weekly_resource_consumption,
            'gh_climate': np.asarray(gh_climate[important_ghc], dtype=np.float64).reshape(n_rows, 10)
        }

        self._log(weekly_crop_params[0].shape)
        self._log(weekly_resource_consumption[0].shape)
        self._log(weekly_resource_consumption)

        reward = self.calculate_reward(weekly_crop_params[0], weekly_resource_consumption[0], high_rc, action)

        # The episode ends after max_steps steps.  Report it as termination
        # only; Stable-Baselines3 combines the two flags with `or`, so this is
        # equivalent for training while no longer flagging a truncation too.
        terminated = bool(self.steps >= self.max_steps)
        truncated = False

        return self.state, reward, terminated, truncated, {}

    def calculate_reward(self, crop_params, resource_consumption, high_rc, current_actions):
        """Combine crop outcome, resource use and action spread into one reward.

        Args:
            crop_params: three predicted crop parameters.
            resource_consumption: five summed resource values; indices 2 and 3
                are added together for the third penalty term.
            high_rc: accepted for interface compatibility; currently unused.
            current_actions: ``(samples, setpoints)`` action matrix.

        Returns:
            The reward as a Python ``float``.
        """
        punishment = 0
        big_reward = 0
        alpha, beta, delta, gamma = 1, .5, 0.02, 0.002
        w1, w2, w3 = 1, 1, 1
        p1, p2, p3, p4 = 1, 1, 1, 1
        max_stem_elong, max_stem_thick, max_cum_trusses = 1.0, 1.0, 1.0
        max_heat, max_co2, max_electricity, max_irrigation = 7.0, 7.0, 7.0, 7.0

        # Inverse of the mean per-sample spread across setpoints: nearly equal
        # setpoints are penalised.  The floor prevents a division by zero
        # (and an infinite reward) when every setpoint in a sample is equal.
        mean_spread = np.std(current_actions, axis=1).mean()
        action_variation_penalty = 1 / max(mean_spread, self.MIN_ACTION_SPREAD)

        crop_reward = (w1 * (crop_params[0] / max_stem_elong) +
                       w2 * (crop_params[1] / max_stem_thick) +
                       w3 * (crop_params[2] / max_cum_trusses))
        resource_penalty = (p1 * (resource_consumption[0] / max_heat) +
                            p2 * (resource_consumption[1] / max_co2) +
                            p3 * ((resource_consumption[2] + resource_consumption[3]) / max_electricity) +
                            p4 * (resource_consumption[4] / max_irrigation))

        # Threshold penalties.
        if np.any(resource_consumption > 7):
            self._log('resource_consumption>7', True)
            punishment -= 1
        if np.any(crop_params < .5):
            self._log('crop_params<.5', True)
            punishment -= 0.8
        if np.all(crop_params < .5):
            self._log('all crop_params<.5', True)
            punishment -= 1

        # Threshold bonuses; they stack when several conditions hold.
        if np.any(crop_params >= .7):
            self._log('crop_params>=.7', True)
            big_reward += .5
        if np.any(crop_params >= .8):
            self._log('crop_params>=.8', True)
            big_reward += .8
        if np.all(crop_params >= .5):
            self._log('all crop_params>=.5', True)
            big_reward += .8
        if np.all(crop_params >= .7):
            self._log('all crop_params>=.7', True)
            big_reward += .8
        if np.all(crop_params >= .8):
            self._log('all crop_params>=.8', True)
            big_reward += 1

        efficiency_factor = crop_reward / (1 + resource_penalty)
        self._log(efficiency_factor, 'efficiency_factor')

        reward = (alpha * crop_reward - beta * resource_penalty + delta * efficiency_factor
                  + punishment + big_reward - gamma * action_variation_penalty)
        self._log(reward)
        return float(reward)
    
    # def calculate_reward(self, crop_params, resource_consumption,high_rc,current_actions):
    #     punishment=0
    #     big_reward=0
    #     alpha, beta, delta,gamma = 1, 0.2, 0.1,0.01 #1, 0.5, 0.1
    #     w1, w2, w3 = 0.40, 0.30, 0.30
    #     p1, p2, p3, p4 = 0.2, 0.3, 0.2, 0.3
    #     #w1, w2, w3 = 1, 1, 1
    #     #p1, p2, p3, p4 = 1, 1, 1, 1
    #     max_stem_elong, max_stem_thick, max_cum_trusses = 1.0, 1.0, 1.0
    #     max_heat, max_co2, max_electricity, max_irrigation = 7.0, 7.0, 7.0, 7.0

    #     crop_reward = (w1 * (crop_params[0] / max_stem_elong) +
    #                    w2 * (crop_params[1] / max_stem_thick) +
    #                    w3 * (crop_params[2] / max_cum_trusses))
    #     resource_penalty = (p1 * (resource_consumption[0] / max_heat) +
    #                         p2 * (resource_consumption[1] / max_co2) +
    #                         p3 * ((resource_consumption[2] + resource_consumption[3]) / max_electricity) +
    #                         p4 * (resource_consumption[4] / max_irrigation))
    #     #if high_rc:
    #     #    punishment=-.1
    #     #if np.any(current_actions>1 ) or np.any(current_actions<0):
    #     #    punishment-=.9
    #     if np.any(resource_consumption>7):
    #         print('resource_consumption>7',np.any(resource_consumption>7))
    #         punishment-=1
    #     if np.any(resource_consumption>5):
    #         print('resource_consumption>5',np.any(resource_consumption>5))
    #         punishment-=.1
    #     if np.any(crop_params<.5):
    #         print('crop_params<.5',np.any(crop_params<.5))
    #         punishment-=0.8
        
    #     if np.all(crop_params<.5):
    #         print('all crop_params<.5',np.all(crop_params<.5))
    #         punishment-=1
    #     if np.any(crop_params>=.7):
    #         print('crop_params>=.7',np.any(crop_params>=.7))
    #         big_reward+=.2
    #     if np.any(crop_params>=.8):
    #         print('crop_params>=.8',np.any(crop_params>=.8))
    #         big_reward+=.2
    #     if np.all(crop_params>=.5):
    #         print('all crop_params>=.5',np.all(crop_params>=.5))
    #         big_reward+=.2
    #     if np.all(crop_params>=.7):
    #         print('all crop_params>=.7',np.all(crop_params>=.7))
    #         big_reward+=.2
    #     if np.all(crop_params>=.8):
    #         print('all crop_params>=.8',np.all(crop_params>=.8))
    #         big_reward+=.2
    #     # efficiency_factor = crop_reward / (1 + resource_penalty)
    #     # print(efficiency_factor,'efficiency_factor')
    #     # Stability penalty
    #     # s = 0.01
    #     # max_delta_action = 16.0  # Assuming actions are normalized between 0 and 1
    #     # stability=0
    #     # #print(len(current_actions)-1)
    #     # for idx in range(len(current_actions)-1):
    #     #     stability+=np.sum(np.abs(current_actions[idx]-current_actions[idx+1]))/ max_delta_action
    #     #     #print('Action Difference',stability)
    #     # stability_penalty = s * stability
    #     #print(stability_penalty)
    #     reward = alpha * crop_reward - beta * resource_penalty +punishment+big_reward #+ delta * efficiency_factor #- gamma * stability_penalty
    #     print(reward)
    #     #reward = alpha * crop_reward - beta * resource_penalty + delta * efficiency_factor+punishment+big_reward
    #     #print(reward)
    #     return reward

# Load the weather time series; the '%time' column becomes the index.
weather_data = pd.read_csv('/media/ai-ws2/8f2a1bf6-3409-412b-abae-522c4615e68f/ImanHindi/AGHC/AutonomousGreenHouseChallenge/Code/Training/weather_fill_missing_values.csv',index_col='%time')
# Column labels are read by GreenhouseEnv.step to label each weather window.
w_columns=weather_data.columns
print(weather_data.shape)

# Scale every weather column to [0, 1] so it fits the 'weather' observation
# Box; the clip absorbs floating-point overshoot at the range edges.
scaler=MinMaxScaler()
weather_data=np.clip(scaler.fit_transform(weather_data), 0.0, 1.0)
print(weather_data.shape)
# NOTE: a leftover `weather_data = np.random.rand(48384, 10)` used to overwrite
# the scaled series here, so the agent was trained on random noise instead of
# the recorded weather.  It has been removed.

# Initialize the environment
env = GreenhouseEnv(crop_parameters_estimator, resource_consumption_estimator,gh_climate_estimator, weather_data)

# Fail early, with a readable message, if the file does not match what the
# environment slices: one window of rows per step and the declared column count.
window_rows, window_cols = env.observation_space['weather'].shape
min_rows = window_rows * env.max_steps
if weather_data.shape[1] != window_cols or weather_data.shape[0] < min_rows:
    raise ValueError(
        f"weather data has shape {weather_data.shape}; expected at least "
        f"{min_rows} rows and exactly {window_cols} columns"
    )
check_env(env)

import os
# Define a folder for saving logs_fs and models
log_dir = "./logs_fs/"
os.makedirs(log_dir, exist_ok=True)

# Wrap the environment in a monitor to track performance
env = Monitor(env, log_dir)


In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import torch as th
import torch.nn as nn
from stable_baselines3.common.policies import ActorCriticPolicy
from stable_baselines3.common.torch_layers import MlpExtractor

# # Custom policy network for PPO
# class CustomPolicy(nn.Module):
#     def __init__(self, obs_space, action_space):
#         super(CustomPolicy, self).__init__()
#         self.fc1 = nn.Linear(obs_space['weather'].shape[1], 512)
#         self.fc2 = nn.Linear(512, 256)
#         self.action_head = nn.Linear(256, action_space.shape[1])  # Output for each feature

#     def forward(self, obs):
#         x = th.relu(self.fc1(obs['weather']))
#         x = th.relu(self.fc2(x))
#         actions = th.tanh(self.action_head(x))  # Each feature has unique action values
#         return actions

# # Use the custom policy in the PPO model
# policy_kwargs = {
#     'features_extractor_class': CustomPolicy,
#     'net_arch': [512, 256]
# }
# #model = PPO("MultiInputPolicy", env, policy_kwargs=policy_kwargs, verbose=1, learning_rate=1e-4)

# Pick the GPU when PyTorch reports one, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# PPO hyperparameters, gathered in one place and passed as keyword arguments.
_ppo_settings = dict(
    verbose=1,
    learning_rate=1e-5,
    n_steps=2048,        # environment steps collected per policy update
    batch_size=64,       # minibatch size used during each update
    n_epochs=20,         # optimisation passes over each collected rollout
    gamma=0.99,          # discount factor
    gae_lambda=0.95,     # GAE lambda for advantage estimation
    clip_range=0.2,      # PPO clipping range
    ent_coef=0.09,       # entropy coefficient
    vf_coef=0.5,         # value-function loss coefficient
    max_grad_norm=0.5,   # gradient-norm clipping threshold
    tensorboard_log=log_dir,
    policy_kwargs=dict(net_arch=[256, 256, 128]),  # hidden layer sizes
    device=device,
)

# "MultiInputPolicy" is required because the observation space is a Dict.
ppo_model = PPO("MultiInputPolicy", env, **_ppo_settings)

# class CustomPolicy(ActorCriticPolicy):
#     def __init__(self, observation_space, action_space, lr_schedule, **kwargs):
#         super(CustomPolicy, self).__init__(observation_space, action_space, lr_schedule, **kwargs)

#         # Here we define the MlpExtractor for the policy and value features.
#         # Adjust net_arch according to your needs.
#         self.mlp_extractor = MlpExtractor(
#             self.features_dim,   # features_dim is computed by parent class
#             net_arch=[256, 256, 128],
#             activation_fn=nn.ReLU
#         )

#         # The MlpExtractor returns separate latent vectors for policy and value
#         latent_dim_pi = 128
#         latent_dim_vf = 128

#         self.action_net = nn.Linear(latent_dim_pi, action_space.shape[0])
#         self.value_net = nn.Linear(latent_dim_vf, 1)

#         # Weight initialization
#         nn.init.orthogonal_(self.action_net.weight, gain=0.01)
#         nn.init.constant_(self.action_net.bias, 0)
#         nn.init.orthogonal_(self.value_net.weight, gain=1)
#         nn.init.constant_(self.value_net.bias, 0)

#         # Register the layers so the policy knows what parameters to optimize
#         self._build(lr_schedule)

#     def _build(self, lr_schedule):
#         # Setup optimizer
#         self.optimizer = th.optim.Adam(self.parameters(), lr=lr_schedule(1))

#     def forward(self, obs):
#         # Called by the parent to get the distribution and value estimates
#         features = self.extract_features(obs)
#         latent_pi, latent_vf = self.mlp_extractor(features)
#         action_logits = self.action_net(latent_pi)
#         value = self.value_net(latent_vf)
#         return action_logits, value
    
    
# ppo_model = PPO(
#     policy=CustomPolicy,
#     env=env,
#     verbose=1, 
#     learning_rate=1e-5,
#     n_steps=2048,  # Larger number of steps before each policy update
#     batch_size=64,  # Smaller batches for more frequent updates
#     n_epochs=20,  # More epochs for stable learning
#     gamma=0.99,  # Discount factor
#     clip_range=0.2,  # PPO clipping range
#     ent_coef=0.05,  # Entropy coefficient for exploration
#     gae_lambda=0.95,  # GAE lambda for advantage estimation
#     vf_coef=0.5,  # Value function coefficient
#     max_grad_norm=0.5,  # Gradient clipping
#     tensorboard_log=log_dir,
#     device=device
# )



In [4]:
# from stable_baselines3 import PPO
# from stable_baselines3.common.policies import ActorCriticPolicy
# from stable_baselines3.common.torch_layers import MlpExtractor
# import torch as th
# import torch.nn as nn
# import torch

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
# print(device)
# from stable_baselines3 import PPO
# from stable_baselines3.common.policies import ActorCriticPolicy
# from stable_baselines3.common.torch_layers import MlpExtractor
# import torch as th
# import torch.nn as nn
# import torch

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# print(device)

# class CustomPolicy(ActorCriticPolicy):
#     def __init__(self, observation_space, action_space, lr_schedule, **kwargs):
#         super(CustomPolicy, self).__init__(observation_space, action_space, lr_schedule, **kwargs)
#         # Do not define mlp_extractor here, it will be defined in _build_mlp_extractor

#     def _build_mlp_extractor(self):
#         # This method is called by the parent class after features_dim is known
#         mlp_extractor = MlpExtractor(
#             self.features_dim,
#             net_arch=[256, 256, 128],
#             activation_fn=nn.ReLU
#         )

#         # Set the mlp_extractor as an attribute of self
#         self.mlp_extractor = mlp_extractor

#         # Define the actor and value networks after the MlpExtractor
#         latent_dim_pi = 128
#         latent_dim_vf = 128

#         self.action_net = nn.Linear(latent_dim_pi, self.action_space.shape[0])
#         self.value_net = nn.Linear(latent_dim_vf, 1)

#         nn.init.orthogonal_(self.action_net.weight, gain=0.01)
#         nn.init.constant_(self.action_net.bias, 0)
#         nn.init.orthogonal_(self.value_net.weight, gain=1)
#         nn.init.constant_(self.value_net.bias, 0)

#         return self.mlp_extractor

# # Make sure that env and log_dir are defined


# # Ensure that you have defined `env` and `log_dir` before this point.
# # For example:
# # env = ... # your environment
# # log_dir = "./logs/"
# # os.makedirs(log_dir, exist_ok=True)

# ppo_model = PPO(
#     policy=CustomPolicy,
#     env=env,
#     verbose=1, 
#     learning_rate=1e-5,
#     n_steps=2048,
#     batch_size=64,
#     n_epochs=20,
#     gamma=0.99,
#     clip_range=0.2,
#     ent_coef=0.05,
#     gae_lambda=0.95,
#     vf_coef=0.5,
#     max_grad_norm=0.5,
#     tensorboard_log=log_dir,
#     device=device
# )

In [None]:
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback

# Evaluate on a dedicated environment instance.  Passing the training `env`
# to EvalCallback would reset and advance it in the middle of a rollout,
# leaving PPO with a stale last observation after every evaluation.
eval_env = Monitor(GreenhouseEnv(crop_parameters_estimator, resource_consumption_estimator,
                                 gh_climate_estimator, weather_data))

# Evaluate the agent every 10,000 callback calls and keep the best model so far.
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs_fs/best_model/',
                             log_path='./logs_fs/results/', eval_freq=10_000, deterministic=True, render=False)

# Save a checkpoint every 10,000 callback calls.
checkpoint_callback = CheckpointCallback(save_freq=10_000, save_path='./logs_fs/checkpoints/', name_prefix='ppo_greenhouse')


# Train the model
ppo_model.learn(total_timesteps=120_000, callback=[eval_callback, checkpoint_callback])

# Save the trained model
ppo_model.save("ppo_greenhouse_final_model")

# Final evaluation, also on the dedicated evaluation environment.
mean_reward, std_reward = evaluate_policy(ppo_model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"Mean reward: {mean_reward} +/- {std_reward}")

In [None]:

from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback

# Evaluate on a dedicated environment instance.  Passing the training `env`
# to EvalCallback would reset and advance it in the middle of a rollout,
# leaving PPO with a stale last observation after every evaluation.
eval_env = Monitor(GreenhouseEnv(crop_parameters_estimator, resource_consumption_estimator,
                                 gh_climate_estimator, weather_data))

# Evaluate the agent every 10,000 callback calls and keep the best model so far.
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs_fs/best_model/',
                             log_path='./logs_fs/results/', eval_freq=10000, deterministic=True, render=False)

# Save a checkpoint every 10,000 callback calls.
checkpoint_callback = CheckpointCallback(save_freq=10000, save_path='./logs_fs/checkpoints/', name_prefix='ppo_greenhouse')

import os
from stable_baselines3 import PPO

# Define the path to the checkpoint directory
checkpoint_dir = './logs_fs/checkpoints/'
# NOTE: the automatic "find the newest checkpoint" logic below is disabled
# (commented out); the checkpoint to resume from is pinned by hand at the
# end of this section.
# checkpoint_prefix = 'ppo_greenhouse'

# # Find the latest checkpoint file
# checkpoints = [f for f in os.listdir(checkpoint_dir) if f.startswith(checkpoint_prefix) and f.endswith('.zip')]

# # Check for valid checkpoints and extract steps
# if not checkpoints:
#     raise FileNotFoundError("No checkpoint files found.")

# checkpoint_steps = []
# for f in checkpoints:
#     # Split the filename by underscores and find the step part
#     parts = f.split('_')
#     if len(parts) >= 3:  # Ensure there are enough parts to avoid IndexError
#         step_part = parts[-2]  # Get the part before 'steps'
#         if step_part.isdigit():  # Check if it's a digit
#             checkpoint_steps.append(int(step_part))
#         else:
#             print(f"Warning: '{step_part}' is not a valid step number. Skipping this file.")
#     else:
#         print(f"Warning: File '{f}' does not match expected format. Skipping.")

# # Ensure we have valid steps to process
# if not checkpoint_steps:
#     raise ValueError("No valid checkpoint steps found.")

# # Find the latest checkpoint based on the step number
# latest_checkpoint_index = checkpoint_steps.index(max(checkpoint_steps))

# latest_checkpoint = checkpoints[latest_checkpoint_index]
# Resume point is hard-coded to the 80000-step checkpoint. Despite the
# variable name, this is not necessarily the newest file in checkpoint_dir.
latest_checkpoint_path = os.path.join(checkpoint_dir, 'ppo_greenhouse_80000_steps.zip')

# Load the agent from the selected checkpoint and attach the current env.
print(f"Loading model from {latest_checkpoint_path}")
ppo_model = PPO.load(latest_checkpoint_path, env=env)

# Continue training for another 300_000 timesteps.
# reset_num_timesteps=False keeps the timestep counter restored from the
# checkpoint instead of restarting it at 0. CheckpointCallback names its
# files after that counter, so without this flag the resumed run would
# overwrite the earlier 'ppo_greenhouse_<N>_steps.zip' files (including the
# one just loaded) and the logged timesteps would start over.
ppo_model.learn(
    total_timesteps=300_000,  # Adjust timesteps as needed
    callback=[eval_callback, checkpoint_callback],
    reset_num_timesteps=False,
)

# Save the model again after the resumed training run.
ppo_model.save("ppo_greenhouse_final_model_after_resume")

# Evaluate the resumed model over 10 deterministic episodes.
mean_reward, std_reward = evaluate_policy(ppo_model, env, n_eval_episodes=10, deterministic=True)
print(f"Mean reward: {mean_reward} +/- {std_reward}")




In [None]:
# import gymnasium as gym
# from gymnasium import spaces
# import numpy as np
# import pandas as pd
# from tensorflow.keras.models import load_model
# from stable_baselines3 import PPO , DDPG, SAC #,RecurrentPPO
# from stable_baselines3.common.env_checker import check_env
# from stable_baselines3.common.evaluation import evaluate_policy
# from stable_baselines3.common.monitor import Monitor
# from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
# import matplotlib.pyplot as plt
# import joblib
# from sklearn.preprocessing import StandardScaler,MinMaxScaler
# import torch

# # Check if a GPU is available
# print(f"Is CUDA available? {torch.cuda.is_available()}")

# # Get the number of available GPUs
# num_gpus = torch.cuda.device_count()
# print(f"Number of GPUs available: {num_gpus}")

# # Print the name of each available GPU
# for i in range(num_gpus):
#     print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

# # Load the pre-trained models
# crop_parameters_estimator = load_model('/media/ai-ws2/8f2a1bf6-3409-412b-abae-522c4615e68f/ImanHindi/AGHC/AutonomousGreenHouseChallenge/Code/Training/wcp_LSTM_model_model.h5')
# resource_consumption_estimator = load_model('/media/ai-ws2/8f2a1bf6-3409-412b-abae-522c4615e68f/ImanHindi/AGHC/AutonomousGreenHouseChallenge/Code/Training/rc_LSTM_model_model.h5')

# rewards = []

# class GreenhouseEnv(gym.Env):
#     def __init__(self, crop_parameters_estimator, resource_consumption_estimator, weather_data):
#         super(GreenhouseEnv, self).__init__()
#         self.crop_parameters_estimator = crop_parameters_estimator
#         self.resource_consumption_estimator = resource_consumption_estimator
        
#         # Action space: shape (2016, 34) - 2016 time steps for 34 control setpoints
#         self.action_space = spaces.Box(low=0, high=1, shape=(2016, 34), dtype=np.float64)
        
#         # Observation space: weather (2016, 10), crop parameters (1, 3), resource consumption (1, 5)
#         self.observation_space = spaces.Dict({
#             'weather': spaces.Box(low=0, high=1, shape=(2016, 10), dtype=np.float64),
#             'crop_params': spaces.Box(low=0, high=1, shape=(1, 3), dtype=np.float64),
#             'resource_consumption': spaces.Box(low=0, high=7, shape=(1, 5), dtype=np.float64)
#         })
        
#         # Initial state
#         self.weather_data = weather_data
#         self.current_step = 0
#         self.steps = 0
#         self.max_steps = 23

#     def reset(self, seed=None):
#         if seed is not None:
#             np.random.seed(seed)
#         self.steps = 0
        
#         # Random initial control setpoints
#         control_setpoints = np.random.uniform(0, 1, size=(2016, 34)).reshape(2016, 34)
#         day = np.array([(i // 288 + 1) / 166 * (self.steps + 1) for i in range(2016)]).reshape(2016, 1)
#         initial_control_setpoints = np.concatenate([control_setpoints, day], axis=1).reshape(1, 2016, 35)
        
#         initial_crop_params = np.zeros(3).reshape(1, 3)+0.1
#         initial_resource_consumption = np.zeros(5).reshape(1, 5)+0.1
#         initial_daily_resource_consumption = np.zeros(shape=(7,1,5)).reshape(7,1,5)+0.1
#         # Return the initial state
#         self.state = {
#             'weather': self.weather_data[self.steps * 2016:(self.steps + 1) * 2016],
#             'crop_params': initial_crop_params,
#             'resource_consumption': initial_resource_consumption
#         }
#         print(self.state['weather'].shape)
#         self.daily_res_cons=initial_daily_resource_consumption

#         return self.state, {}

#     def step(self, action):
#         self.steps += 1
#         day = np.array([(i // 288 + 1) / 166 * (self.steps + 1) for i in range(2016)]).reshape(2016, 1)
#         control_setpoints = np.concatenate([action.reshape(2016, 34), day], axis=1).reshape(1, 2016, 35)
#         current_crop_par=self.state['crop_params'].reshape(1,1,3)
#         print(current_crop_par.shape)
#         current_res_cons=self.daily_res_cons
#         # Predict crop parameters using the crop LSTM model
#         weekly_crop_params = self.crop_parameters_estimator.predict(control_setpoints).reshape(1,3)
        
#         # Average daily control setpoints for resource consumption
#         daily_actions = control_setpoints.reshape(7, 288, 35)
#         #actions = np.array([a[i].mean(axis=0) for i in range(7)])
#         #actions = pd.DataFrame(actions, columns=[
#         #    'AssimLight', 'BlackScr', 'CO2air', 'Cum_irr', 'EC_drain_PC', 'EnScr', 'HumDef', 
#         #    'PipeGrow', 'PipeLow', 'Rhair', 'Tair', 'Tot_PAR', 'Tot_PAR_Lamps', 'VentLee', 
#         #    'Ventwind', 'assim_vip', 'co2_dos', 'co2_vip', 'dx_vip', 'int_blue_vip', 'int_farred_vip', 
#         #    'int_red_vip', 'int_white_vip', 'pH_drain_PC', 'scr_blck_vip', 'scr_enrg_vip', 
#         #    't_grow_min_vip', 't_heat_vip', 't_rail_min_vip', 't_ventlee_vip', 't_ventwind_vip', 
#         #    'water_sup', 'water_sup_intervals_vip_min', 'window_pos_lee_vip'
#         #])
#         daily_resource_consumption = self.resource_consumption_estimator.predict(daily_actions)
#         print(current_res_cons.shape)
#         print(daily_resource_consumption.shape)
#         self.daily_res_cons=daily_resource_consumption
#         weekly_resource_consumption = self.resource_consumption_estimator.predict(daily_actions).sum(axis=0).reshape(1, 5)
#         print(weekly_resource_consumption.shape)
#         #print(self.resource_consumption_estimator.predict(actions)[:])
#         for i in self.resource_consumption_estimator.predict(daily_actions)[:]:
#             #print(i)
#             high_rc = np.any(i>=1)
#             #print(high_rc)
#             if high_rc:
#                 #print('break')
#                 break
#         # Update state
#         self.state = {
#             'weather': self.weather_data[self.steps * 2016:(self.steps + 1) * 2016],
#             'crop_params': weekly_crop_params,
#             'resource_consumption': weekly_resource_consumption
#         }
#         #print(np.max(action),np.min(action),np.any(action>1),np.any(action<0))
#         # Calculate reward
#         print(weekly_crop_params[0].shape)
#         print(weekly_resource_consumption[0].shape)
#         reward = self.calculate_reward(weekly_crop_params[0], weekly_resource_consumption[0],high_rc,action)
#         #print(np.any(weekly_crop_params[0]>=1 ) , np.any(daily_resource_consumption[0]>=7),high_rc)
#         done = bool((self.steps >= self.max_steps) or np.any(weekly_crop_params[0]<0.2 ) or np.any(weekly_resource_consumption[0]>7) or np.all(weekly_crop_params[0]<0.5) ) #or high_rc)
#         #print('done',done)
#         #rewards.append(reward)
        
#         return self.state, reward, done, done, {}

#     def calculate_reward(self, crop_params, resource_consumption,high_rc,current_actions):
#         punishment=0
#         big_reward=0
#         alpha, beta, delta,gamma = 1, 0.1, 0.1,0.01 #1, 0.5, 0.1
#         w1, w2, w3 = 0.40, 0.30, 0.30
#         p1, p2, p3, p4 = 0.2, 0.3, 0.2, 0.3
#         #w1, w2, w3 = 1, 1, 1
#         #p1, p2, p3, p4 = 1, 1, 1, 1
#         max_stem_elong, max_stem_thick, max_cum_trusses = 1.0, 1.0, 1.0
#         max_heat, max_co2, max_electricity, max_irrigation = 7.0, 7.0, 7.0, 7.0

#         crop_reward = (w1 * (crop_params[0] / max_stem_elong) +
#                        w2 * (crop_params[1] / max_stem_thick) +
#                        w3 * (crop_params[2] / max_cum_trusses))
#         resource_penalty = (p1 * (resource_consumption[0] / max_heat) +
#                             p2 * (resource_consumption[1] / max_co2) +
#                             p3 * ((resource_consumption[2] + resource_consumption[3]) / max_electricity) +
#                             p4 * (resource_consumption[4] / max_irrigation))
#         #if high_rc:
#         #    punishment=-.1
#         #if np.any(current_actions>1 ) or np.any(current_actions<0):
#         #    punishment-=.9
#         if np.any(resource_consumption>7):
#             print('resource_consumption>7',np.any(resource_consumption>7))
#             punishment-=1
#         if np.any(crop_params<.5):
#             print('crop_params<.5',np.any(crop_params<.5))
#             punishment-=0.8
        
#         if np.all(crop_params<.5):
#             print('all crop_params<.5',np.all(crop_params<.5))
#             punishment-=1
#         if np.any(crop_params>=.7):
#             print('crop_params>=.7',np.any(crop_params>=.7))
#             big_reward+=.2
#         if np.any(crop_params>=.8):
#             print('crop_params>=.8',np.any(crop_params>=.8))
#             big_reward+=.2
#         if np.all(crop_params>=.5):
#             print('all crop_params>=.5',np.all(crop_params>=.5))
#             big_reward+=1
#         if np.all(crop_params>=.7):
#             print('all crop_params>=.7',np.all(crop_params>=.7))
#             big_reward+=1
#         if np.all(crop_params>=.8):
#             print('all crop_params>=.8',np.all(crop_params>=.8))
#             big_reward+=1
#         #efficiency_factor = crop_reward / (1 + resource_penalty)
#         # Stability penalty
#         #s = 0.01
#         #max_delta_action = 34.0  # Assuming actions are normalized between 0 and 1
#         #stability=0
#         ##print(len(current_actions)-1)
#         #for idx in range(len(current_actions)-1):
#         #    stability+=np.sum(np.abs(current_actions[idx]-current_actions[idx+1]))/ max_delta_action
#         #    #print('Action Difference',stability)
#         #stability_penalty = s * stability
#         #print(stability_penalty)
#         reward = alpha * crop_reward - beta * resource_penalty +punishment+big_reward #+ delta * efficiency_factor #- gamma * stability_penalty
#         print(reward)
#         #reward = alpha * crop_reward - beta * resource_penalty + delta * efficiency_factor+punishment+big_reward
#         #print(reward)
#         return reward


# # Assuming weather_data is a preprocessed time series array for the environment
# # weather_data = pd.read_csv('/media/ai-ws2/8f2a1bf6-3409-412b-abae-522c4615e68f/ImanHindi/AGHC/AutonomousGreenHouseChallenge/Code/Training/weather_fill_missing_values.csv',index_col='%time')#pd.read_csv('D:\\Iman\\AGHC\\CherryTomato\\Data\\Weather\\Weather.csv',index_col='%time') #  # Example placeholder data #np.random.rand(48384, 10)  # Placeholder data
# # #print(weather_data.head())
# # print(weather_data.shape)
# # #print(weather_data.isnull().sum())
# # scaler=MinMaxScaler()
# # weather_data=scaler.fit_transform(weather_data)
# # #print(weather_data[:10])
# # print(weather_data.shape)
# # Assuming weather_data is randomly generated for the environment
# # Generate random weather data with 2016 time steps (e.g., 24 hours * 7 days) and 10 features per step
# weather_data = np.random.rand(48384, 10)  # Example shape (48384, 10) to simulate two weeks of data with 5-minute intervals

# # Optionally scale the random data to be between 0 and 1, though it's already generated in this range
# scaler = MinMaxScaler()
# weather_data = scaler.fit_transform(weather_data)
# print(weather_data.shape)
# # Initialize the environment
# env = GreenhouseEnv(crop_parameters_estimator, resource_consumption_estimator, weather_data)
# check_env(env)

# import os
# # Define a folder for saving logs_fs and models
# log_dir = "./logs_fs/"
# os.makedirs(log_dir, exist_ok=True)

# # Define the policy and agent
# #model = PPO("MultiInputPolicy", env, verbose=0, learning_rate=1e-5,tensorboard_log=log_dir) #MultiInputLstmPolicy #MultiInputPolicy
# # Get the number of actions in the environment (for noise generation)

# # Wrap the environment in a monitor to track performance
# env = Monitor(env, log_dir)
# # Reduced learning rate for more stable learning, increased exploration noise
# # Define the policy and agent
# #model = PPO("MultiInputPolicy", env, verbose=0, learning_rate=1e-5,tensorboard_log=log_dir) #MultiInputLstmPolicy #MultiInputPolicy
# # Assuming `model` is a PyTorch model


In [None]:
import os
from stable_baselines3 import PPO

# Location of the best model under ./logs_fs/best_model/.
best_model_dir = './logs_fs/best_model/'
best_model_filename = 'best_model.zip'  # Change this to your actual best model filename if different
best_model_path = os.path.join(best_model_dir, best_model_filename)

# Fail fast when the file is missing; otherwise load it against `env`.
if not os.path.exists(best_model_path):
    raise FileNotFoundError(f"No best model found at {best_model_path}")

print(f"Loading the best model from {best_model_path}")
best_model = PPO.load(best_model_path, env=env)

# Reward statistics of the best model over 10 deterministic episodes.
mean_reward, std_reward = evaluate_policy(
    best_model,
    env,
    n_eval_episodes=10,
    deterministic=True,
)
print(f"Mean reward of the best model: {mean_reward} +/- {std_reward}")


In [None]:
import pandas as pd 
# Folder holding the per-team CSV exports and the team file-name stems.
orgin_path = "/media/ai-ws2/8f2a1bf6-3409-412b-abae-522c4615e68f/ImanHindi/AGHC/AutonomousGreenHouseChallenge/Code/Training/"
filenames = ['AICU', 'Automatoes', 'Digilog', 'IUACAAS', 'Reference', 'TheAutomators']

# '<team>Actions.csv' for each team, indexed by the '%time' column.
T1_Action, T2_Action, T3_Action, T4_Action, T5_Action, T6_Action = [
    pd.read_csv(f'{orgin_path}{team_file}Actions.csv', index_col='%time')
    for team_file in filenames
]

# '<team>Results.csv' for each team, keeping only the first 18 rows.
T1_Results, T2_Results, T3_Results, T4_Results, T5_Results, T6_Results = [
    pd.read_csv(f'{orgin_path}{team_file}Results.csv', index_col='%time')[:18]
    for team_file in filenames
]

# '<team>resource_consumption.csv' for each team.
(
    T1_resource_Consumption,
    T2_resource_Consumption,
    T3_resource_Consumption,
    T4_resource_Consumption,
    T5_resource_Consumption,
    T6_resource_Consumption,
) = [
    pd.read_csv(f'{orgin_path}{team_file}resource_consumption.csv', index_col='%time')
    for team_file in filenames
]

# Column groups used by the cells below.
actions_sp = [
    'co2_vip', 'int_white_vip', 'pH_drain_PC', 'scr_blck_vip', 'scr_enrg_vip',
    't_heat_vip', 't_ventlee_vip', 'water_sup', 'water_sup_intervals_vip_min',
    'days',
]  # 10 action columns
resources = ['Heat_cons', 'ElecHigh', 'ElecLow', 'CO2_cons', 'Irr']
crop_param = ['Stem_elong', 'Stem_thick', 'Cum_trusses']

# Roll the best model out for one episode in a fresh environment and turn
# the collected crop parameters, resource consumption and actions into
# DataFrames indexed like the teams' recorded data.
episode_rewards = []
model_results = []
model_r_consumption = []
model_action = []

env = GreenhouseEnv(crop_parameters_estimator, resource_consumption_estimator, gh_climate_estimator, weather_data)
check_env(env)

state = env.reset(seed=42)[0]
done = False
episode_reward = 0
while not done:
    action = best_model.predict(state, deterministic=True)[0]
    state, reward, terminated, truncated, _ = env.step(action)
    # An episode is over when it is terminated OR truncated; looking at the
    # first flag only would loop forever on a truncation-only ending.
    done = bool(terminated or truncated)
    model_results.append(state['crop_params'])
    model_r_consumption.append(state['resource_consumption'])
    model_action.append(action)
    episode_reward += reward

# np.float32(...) also works when the reward sum is a plain Python float,
# which has no .astype method.
episode_rewards.append(np.float32(episode_reward))
print(f'Episode Reward: {np.float32(episode_reward)}')

sp = actions_sp
sp_noday = ['co2_vip', 'int_white_vip', 'pH_drain_PC', 'scr_blck_vip', 'scr_enrg_vip', 't_heat_vip',
            't_ventlee_vip', 'water_sup', 'water_sup_intervals_vip_min']  # 9 action columns (no 'days')

# Derive the episode length from the rollout instead of hard-coding 23, so
# an early-terminated episode does not break the reshapes below.
n_steps = len(model_action)
model_results = pd.DataFrame(np.array(model_results).reshape(n_steps, 3), columns=crop_param)[:18]
model_r_consumption = pd.DataFrame(np.array(model_r_consumption).reshape(n_steps, 5), columns=resources)
model_actions = pd.DataFrame(np.array(model_action).reshape(n_steps * 2016, 9), columns=sp_noday)

# Borrow the time indexes of the recorded team data, trimmed to the number
# of rows actually produced by the rollout.
model_results.set_index(T1_Results.index[:len(model_results)], inplace=True)
model_r_consumption.set_index(T1_resource_Consumption[:-1].index[:n_steps], inplace=True)

model_actions = model_actions[:len(T5_Action.index)].set_index(T5_Action.index[:len(model_actions.index)])
# Team labels; 'Ours' is the trained agent evaluated above.
teams = ['AICU', 'Automatoes', 'Digilog', 'IUACAAS', 'Reference', 'TheAutomators', 'Ours']

# team label -> crop results frame
teams_Results = dict(zip(teams, [
    T1_Results,
    T2_Results,
    T3_Results,
    T4_Results,
    T5_Results,
    T6_Results,
    model_results,
]))

# team label -> action frame (recorded teams include 'days', ours does not)
teams_Actions = dict(zip(teams, [
    T1_Action[actions_sp],
    T2_Action[actions_sp],
    T3_Action[actions_sp],
    T4_Action[actions_sp],
    T5_Action[actions_sp],
    T6_Action[actions_sp],
    model_actions[sp_noday],
]))

# team label -> resource consumption frame
teams_rc = dict(zip(teams, [
    T1_resource_Consumption,
    T2_resource_Consumption,
    T3_resource_Consumption,
    T4_resource_Consumption,
    T5_resource_Consumption,
    T6_resource_Consumption,
    model_r_consumption,
]))
# Crop results: one figure per column of the first team's results frame,
# with every team's series overlaid.
import matplotlib.pyplot as plt

result_columns = teams_Results[teams[0]].columns
print(result_columns)
for result in result_columns:
    for team_name in teams:
        teams_Results[team_name][result].plot(
            ylabel=result,
            grid=True,
            marker=".",
            figsize=(18, 6),
            legend=True,
            label=f'{team_name}',
        )
    plt.title(f'green_house_{result}_time_series_plot for all Teams ')
    plt.legend()
    #save_fig(f'green_house_{result}_time_series_plot ')

    plt.show()

# Per-action comparison plot (currently disabled):
# import matplotlib.pyplot as plt
# for action in teams_Actions[teams[5]].columns:
#     for i in range(len(teams)):
#         teams_Actions[teams[i]]["2019-12-16 00:00:00" : "2019-12-17 00:00:00"][action].plot(ylabel=action,grid=True, marker=".", figsize=(18, 6),legend=True,label=f'{teams[i]}')
#     plt.title(f'green_house_{action}_time_series_plot for all Teams ')
#     plt.legend()
#     #save_fig(f'green_house_{action}_time_series_plot ')

#     plt.show()

# Resource consumption: same overlay for every column of the first team's
# frame except the last one.
resource_columns = teams_rc[teams[0]].columns[:-1]
for resource in resource_columns:
    for team_name in teams:
        teams_rc[team_name][resource].plot(
            ylabel=resource,
            grid=True,
            marker=".",
            figsize=(18, 6),
            legend=True,
            label=f'{team_name}',
        )
    plt.title(f'green_house_{resource}_time_series_plot for all Teams ')
    plt.legend()
    #save_fig(f'green_house_{resource}_time_series_plot ')

    plt.show()
# Replay every team's recorded actions (and our agent's policy) through the
# environment, collecting the per-step rewards for each team.
teams_episode_rewards = {}
for team in teams:
    print(f'Evaluate {team} Actions....')

    # Per-step action blocks; joined once after the episode. Starting from
    # empty lists avoids the uninitialised rows an np.empty((2016, k)) seed
    # would leave at the front of the logs.
    team_action_chunks = []
    our_action_chunks = []

    model_results = []
    model_r_consumption = []
    state = env.reset(seed=42)[0]
    done = False
    episode_reward = []
    i = 0
    sp = actions_sp
    while not done:
        if team == 'Ours':
            action = best_model.predict(state, deterministic=True)[0]
            our_action_chunks.append(np.array(action).reshape(2016, 9))
            state, reward, terminated, truncated, _ = env.step(np.array(action))
        else:
            # Rows of the i-th 2016-row block of the team's recorded actions.
            action = teams_Actions[team].iloc[2016*i:2016*(i+1)][sp]
            if len(action) < 2016:
                # Same exception type the reshape below would raise, but
                # with a message that names the actual problem.
                raise ValueError(
                    f'{team}: only {len(action)} action rows available for step {i}, expected 2016'
                )
            team_action_chunks.append(np.array(action))
            print(action.shape)
            # The environment takes the 9 set-points without 'days', flattened.
            state, reward, terminated, truncated, _ = env.step(
                np.array(action.drop('days', axis=1)).reshape(2016*9,)
            )
        # Stop on termination OR truncation.
        done = bool(terminated or truncated)
        i += 1

        model_results.append(state['crop_params'])
        model_r_consumption.append(state['resource_consumption'])
        # np.float32(...) also accepts a plain Python float reward.
        episode_reward.append(np.float32(reward))

    teams_model_actions = (
        np.concatenate(team_action_chunks, axis=0) if team_action_chunks else np.empty((0, 10))
    )
    Our_model_actions = (
        np.concatenate(our_action_chunks, axis=0) if our_action_chunks else np.empty((0, 9))
    )

    print(episode_reward)
    teams_episode_rewards[team] = np.array(episode_reward)
    print(f'{team} Episode Reward: {np.array(teams_episode_rewards[team]).sum().astype("float32")}')


#plt.bar(x=final_result.index,height=final_result[0],rotation = 90)

In [None]:
import pandas as pd 
# Folder holding the per-team CSV exports and the team file-name stems.
orgin_path = "/media/ai-ws2/8f2a1bf6-3409-412b-abae-522c4615e68f/ImanHindi/AGHC/AutonomousGreenHouseChallenge/Code/Training/"
filenames = ['AICU', 'Automatoes', 'Digilog', 'IUACAAS', 'Reference', 'TheAutomators']

# '<team>Actions.csv' for each team, indexed by the '%time' column.
T1_Action, T2_Action, T3_Action, T4_Action, T5_Action, T6_Action = [
    pd.read_csv(f'{orgin_path}{team_file}Actions.csv', index_col='%time')
    for team_file in filenames
]

# '<team>Results.csv' for each team, keeping only the first 18 rows.
T1_Results, T2_Results, T3_Results, T4_Results, T5_Results, T6_Results = [
    pd.read_csv(f'{orgin_path}{team_file}Results.csv', index_col='%time')[:18]
    for team_file in filenames
]

# '<team>resource_consumption.csv' for each team.
(
    T1_resource_Consumption,
    T2_resource_Consumption,
    T3_resource_Consumption,
    T4_resource_Consumption,
    T5_resource_Consumption,
    T6_resource_Consumption,
) = [
    pd.read_csv(f'{orgin_path}{team_file}resource_consumption.csv', index_col='%time')
    for team_file in filenames
]

# Column groups used by the cells below.
actions_sp = [
    'co2_vip', 'int_white_vip', 'pH_drain_PC', 'scr_blck_vip', 'scr_enrg_vip',
    't_heat_vip', 't_ventlee_vip', 'water_sup', 'water_sup_intervals_vip_min',
    'days',
]  # 10 action columns
resources = ['Heat_cons', 'ElecHigh', 'ElecLow', 'CO2_cons', 'Irr']
crop_param = ['Stem_elong', 'Stem_thick', 'Cum_trusses']

# Roll the best model out for one episode in a fresh environment and turn
# the collected crop parameters, resource consumption and actions into
# DataFrames indexed like the teams' recorded data.
episode_rewards = []
model_results = []
model_r_consumption = []
env = GreenhouseEnv(crop_parameters_estimator, resource_consumption_estimator, gh_climate_estimator, weather_data)
check_env(env)

state = env.reset(seed=42)[0]
done = False
episode_reward = 0
action_chunks = []  # one (2016, 9) block per environment step
while not done:
    action = best_model.predict(state, deterministic=True)[0]
    print(action.shape)
    action_chunks.append(action.reshape(2016, 9))
    state, reward, terminated, truncated, _ = env.step(action)
    # Stop on termination OR truncation; the first flag alone would miss a
    # truncation-only ending.
    done = bool(terminated or truncated)
    model_results.append(state['crop_params'])
    model_r_consumption.append(state['resource_consumption'])
    episode_reward += reward

# Join the per-step blocks once instead of re-concatenating on every step.
model_actions = np.concatenate(action_chunks, axis=0)
print(model_actions.shape)

# np.float32(...) also works when the reward sum is a plain Python float,
# which has no .astype method.
episode_rewards.append(np.float32(episode_reward))
print(f'Episode Reward: {np.float32(episode_reward)}')

sp = actions_sp
sp_noday = ['co2_vip', 'int_white_vip', 'pH_drain_PC', 'scr_blck_vip', 'scr_enrg_vip', 't_heat_vip',
            't_ventlee_vip', 'water_sup', 'water_sup_intervals_vip_min']  # 9 action columns (no 'days')

# Derive the episode length from the rollout instead of hard-coding 23, so
# an early-terminated episode does not break the reshapes below.
n_steps = len(action_chunks)
model_results = pd.DataFrame(np.array(model_results).reshape(n_steps, 3), columns=crop_param)[:18]
model_r_consumption = pd.DataFrame(np.array(model_r_consumption).reshape(n_steps, 5), columns=resources)
model_actions = pd.DataFrame(model_actions, columns=sp_noday)

# Borrow the time indexes of the recorded team data, trimmed to the number
# of rows actually produced by the rollout.
model_results.set_index(T1_Results.index[:len(model_results)], inplace=True)
model_r_consumption.set_index(T1_resource_Consumption[:-1].index[:n_steps], inplace=True)

model_actions = model_actions[:len(T5_Action.index)].set_index(T5_Action.index[:len(model_actions.index)])
# Team labels; 'Ours' is the trained agent evaluated above.
teams = ['AICU', 'Automatoes', 'Digilog', 'IUACAAS', 'Reference', 'TheAutomators', 'Ours']

# team label -> crop results frame
teams_Results = dict(zip(teams, [
    T1_Results,
    T2_Results,
    T3_Results,
    T4_Results,
    T5_Results,
    T6_Results,
    model_results,
]))

# team label -> action frame (recorded teams include 'days', ours does not)
teams_Actions = dict(zip(teams, [
    T1_Action[actions_sp],
    T2_Action[actions_sp],
    T3_Action[actions_sp],
    T4_Action[actions_sp],
    T5_Action[actions_sp],
    T6_Action[actions_sp],
    model_actions[sp_noday],
]))

# team label -> resource consumption frame
teams_rc = dict(zip(teams, [
    T1_resource_Consumption,
    T2_resource_Consumption,
    T3_resource_Consumption,
    T4_resource_Consumption,
    T5_resource_Consumption,
    T6_resource_Consumption,
    model_r_consumption,
]))
# Crop results: one figure per column of the first team's results frame,
# with every team's series overlaid.
import matplotlib.pyplot as plt

result_columns = teams_Results[teams[0]].columns
print(result_columns)
for result in result_columns:
    for team_name in teams:
        teams_Results[team_name][result].plot(
            ylabel=result,
            grid=True,
            marker=".",
            figsize=(18, 6),
            legend=True,
            label=f'{team_name}',
        )
    plt.title(f'green_house_{result}_time_series_plot for all Teams ')
    plt.legend()
    #save_fig(f'green_house_{result}_time_series_plot ')

    plt.show()

# Per-action comparison plot (currently disabled):
# import matplotlib.pyplot as plt
# for action in teams_Actions[teams[5]].columns:
#     for i in range(len(teams)):
#         teams_Actions[teams[i]]["2019-12-16 00:00:00" : "2019-12-17 00:00:00"][action].plot(ylabel=action,grid=True, marker=".", figsize=(18, 6),legend=True,label=f'{teams[i]}')
#     plt.title(f'green_house_{action}_time_series_plot for all Teams ')
#     plt.legend()
#     #save_fig(f'green_house_{action}_time_series_plot ')

#     plt.show()

# Resource consumption: same overlay for every column of the first team's
# frame except the last one.
resource_columns = teams_rc[teams[0]].columns[:-1]
for resource in resource_columns:
    for team_name in teams:
        teams_rc[team_name][resource].plot(
            ylabel=resource,
            grid=True,
            marker=".",
            figsize=(18, 6),
            legend=True,
            label=f'{team_name}',
        )
    plt.title(f'green_house_{resource}_time_series_plot for all Teams ')
    plt.legend()
    #save_fig(f'green_house_{resource}_time_series_plot ')

    plt.show()
# Replay every team's recorded actions (and our agent's policy) through the
# environment and keep, per team, the resulting crop parameters, resource
# consumption, per-step rewards and total reward.
teams_episode_rewards = {}
all_teams_results = {}
all_teams_r_consumption = {}
all_teams_episode_rewards = {}
for team in teams:
    print(f'Evaluate {team} Actions....')

    # Per-step action blocks; joined once after the episode. Starting from
    # empty lists avoids the uninitialised rows an np.empty((2016, 9)) seed
    # would leave at the front of the logs.
    team_action_chunks = []
    our_action_chunks = []

    model_results = []
    model_r_consumption = []
    state = env.reset(seed=42)[0]
    done = False
    episode_reward = []
    i = 0
    sp = actions_sp
    while not done:
        if team == 'Ours':
            action = best_model.predict(state, deterministic=True)[0]
            our_action_chunks.append(np.array(action).reshape(2016, 9))
            state, reward, terminated, truncated, _ = env.step(np.array(action))
        else:
            # Rows of the i-th 2016-row block of the team's recorded
            # set-points (without the 'days' column).
            action = teams_Actions[team].iloc[2016*i:2016*(i+1)][sp_noday]
            if len(action) < 2016:
                # Fail with a message that names the actual problem instead
                # of handing the environment a short action block.
                raise ValueError(
                    f'{team}: only {len(action)} action rows available for step {i}, expected 2016'
                )
            team_action_chunks.append(np.array(action))
            print(action.shape)
            state, reward, terminated, truncated, _ = env.step(np.array(action))
        # Stop on termination OR truncation.
        done = bool(terminated or truncated)
        i += 1

        model_results.append(state['crop_params'])
        model_r_consumption.append(state['resource_consumption'])
        # np.float32(...) also accepts a plain Python float reward.
        episode_reward.append(np.float32(reward))

    teams_model_actions = (
        np.concatenate(team_action_chunks, axis=0) if team_action_chunks else np.empty((0, 9))
    )
    Our_model_actions = (
        np.concatenate(our_action_chunks, axis=0) if our_action_chunks else np.empty((0, 9))
    )

    # Use the actual episode length rather than a fixed 23 so a team whose
    # episode ends early still produces valid frames.
    n_steps = len(episode_reward)
    model_results = pd.DataFrame(np.array(model_results).reshape(n_steps, 3), columns=crop_param)[:18]
    model_r_consumption = pd.DataFrame(np.array(model_r_consumption).reshape(n_steps, 5), columns=resources)
    # NOTE: `model_actions` is intentionally left untouched here. Re-wrapping
    # it on every iteration only discarded its timestamp index.
    model_results.set_index(T1_Results.index[:len(model_results)], inplace=True)
    model_r_consumption.set_index(T1_resource_Consumption[:-1].index[:n_steps], inplace=True)
    all_teams_results[team] = model_results
    all_teams_r_consumption[team] = model_r_consumption

    print(episode_reward)
    teams_episode_rewards[team] = np.array(episode_reward)
    total_reward = np.array(teams_episode_rewards[team]).sum().astype("float32")
    print(f'{team} Episode Reward: {total_reward}')
    all_teams_episode_rewards[team] = total_reward

#plt.bar(x=final_result.index,height=final_result[0],rotation = 90)

In [9]:
#all_teams_results

In [None]:
# One figure per result column; every team's series for that column is
# drawn on the same axes so the teams can be compared directly.
import matplotlib.pyplot as plt

result_columns = all_teams_results[teams[0]].columns
print(result_columns)
for result in result_columns:
    for team_name in teams:
        team_series = all_teams_results[team_name][result]
        team_series.plot(
            ylabel=result,
            grid=True,
            marker=".",
            figsize=(18, 6),
            legend=True,
            label=f'{team_name}',
        )
    plt.title(f'green_house_{result}_time_series_plot for all Teams ')
    plt.legend()
    # save_fig(f'green_house_{result}_time_series_plot ')
    plt.show()
#plot Teams actions
# import matplotlib.pyplot as plt
# for action in teams_Actions[teams[5]].columns:
#     for i in range(len(teams)):
#         teams_Actions[teams[i]]["2019-12-16 00:00:00" : "2019-12-17 00:00:00"][action].plot(ylabel=action,grid=True, marker=".", figsize=(18, 6),legend=True,label=f'{teams[i]}')
#     plt.title(f'green_house_{action}_time_series_plot for all Teams ')
#     plt.legend()
#     #save_fig(f'green_house_{action}_time_series_plot ')

#     plt.show()
#plot Teams rc

# One figure per resource-consumption column, with all teams overlaid.
import matplotlib.pyplot as plt

resource_columns = all_teams_r_consumption[teams[0]].columns[:]
for resource in resource_columns:
    for team_name in teams:
        team_series = all_teams_r_consumption[team_name][resource]
        team_series.plot(
            ylabel=resource,
            grid=True,
            marker=".",
            figsize=(18, 6),
            legend=True,
            label=f'{team_name}',
        )
    plt.title(f'green_house_{resource}_time_series_plot for all Teams ')
    plt.legend()
    # save_fig(f'green_house_{resource}_time_series_plot ')
    plt.show()


In [None]:
# Display the per-team result tables stored under each team key by the
# evaluation loop.
all_teams_results

In [None]:
# Display the per-team resource-consumption tables stored under each team
# key by the evaluation loop.
all_teams_r_consumption

In [None]:
import matplotlib.pyplot as plt

# One figure per column of teams_Results, with all teams overlaid.
for result in teams_Results[teams[0]].columns:
    for team_name in teams:
        team_series = teams_Results[team_name][result]
        team_series.plot(
            ylabel=result,
            grid=True,
            marker=".",
            figsize=(18, 6),
            legend=True,
            label=f'{team_name}',
        )
    plt.title(f'green_house_{result}_time_series_plot for all Teams ')
    plt.legend()
    # save_fig(f'green_house_{result}_time_series_plot ')
    plt.show()
#plot Teams actions
# import matplotlib.pyplot as plt
# for action in teams_Actions[teams[5]].columns:
#     for i in range(len(teams)):
#         teams_Actions[teams[i]]["2019-12-16 00:00:00" : "2019-12-17 00:00:00"][action].plot(ylabel=action,grid=True, marker=".", figsize=(18, 6),legend=True,label=f'{teams[i]}')
#     plt.title(f'green_house_{action}_time_series_plot for all Teams ')
#     plt.legend()
#     #save_fig(f'green_house_{action}_time_series_plot ')

#     plt.show()
#plot Teams rc

# One figure per column of teams_rc (the last column is left out), with
# all teams overlaid.
import matplotlib.pyplot as plt

rc_plot_columns = teams_rc[teams[0]].columns[:-1]
for resource in rc_plot_columns:
    for team_name in teams:
        team_series = teams_rc[team_name][resource]
        team_series.plot(
            ylabel=resource,
            grid=True,
            marker=".",
            figsize=(18, 6),
            legend=True,
            label=f'{team_name}',
        )
    plt.title(f'green_house_{resource}_time_series_plot for all Teams ')
    plt.legend()
    # save_fig(f'green_house_{resource}_time_series_plot ')
    plt.show()


In [None]:
# Print the rewards stored for the 'Ours' entry, followed by one blank line.
print(teams_episode_rewards['Ours'], end='\n\n')
# teams
# teams_episode_rewards_df=pd.DataFrame(teams_episode_rewards,columns=teams)
# print(teams_episode_rewards_df)
# final_result=pd.DataFrame(teams_episode_rewards_df.sum(axis=0).sort_values(ascending=False))
# final_result
# import matplotlib.pyplot as plt

# plt.bar(final_result.index, final_result[0])
# plt.xticks(rotation=90)  # Rotate the x-axis labels by 90 degrees
# plt.show()


In [None]:

# Sum each team's recorded rewards, then rank the teams by total reward
# and show the ranking as a bar chart.
rewards = []
for team_reward, step_rewards in teams_episode_rewards.items():
    total_reward = step_rewards.sum()
    print(team_reward)
    print(len(step_rewards))
    print(total_reward)
    rewards.append(total_reward)

# Take the column labels from the dict that produced the values so every
# total is paired with its own team, and let reshape infer the width rather
# than hard-coding the number of teams.
teams_episode_rewards_df = pd.DataFrame(
    np.array(rewards).reshape(1, -1),
    columns=list(teams_episode_rewards),
)
print(teams_episode_rewards_df)

# Single-column frame (column 0) indexed by team, highest total first.
final_result = pd.DataFrame(
    teams_episode_rewards_df.sum(axis=0).sort_values(ascending=False)
)

import matplotlib.pyplot as plt

plt.bar(final_result.index, final_result[0])
plt.xticks(rotation=90)  # Rotate the x-axis labels by 90 degrees
plt.show()


In [None]:
# Bar chart of column 0 of final_result against its index.
plt.bar(x=final_result.index, height=final_result[0])
# Vertical tick labels.
plt.xticks(rotation=90)
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Columns of each team's teams_rc table to keep in the summary.
rc_summary_columns = ['CO2_cons', 'ElecHigh', 'ElecLow', 'Heat_cons', 'Irr']

# Column-wise mean per team, restricted to the columns above and rounded
# to two decimals.
mean_rc = {
    team_name: teams_rc[team_name].mean(axis=0)[rc_summary_columns].round(2)
    for team_name in teams
}

# One column per team; written to CSV with the index kept.
mean_rc_df = pd.DataFrame.from_dict(mean_rc)
mean_rc_df.to_csv('mean_rc_df_data.csv', index=True)
mean_rc


In [None]:
import matplotlib.pyplot as plt

# Column-wise mean of each team's teams_Results table, rounded to two
# decimals.
mean_cp = {
    team_name: teams_Results[team_name].mean(axis=0).round(2)
    for team_name in teams
}

# One column per team; written to CSV with the index kept.
mean_cp_df = pd.DataFrame.from_dict(mean_cp)
mean_cp_df.to_csv('mean_cp_df_data.csv', index=True)
mean_cp


In [None]:
# Replace the string index of each team's action and result tables with a
# datetime index parsed from the text before the first '_' in each label.
for team_name in teams:
    actions_df = teams_Actions[team_name]
    action_stamps = actions_df.index.str.split('_').str[0]
    actions_df['%time'] = pd.to_datetime(action_stamps)
    actions_df.set_index('%time', inplace=True)  # datetime column becomes the index

    results_df = teams_Results[team_name]
    result_stamps = results_df.index.str.split('_').str[0]
    results_df['%Time'] = pd.to_datetime(result_stamps)
    results_df.set_index('%Time', inplace=True)  # datetime column becomes the index

teams

In [None]:
# features=['assim_vip', 'co2_vip', 'dx_vip', 'int_blue_vip', 'int_farred_vip', 'int_red_vip',
#        'int_white_vip', 'pH_drain_PC', 'scr_blck_vip', 'scr_enrg_vip',
#        't_grow_min_vip', 't_heat_vip', 't_rail_min_vip', 't_ventlee_vip',
#        't_ventwind_vip',  'water_sup_intervals_vip_min',
#        'window_pos_lee_vip']
# Column names kept in `features`.
# NOTE(review): the plotting loop that follows iterates `sp_noday`, not this
# list — confirm whether `features` is still needed.
features = [
    'co2_vip',
    'dx_vip',
    'int_blue_vip',
    'int_farred_vip',
    'int_red_vip',
    'int_white_vip',
    'pH_drain_PC',
    'scr_blck_vip',
    'scr_enrg_vip',
    't_grow_min_vip',
    't_heat_vip',
    't_rail_min_vip',
    't_ventlee_vip',
    't_ventwind_vip',
    'water_sup_intervals_vip_min',
    'window_pos_lee_vip',
]

# For every column in sp_noday, draw one figure that overlays each team's
# smoothed series over the fixed comparison period.
for feature in sp_noday:
    fig = plt.figure(figsize=(10, 7))
    ax = fig.gca()
    for team_name in teams:
        # Hourly means of the raw series; every team is processed the same way.
        daily_avg = teams_Actions[team_name][feature].resample('h').mean()
        # Daily means of the hourly values (not used by the plot below).
        weekly_avg = daily_avg.resample('d').mean()

        period = slice("2019-12-16 00:00:00", "2020-05-20 00:00:00")
        # Window of 24*7 hourly samples, i.e. a rolling mean over seven days.
        rolling_average_daily = daily_avg[period].rolling(window=24*7).mean()

        rolling_average_daily.plot(grid=True, legend=True, label=f'{team_name}')

    ax.set_xlabel(xlabel='%time')
    ax.set_ylabel(ylabel=f'{feature}')
    ax.set_title(f'{feature} time series of Teams')

    plt.show()

# Add a legend entry for each plotted line, identifying the file name it represents