In [None]:
from pathlib import Path
import matplotlib.pyplot as plt 
# Relative output folder for every figure written by save_fig(); it is created
# up front (including parents) so saving never fails on a missing directory.
IMAGES_PATH = Path("images", "ReinforcementLearningResults")
IMAGES_PATH.mkdir(exist_ok=True, parents=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under ``IMAGES_PATH``.

    Args:
        fig_id: Base name of the output file (without extension).
        tight_layout: If true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension, also passed to ``savefig`` as the format.
        resolution: Value passed to ``savefig`` as ``dpi``.
    """
    file_name = "{}.{}".format(fig_id, fig_extension)
    target = IMAGES_PATH / file_name
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, dpi=resolution, format=fig_extension)

In [None]:
!pip install gymnasium
!pip install stable_baselines3
!pip install tensorflow==2.13.0

In [None]:
import tensorflow as tf
# Report how many GPU devices TensorFlow detects.
print("Num GPUs Available: ", len(tf.config.list_physical_devices(device_type='GPU')))


In [None]:
!pip install scikit-learn==1.1.3

In [None]:
!pip install dill


In [None]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from stable_baselines3 import PPO , DDPG, SAC #,RecurrentPPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy
import matplotlib.pyplot as plt
import joblib
from sklearn.preprocessing import StandardScaler,MinMaxScaler
# Load the pre-trained models that GreenhouseEnv uses as its simulator.
# Crop-parameter model, read from an .h5 file in the working directory.
crop_parameters_estimator = load_model('wcp_LSTM_model_model.h5')
# Daily resource-consumption model, read from a .pkl file in the working directory.
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code;
# only load estimator files that come from a trusted source.
resource_consumption_estimator = joblib.load('GradientBoostingDailyResourceConsumptionEstimator.pkl')

# Module-level reward list; no active code in this cell appends to it.
rewards = []

class GreenhouseEnv(gym.Env):
    """Greenhouse control environment in which one step plans a full week.

    Every action is a ``(2016, 34)`` array: 34 control setpoints in ``[0, 1]``
    for each of the 2016 time slots of a week (7 days x 288 slots). The
    injected crop model maps the week of setpoints (plus a day feature) to
    three crop parameters, and the injected resource model maps the seven
    daily mean setpoints to daily resource use. The reward trades the crop
    parameters off against resource use and penalises jittery setpoints.

    Args:
        crop_parameters_estimator: Object exposing ``predict``; it is called
            with an array of shape ``(1, 2016, 35)``.
        resource_consumption_estimator: Object exposing ``predict``; it is
            called with a 7-row DataFrame whose columns are
            ``SETPOINT_COLUMNS``.
        weather_data: Row-sliceable weather series consumed in windows of
            2016 rows (the observation space assumes 10 columns in [0, 1]).
        verbose: When true, print the per-step diagnostics (action range,
            threshold hits, reward). Defaults to ``False`` so that long
            training runs do not flood the notebook output.
    """

    STEPS_PER_DAY = 288
    DAYS_PER_WEEK = 7
    STEPS_PER_WEEK = STEPS_PER_DAY * DAYS_PER_WEEK  # 2016
    N_SETPOINTS = 34

    # Column names expected by the resource-consumption estimator, in the
    # order of the 34 action channels.
    SETPOINT_COLUMNS = [
        'AssimLight', 'BlackScr', 'CO2air', 'Cum_irr', 'EC_drain_PC', 'EnScr', 'HumDef',
        'PipeGrow', 'PipeLow', 'Rhair', 'Tair', 'Tot_PAR', 'Tot_PAR_Lamps', 'VentLee',
        'Ventwind', 'assim_vip', 'co2_dos', 'co2_vip', 'dx_vip', 'int_blue_vip', 'int_farred_vip',
        'int_red_vip', 'int_white_vip', 'pH_drain_PC', 'scr_blck_vip', 'scr_enrg_vip',
        't_grow_min_vip', 't_heat_vip', 't_rail_min_vip', 't_ventlee_vip', 't_ventwind_vip',
        'water_sup', 'water_sup_intervals_vip_min', 'window_pos_lee_vip'
    ]

    # Class-level default so instances created without __init__ stay quiet.
    verbose = False

    def __init__(self, crop_parameters_estimator, resource_consumption_estimator, weather_data,
                 verbose=False):
        super().__init__()
        self.crop_parameters_estimator = crop_parameters_estimator
        self.resource_consumption_estimator = resource_consumption_estimator
        self.verbose = verbose

        # Action space: shape (2016, 34) - 2016 time steps for 34 control setpoints
        self.action_space = spaces.Box(
            low=0, high=1, shape=(self.STEPS_PER_WEEK, self.N_SETPOINTS), dtype=np.float64)

        # Observation space: weather (2016, 10), crop parameters (1, 3), resource consumption (1, 5)
        self.observation_space = spaces.Dict({
            'weather': spaces.Box(low=0, high=1, shape=(self.STEPS_PER_WEEK, 10), dtype=np.float64),
            'crop_params': spaces.Box(low=0, high=1, shape=(1, 3), dtype=np.float64),
            'resource_consumption': spaces.Box(low=0, high=7, shape=(1, 5), dtype=np.float64)
        })

        # Initial state
        self.weather_data = weather_data
        self.current_step = 0  # kept for backward compatibility; not read by this class
        self.steps = 0         # number of weeks simulated in the current episode
        self.max_steps = 23    # episode length in weeks

    def _debug(self, *values):
        """Print diagnostics only when ``verbose`` is enabled."""
        if self.verbose:
            print(*values)

    def _weather_window(self, week_index):
        """Return the 2016-row weather window for ``week_index``.

        The index is clamped to the last complete window so the observation
        returned with the final step is not an empty slice when the weather
        series holds exactly ``max_steps`` weeks.
        """
        n_weeks = len(self.weather_data) // self.STEPS_PER_WEEK
        if n_weeks > 0:
            week_index = min(week_index, n_weeks - 1)
        start = week_index * self.STEPS_PER_WEEK
        return self.weather_data[start:start + self.STEPS_PER_WEEK]

    def _day_feature(self):
        """Build the extra (2016, 1) 'day' column fed to the crop model.

        NOTE(review): the constant 166 and the ``steps + 1`` scaling are kept
        exactly as in the original formula; their meaning is not defined in
        this file - confirm against the crop model's training features.
        """
        day_of_week = np.arange(self.STEPS_PER_WEEK) // self.STEPS_PER_DAY + 1
        return (day_of_week / 166 * (self.steps + 1)).reshape(self.STEPS_PER_WEEK, 1)

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: Optional seed, forwarded to ``gym.Env.reset`` and also used
                to seed NumPy's global RNG (kept for backward compatibility).
            options: Accepted for Gymnasium API compatibility; unused.
        """
        super().reset(seed=seed)
        if seed is not None:
            np.random.seed(seed)
        self.steps = 0

        # The episode starts with no crop growth and no resource use.
        self.state = {
            'weather': self._weather_window(self.steps),
            'crop_params': np.zeros((1, 3)),
            'resource_consumption': np.zeros((1, 5))
        }
        return self.state, {}

    def step(self, action):
        """Apply one week of setpoints.

        Args:
            action: Array reshapeable to ``(2016, 34)``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. Both flags
            carry the same value: true once ``max_steps`` weeks have been
            simulated.
        """
        self.steps += 1
        action = np.asarray(action)
        weekly_setpoints = action.reshape(self.STEPS_PER_WEEK, self.N_SETPOINTS)
        control_setpoints = np.concatenate(
            [weekly_setpoints, self._day_feature()], axis=1
        ).reshape(1, self.STEPS_PER_WEEK, self.N_SETPOINTS + 1)

        # Predict crop parameters using the crop LSTM model
        weekly_crop_params = self.crop_parameters_estimator.predict(control_setpoints)

        # Average the setpoints per day; the resource model works on daily rows.
        daily_means = action.reshape(
            self.DAYS_PER_WEEK, self.STEPS_PER_DAY, self.N_SETPOINTS).mean(axis=1)
        daily_frame = pd.DataFrame(daily_means, columns=self.SETPOINT_COLUMNS)
        # Call the estimator once and reuse its output for both the weekly sum
        # and the high-consumption flag.
        daily_predictions = np.asarray(self.resource_consumption_estimator.predict(daily_frame))
        # Sum over the week and drop the estimator's last output column.
        daily_resource_consumption = daily_predictions.sum(axis=0)[:-1].reshape(1, 5)
        # True when any daily prediction reaches 0.9.
        high_rc = bool(np.any(daily_predictions >= .9))

        # Update state
        self.state = {
            'weather': self._weather_window(self.steps),
            'crop_params': weekly_crop_params,
            'resource_consumption': daily_resource_consumption
        }
        self._debug(np.max(action), np.min(action), np.any(action > 1), np.any(action < 0))

        # Calculate reward
        reward = self.calculate_reward(
            weekly_crop_params[0], daily_resource_consumption[0], high_rc, action)
        done = bool(self.steps >= self.max_steps)

        # The same flag is returned as terminated and truncated, as existing
        # callers unpack the first of the two as their 'done' signal.
        return self.state, reward, done, done, {}

    def calculate_reward(self, crop_params, resource_consumption, high_rc, current_actions):
        """Score one simulated week.

        Args:
            crop_params: Three crop parameters (stem elongation, stem
                thickness, cumulative trusses).
            resource_consumption: Five weekly resource totals.
            high_rc: Flag for a high daily consumption; currently not used in
                the score.
            current_actions: The setpoints applied this week.

        Returns:
            ``alpha*crop_reward - beta*resource_penalty + delta*efficiency
            + punishment + big_reward - gamma*stability_penalty``.
        """
        crop_params = np.asarray(crop_params)
        resource_consumption = np.asarray(resource_consumption)
        current_actions = np.asarray(current_actions)

        alpha, beta, delta, gamma = 1, 1, .1, 0.01
        w1, w2, w3 = 0.6, 0.6, 0.6
        p1, p2, p3, p4 = 0.20, 0.35, 0.3, 0.15
        max_stem_elong, max_stem_thick, max_cum_trusses = 1.0, 1.0, 1.0
        max_heat, max_co2, max_electricity, max_irrigation = 7.0, 7.0, 7.0, 7.0

        crop_reward = (w1 * (crop_params[0] / max_stem_elong) +
                       w2 * (crop_params[1] / max_stem_thick) +
                       w3 * (crop_params[2] / max_cum_trusses))
        # Indices 2 and 3 are added together as the electricity term.
        resource_penalty = (p1 * (resource_consumption[0] / max_heat) +
                            p2 * (resource_consumption[1] / max_co2) +
                            p3 * ((resource_consumption[2] + resource_consumption[3]) / max_electricity) +
                            p4 * (resource_consumption[4] / max_irrigation))

        # Fixed penalties for leaving the valid ranges or for weak growth.
        punishment = 0
        if np.any(current_actions > 1) or np.any(current_actions < 0):
            punishment -= .9
        if np.any(resource_consumption > 7):
            self._debug('resource_consumption>7', np.any(resource_consumption > 7))
            punishment -= .9
        if np.any(crop_params < .5):
            self._debug('crop_params<.5', np.any(crop_params < .5))
            punishment -= .22

        # Tiered bonuses; the "all >= .5" tier is deliberately counted twice,
        # as in the original scoring.
        big_reward = 0
        if np.all(crop_params >= .5):
            self._debug('crop_params>=.5', np.all(crop_params >= .5))
            big_reward = .01
        if np.any(crop_params >= .7):
            self._debug('crop_params>=.7', np.any(crop_params >= .7))
            big_reward += .1
        if np.any(crop_params >= .8):
            self._debug('crop_params>=.8', np.any(crop_params >= .8))
            big_reward += 1
        if np.all(crop_params >= .5):
            self._debug('all crop_params>=.5', np.all(crop_params >= .5))
            big_reward += 1
        if np.all(crop_params >= .7):
            # Fixed: the original called the non-existent np.a11 here and
            # raised AttributeError whenever this tier was reached.
            self._debug('all crop_params>=.7', np.all(crop_params >= .7))
            big_reward += 1

        efficiency_factor = crop_reward / (1 + resource_penalty)

        # Stability penalty: total absolute change between consecutive time
        # slots, scaled by the number of setpoints (actions lie in [0, 1]).
        s = 0.01
        max_delta_action = 34.0
        stability = np.sum(np.abs(current_actions[:-1] - current_actions[1:])) / max_delta_action
        stability_penalty = s * stability

        reward = (alpha * crop_reward - beta * resource_penalty + delta * efficiency_factor
                  + punishment + big_reward - gamma * stability_penalty)
        self._debug(reward)
        return reward
    
import os

# Pre-processed weather time series for the environment. Set the
# AGHC_WEATHER_CSV environment variable to point at the file on another
# machine; the default is the original author's local path.
weather_csv_path = os.environ.get(
    'AGHC_WEATHER_CSV',
    'C:\\Users\\Iman.Hindi\\Desktop\\Iman\\AGHC\\AutonomousGreenHouseChallenge\\Code\\Training\\weather_fill_missing_values.csv',
)
weather_data = pd.read_csv(weather_csv_path, index_col='%time')

# Scale every column to [0, 1] (fitted on the full file), then keep the first
# 23 windows of 2016 rows - one window per environment step.
scaler = MinMaxScaler()
weather_data = scaler.fit_transform(weather_data)[:2016*23]

# Initialize the environment
env = GreenhouseEnv(crop_parameters_estimator, resource_consumption_estimator, weather_data)
check_env(env)

# Define a folder for saving logs and models
log_dir = "./PPO_logs/"
os.makedirs(log_dir, exist_ok=True)

# Define the policy and agent. device="auto" uses CUDA when it is available and
# falls back to CPU otherwise, instead of failing on machines without a GPU.
model = PPO("MultiInputPolicy", env, verbose=0, learning_rate=1e-5,
            tensorboard_log=log_dir, device="auto")



In [None]:
# Training the agent and tracking rewards
n_episodes = 10000
episode_rewards = []

for episode in range(n_episodes):
    print(f'Episode {episode + 1}')
    # NOTE(review): learn() is called with its default counter handling on
    # every iteration - confirm that restarting the timestep count (and the
    # TensorBoard run) each time is intended.
    model.learn(total_timesteps=100000)

    # Reset only after learn(): training steps this same env object, so an
    # observation taken before learn() would be stale and the env's step
    # counter would be left mid-episode.
    state = env.reset(seed=42)[0]
    done = False
    episode_reward = 0
    while not done:
        action = model.predict(state, deterministic=True)[0]
        state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        episode_reward += reward
    episode_rewards.append(episode_reward.astype('float32'))
    print(f'Episode Reward: {episode_reward.astype("float32")}')

# Plot training progress
plt.figure(figsize=(14, 7))
plt.plot(episode_rewards, label='Rewards')
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.title('Training Progress')
plt.legend()
plt.grid(True)
plt.show()

# Evaluate the model. Without return_episode_rewards, evaluate_policy returns
# (mean_reward, std_reward); with it, it returns per-episode reward and length
# lists, which the message below would mislabel.
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"Mean Reward: {mean_reward} +/- {std_reward}")

# Save the trained agent
model.save("ppo_greenhouse_agent")


In [None]:
import pandas as pd 

# Folder holding the per-team CSV exports.
orgin_path = "C:\\Users\\Iman.Hindi\\Desktop\\iman\\AGHC\\AutonomousGreenHouseChallenge\\Code\\Training\\"
filenames = ['AICU', 'Automatoes',
             'Digilog', 'IUACAAS',
             'Reference', 'TheAutomators'
             ]


def _read_team_csv(team_name, suffix):
    """Read ``<orgin_path><team_name><suffix>.csv`` indexed by its '%time' column."""
    return pd.read_csv(orgin_path + f'{team_name}{suffix}.csv', index_col='%time')


# T1..T6 follow the order of `filenames`.
(T1_Action, T2_Action, T3_Action,
 T4_Action, T5_Action, T6_Action) = (_read_team_csv(name, 'Actions') for name in filenames)

(T1_Results, T2_Results, T3_Results,
 T4_Results, T5_Results, T6_Results) = (_read_team_csv(name, 'Results') for name in filenames)

# Fixed: the sixth file used to be assigned to T5_resource_Consumption again,
# silently replacing the 'Reference' data with 'TheAutomators' data.
(T1_resource_Consumption, T2_resource_Consumption, T3_resource_Consumption,
 T4_resource_Consumption, T5_resource_Consumption, T6_resource_Consumption) = (
    _read_team_csv(name, 'resource_consumption') for name in filenames)

In [None]:
#episode_rewards = []
#model_actions=np.empty(action.shape)
#model_results=[]
#model_r_consumption=[]
#n_episodes = 10
# Column labels applied to the agent's resource-consumption and crop outputs.
# NOTE(review): GreenhouseEnv.calculate_reward treats index 1 as CO2 and
# indices 2+3 as electricity, while this list puts the two electricity labels
# at indices 1-2 and CO2 at index 3 - confirm against the estimator's output order.
resources = [
    'Heat_cons',
    'ElecHigh',
    'ElecLow',
    'CO2_cons',
    'Irr',
]
crop_param = [
    'Stem_elong',
    'Stem_thick',
    'Cum_trusses',
]
#for episode in range(n_episodes):
#    print(f'Episode {episode + 1}')
#    state = env.reset(seed=42)[0]
#    done = False
#    episode_reward = 0
#    model.learn(total_timesteps=10000)
#    while not done:
#        action = model.predict(state,deterministic=True)[0]
#        model_actions=np.concatenate([np.array(model_actions),action],axis=0)
#        state, reward, done, _, _ = env.step(action)
#        model_results.append(state['crop_params'])
#        model_r_consumption.append(state['resource_consumption'])
#        episode_reward += reward
#    episode_rewards.append(episode_reward.astype('float32'))
#    print(f'Episode Reward: {episode_reward.astype("float32")}')
#    
#    model_results=pd.DataFrame(np.array(model_results),columns=crop_param)
#    model_r_consumption=pd.DataFrame(np.array(model_r_consumption),columns=resources)
#

In [None]:
# Roll the trained agent through one full episode and record what it did.
episode_rewards = []
model_results = []
model_r_consumption = []
weekly_actions = []  # one action array per step; stacked once after the loop

state = env.reset(seed=42)[0]
done = False
episode_reward = 0

while not done:
    action = model.predict(state, deterministic=True)[0]
    weekly_actions.append(action)
    state, reward, done, _, _ = env.step(action)
    model_results.append(state['crop_params'])
    model_r_consumption.append(state['resource_consumption'])
    episode_reward += reward

# A single concatenate avoids re-copying the growing array on every step.
model_actions = np.concatenate(weekly_actions, axis=0)
print(model_actions.shape)

episode_rewards.append(episode_reward.astype('float32'))
print(f'Episode Reward: {episode_reward.astype("float32")}')


In [None]:
# Display the shape of the stacked action array.
model_actions.shape

In [None]:
# Turn the rollout buffers into labelled DataFrames. reshape(-1, n) flattens
# the per-step entries to one row each without hard-coding the episode length.
model_results = pd.DataFrame(
    np.array(model_results).reshape(-1, len(crop_param)), columns=crop_param)
model_r_consumption = pd.DataFrame(
    np.array(model_r_consumption).reshape(-1, len(resources)), columns=resources)
# Action columns reuse Team 1's setpoint names (its 'days' column is dropped).
model_actions = pd.DataFrame(
    np.array(model_actions), columns=T1_Action.drop('days', axis=1).columns)

In [None]:
# Give the agent's crop results the same time index as Team 1's results.
model_results = model_results.set_index(T1_Results.index)

In [None]:
# Align the agent's resource use with Team 1's time index, minus its last row.
model_r_consumption = model_r_consumption.set_index(T1_resource_Consumption[:-1].index)

In [None]:

# Trim the agent's actions to the length of T5_Action and adopt its time index.
model_actions = model_actions.iloc[:len(T5_Action.index)].set_index(T5_Action.index)


In [None]:
# Display the index of the agent's action frame.
model_actions.index

In [None]:
# Display labels: five loaded teams plus the trained agent ('Ours').
# The sixth loaded data set (T6_*) is not part of the comparison.
teams = ['Team1', 'Team2', 'Team3', 'Team4', 'Team5', 'Ours']

# Each lookup maps a label to that team's frame, in the order of `teams`.
teams_Results = dict(zip(teams, (
    T1_Results, T2_Results, T3_Results, T4_Results, T5_Results,
    model_results,
)))
teams_Actions = dict(zip(teams, (
    T1_Action, T2_Action, T3_Action, T4_Action, T5_Action,
    model_actions,
)))
teams_rc = dict(zip(teams, (
    T1_resource_Consumption, T2_resource_Consumption, T3_resource_Consumption,
    T4_resource_Consumption, T5_resource_Consumption,
    model_r_consumption,
)))

In [None]:
# Plot every crop-result column as a time series, one line per team.
import matplotlib.pyplot as plt
# One figure per result column of Team 1, with one line per team.
for result in teams_Results[teams[0]].columns:
    for team_name in teams:
        series = teams_Results[team_name][result]
        series.plot(ylabel=result, grid=True, marker=".", figsize=(18, 6),
                    legend=True, label=f'{team_name}')
    plt.title(f'green_house_{result}_time_series_plot for all Teams ')
    plt.legend()
    #save_fig(f'green_house_{result}_time_series_plot ')

    plt.show()

In [None]:
#plot Teams actions
import matplotlib.pyplot as plt
# One figure per setpoint column of the agent's actions, limited to the
# 2019-12-16 .. 2019-12-26 window, with one line per team.
for action in teams_Actions[teams[5]].columns:
    for team_name in teams:
        window = teams_Actions[team_name].loc["2019-12-16 00:00:00":"2019-12-26 00:00:00", action]
        window.plot(ylabel=action, grid=True, marker=".", figsize=(18, 6),
                    legend=True, label=f'{team_name}')
    plt.title(f'green_house_{action}_time_series_plot for all Teams ')
    plt.legend()
    #save_fig(f'green_house_{action}_time_series_plot ')

    plt.show()

In [None]:
# Plot every resource-consumption column (except the last) as a time series, one line per team.
import matplotlib.pyplot as plt
# One figure per resource column of Team 1 (last column skipped), one line per team.
for resource in teams_rc[teams[0]].columns[:-1]:
    for team_name in teams:
        series = teams_rc[team_name][resource]
        series.plot(ylabel=resource, grid=True, marker=".", figsize=(18, 6),
                    legend=True, label=f'{team_name}')
    plt.title(f'green_house_{resource}_time_series_plot for all Teams ')
    plt.legend()
    #save_fig(f'green_house_{resource}_time_series_plot ')

    plt.show()

In [None]:
# Replay every team's recorded actions (and the agent's own policy for 'Ours')
# through the environment and collect the per-step rewards.
teams_episode_rewards = {}
for team in teams:
    print(f'Evaluate {team} Actions....')

    # Per-step action blocks, stacked once after the rollout. The previous
    # np.empty() seed arrays put 2016 rows of uninitialised memory in front
    # of the real actions.
    weekly_actions = []

    # NOTE(review): these names are rebound to plain lists here, replacing the
    # DataFrames of the same name built earlier in the notebook.
    model_results = []
    model_r_consumption = []
    state = env.reset(seed=42)[0]
    done = False
    episode_reward = []
    i = 0
    while not done:
        if team == 'Ours':
            action = model.predict(state, deterministic=True)[0]
            weekly_actions.append(np.array(action))
            state, reward, done, _, _ = env.step(np.array(action))
        else:
            # Rows of step i in the team's recorded actions.
            # NOTE(review): the recorded values are passed as-is - confirm
            # they are already scaled to the environment's [0, 1] range.
            action = teams_Actions[team].iloc[2016*i:2016*(i+1)]
            weekly_actions.append(np.array(action))
            state, reward, done, _, _ = env.step(np.array(action.drop('days', axis=1)))
        i += 1

        model_results.append(state['crop_params'])
        model_r_consumption.append(state['resource_consumption'])
        episode_reward.append(reward.astype('float32'))

    # Keep the stacked actions under the original variable names.
    if team == 'Ours':
        Our_model_actions = np.concatenate(weekly_actions, axis=0)
    else:
        teams_model_actions = np.concatenate(weekly_actions, axis=0)

    print(episode_reward)
    teams_episode_rewards[team] = np.array(episode_reward)
    print(f'{team} Episode Reward: {np.array(teams_episode_rewards[team]).sum().astype("float32")}')


In [None]:
# Display the per-step rewards recorded for Team 1.
teams_episode_rewards['Team1']

In [None]:
# Display the list of team labels.
teams

In [None]:
# Per-step rewards as a table: one column per team, ordered as in `teams`.
teams_episode_rewards_df = pd.DataFrame(data=teams_episode_rewards, columns=teams)

In [None]:
# Display the per-step reward table.
teams_episode_rewards_df

In [None]:
# Total reward per team, best first, as a one-column frame (column label 0).
total_reward_per_team = teams_episode_rewards_df.sum(axis=0)
final_result = total_reward_per_team.sort_values(ascending=False).to_frame()
final_result

In [None]:
# Bar chart of the total reward per team.
plt.bar(final_result.index, final_result[0])