<a href="https://colab.research.google.com/github/esraaelelimy/carbon_footprint/blob/main/q_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Environment Implementation

In [1]:
!pip install gymnasium



In [2]:
# ACTION Reduce by 10%, 20% etc -> increase action space
import argparse
import os
import random
import time
from distutils.util import strtobool
import pandas as pd
import gymnasium as gym
from gym.spaces import Discrete, Box
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter

possible_actions = ["increase by level 1","increase by level 2","increase by level 3","maintain",
           "decrease by level 1","decrease by level 2","decrease by level 3"]

action_for_each_state = ()

In [3]:
gamma = pd.read_csv("Datasets/gamma_modified.csv")
pue = pd.read_csv("Datasets/pue_modified.csv")
gpu = pd.read_csv("Datasets/gpu_modified.csv")

In [4]:
pue.dtypes

Providers     object
PUE          float64
dtype: object

In [5]:
gamma.shape

(220, 3)

In [6]:
pue.shape

(8, 2)

In [7]:
def actions2(dataframe,input_value,column_name,level,info_name):
    current_index = dataframe.index[dataframe[column_name] == input_value][0]
    info_value = None
    new_index = current_index + level
    if new_index <=0:
        new_index = 0
    elif new_index >= dataframe.shape[0]:
        new_index = dataframe.shape[0] - 1 
    value = dataframe.loc[new_index,column_name]
    info_value = dataframe.loc[new_index,info_name]
    return [info_value,value]


In [8]:
actions2(gpu,30,"tdp_watts",-3,"name")

['AGX Xavier', 30]

In [9]:
# state_space1 = (gamma, pue, tdp_watts, config, chips) -> 5
# info_action = (0,4,5,6,3) -> 7 possible actions
# component -> state component's index in the state_space
# action -> action space index
# state -> state list
# value -> is the actual value of the state_space index
# 300 -> 
# state space = [1,2,34,4]
# number of chips : 50
# config: 20
def actions1(component,action,state):
    dict_act = {0:1,1:2,2:3,3:0,4:-1,5:-2,6:-3}
    input_value = state[component]
    if component == 0:
        info_value, output_value = actions2(gamma,input_value,"Gamma",dict_act[action],"Regions")
    elif component == 1:
        info_value, output_value = actions2(pue,input_value,"PUE",dict_act[action],"Providers")
    elif component == 2: 
        info_value, output_value = actions2(gpu,input_value,"tdp_watts",dict_act[action],"name") 
    elif component == 3:
        output_value = input_value + dict_act[action]
        if output_value > 20:
            output_value = 20
        elif output_value <= 0:
            output_value = 1  # maybe change it: 1 grid search
        info_value = "Grid Configurations"
    elif component == 4:
        # 100 -> 5,000
        # actions -> level 1 -> 100 to 101
        # 101 -> 5,050
        output_value = input_value + dict_act[action]
        if output_value > 300:
            output_value = 300
        elif output_value <= 0:
            output_value = 1  # maybe change it : 10 gpus
        info_value = "Number of Chips"
        # 15,000/50 
    return [info_value,output_value]
            
actions1(3,2,[23.916,1.25,125,21,0])     
    

['Grid Configurations', 20]

In [10]:
# DOUBTS: Should co2 be considered a state space
# state_space1 = (gamma, pue, tdp_watts, config, chips) 
# state_space2 = (gamma1, pue1, tdp_watts1, config1, chips1) 
# difference = ce.state1 - ce.state2 
# difference is positive -> + reward
# difference is negative -> - reward  



# goal -> 395-405 -> terminal state reached 
# budget limit = 100
# current e = 500

# diff = 400



# Environment Implementation 
class env():
    # start from state and then take an action to return next state and the reward in the next state
    def __init__(self, curr_state,termination_co2):
        # 7 actions can be taken 
        self.action_space = Discrete(7)          
        self.curr_state = curr_state
        self.termination_co2 = termination_co2
        # self.info_action = (0,0,0,0,0)
        super().__init__()

    
    def carbon_emissions(self,curr_state):
        product = 1
        # state_space1 = (gamma, pue, tdp_watts, config, chips) 
        # 100 -> divided by 5
        # 15,000 -> divided by 100
        for i in range(len(curr_state)):
            product *= curr_state[i]
            product = product/ 1000
        return product*250
    
       
    def step(self,info_action):
        # state_space1 = (gamma, pue, tdp_watts, config, chips) 
        # info_action = (0,4,5,6,3)
        # state_list = [23.916,1.25,125,21,0]
        reward = 0
        prev_co2 = self.carbon_emissions(self.curr_state)
        info_list = []
        # each state component takes an action 
        state_list = list(self.curr_state)
        actions_list = list(info_action)
        # actions1(component,action,state) -> output_value, info_value
        for i in range(len(state_list)):
            
            result = actions1(i,actions_list[i],state_list)
            state_list[i] = result[1]
            info_list.append(result[0])
        
        self.curr_state = state_list
        new_state = self.curr_state
        
        new_co2 = self.carbon_emissions(new_state)
        
        old_diff = abs(self.termination_co2 - prev_co2)
        new_diff = abs(self.termination_co2 - new_co2)
        
        if new_diff <= 0.1*self.termination_co2:
            done = True
            reward += 5
        else:
            # 100 - 500 = 400 -> old diff
            # 100 - 200 = 200 -> new diff 
            # 200 - 400  = - 200 
            if new_diff - old_diff >0: 
                reward -= 1
            else:
                reward += 1
            done = False
        # info could be actions_list ?????????
        info = {0:"The recommended country is "+str(info_list[0]),
               1: "The recommended provider is "+str(info_list[1]),
               2: "The recommended GPU is "+str(info_list[2]),
               3: "The recommended number of Grid Configurations  is "+str(info_list[3])+" (in batches)",
               4: "The recommended number of chips is "+str(info_list[4])+ " (in batches)"}
        return self.curr_state, reward, done, info
        
    # difference between reset and init
    def reset(self, curr_state,termination_co2):
        self.action_space = Discrete(7)   
        self.curr_state = curr_state
        self.termination_co2 = termination_co2
#         self.info_action = (0,0,0,0,0)
        return curr_state


In [11]:
state = [23.916,1.25,125,21,0]
env1 = env(state,100)
prod = env1.step([0,4,5,6,3])
print(prod)

([24.482, 1.2, 85, 18, 1], 1, False, {0: 'The recommended country is Albania', 1: 'The recommended provider is Tencent Cloud', 2: 'The recommended GPU is Intel Xeon E5-2630v4', 3: 'The recommended number of Grid Configurations  is Grid Configurations (in batches)', 4: 'The recommended number of chips is Number of Chips (in batches)'})


#   RL Model implementation using Pytorch

In [12]:
class QNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 35),
        )

    def forward(self, x):
        return self.network(x)

In [13]:
q_network = QNetwork()
optimizer = optim.Adam(q_network.parameters(), lr=1e-3)
target_network = QNetwork()

In [14]:
# 40 * 8 * 221 = 320*221 = 70,720 * 20 * 100 = 141,440,000
state = torch.tensor(state,dtype= torch.float32)
qval = q_network(state)


# Agent Implementation

In [15]:
# python dqn.py --total-timesteps 500000 \ 220
#     --learning-rate 2.5e-4 \
#     --buffer-size 10000 \
#     --gamma 0.99 \ 0.9
#     --target-network-frequency 500 \ 30
#     --max-grad-norm 0.5 \
#     --batch-size 128 \
#     --start-e 1 \
#     --end-e 0.05 \
#     --exploration-fraction 0.5 \
#     --learning-starts 10000 \ 15
#     --train-frequency 1 
#     --tau / 0.9


In [16]:
def concise(states):
    # tensor of 35 length
    tensor_35 = torch.tensor(states)
    # Reshape the tensor into a 2D array of size 7x5
    tensor_2d = tensor_35.reshape(5, 7)
    # Convert the tensor into a NumPy array
    array_2d = tensor_2d.numpy()
    max_indices = np.argmax(array_2d, axis=1)

    return max_indices

In [17]:
def concise_target(states):
    # tensor of 35 length
    tensor_35 = torch.tensor(states)
    # Reshape the tensor into a 2D array of size 7x5
    tensor_2d = tensor_35.reshape(5, 7)
    # Convert the tensor into a NumPy array
    array_2d = tensor_2d.numpy()
    max_arr = []
    for i in range(len(array_2d)):
        max_val =np.amax(array_2d[i])
        max_arr.append(max_val)
    return max_arr

In [18]:
concise(qval)

  tensor_35 = torch.tensor(states)


array([0, 3, 6, 2, 4])

In [19]:
# 5 rows and 7 columns
def rand_action():
    rand_indices = []
    for i in range(5):
        random_num = random.randint(0,6)
        rand_indices.append(random_num)
    return rand_indices

In [20]:
concise(qval)
print(rand_action())

[5, 6, 4, 1, 3]


  tensor_35 = torch.tensor(states)


In [21]:
def i_to_val(tensor_array, actions):
    # [0,1,2,3,4]
    # tensor_array 35
    # actions -> index
    tensor_35 = torch.tensor(tensor_array)
    # Reshape the tensor into a 2D array of size 7x5
    tensor_2d = tensor_35.reshape(5, 7)
    # Convert the tensor into a NumPy array
    array_2d = tensor_2d.numpy()
    old_val = []
    for i in range(len(actions)):
        old_val.append(array_2d[i][actions[i]])
    return torch.tensor(old_val,requires_grad = True)
    

In [22]:
i_to_val(qval,[0,1,6,5,4])

  tensor_35 = torch.tensor(tensor_array)


tensor([  6.7046,  -8.2481,   8.8695, -11.1665,   8.8082], requires_grad=True)

In [23]:
def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
    slope = (end_e - start_e) / duration
    return max(slope * t + start_e, end_e)

In [24]:
# Reset the environment 
def training(env,terminationco2,current_state):
    for global_step in range(220):
        epsilon = linear_schedule(1,0.05,0.2*150,global_step)
        # prob is more than epsilon -> best action
        q_values = None
        if epsilon < random.random():
            q_values = q_network(torch.Tensor(current_state))
            actions = concise(q_values)

        else:
            actions = rand_action()
        
        next_state, rewards, termination, infos = env.step(actions)
# Plotting, maybe????
        # training 
        # next state
        with torch.no_grad():
            target_values = target_network(torch.Tensor(next_state))
            target_actions = torch.tensor(concise_target(target_values))
            
            # concise function
            # gamma -> 0.9
            td_target = rewards + 0.9 * target_actions * (1 - float(termination))
            td_target = torch.tensor(td_target, requires_grad=True)
        
        # old_val : actions [0,1,6,5,3]
        old_val = torch.tensor(i_to_val(q_network(torch.Tensor(current_state)),actions),requires_grad=True)
#         old_val = q_network(current_state)
        
        loss = F.mse_loss(td_target, old_val)


        # optimize the model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # target_network_frequency - 30
        # tau - 0.9
        
        if global_step % 30 == 0:
                for target_network_param, q_network_param in zip(target_network.parameters(), q_network.parameters()):
                    target_network_param.data.copy_(
                        0.9 * q_network_param.data + (0.1) * target_network_param.data
                    )
                
        current_state = next_state
    print(current_state)
    for value in infos.values():
        print(value+"/n")
    print("The new carbon estimation is "+str(env.carbon_emissions(current_state)))

        
    # Reset is maybe not needed

In [25]:
training(env1,500,state)
# [23.464, 1.18, 105, 20, 1]
# [24.482, 1.1, 105, 3, 7]


# 58,143 -> 
# 

[311.475, 1.1, 250, 15, 1]
The recommended country is Bolivia/n
The recommended provider is Google Cloud/n
The recommended GPU is Tesla P100/n
The recommended number of Grid Configurations  is Grid Configurations (in batches)/n
The recommended number of chips is Number of Chips (in batches)/n
The new carbon estimation is 3.2120859375e-07


  tensor_35 = torch.tensor(states)
  td_target = torch.tensor(td_target, requires_grad=True)
  tensor_35 = torch.tensor(tensor_array)
  old_val = torch.tensor(i_to_val(q_network(torch.Tensor(current_state)),actions),requires_grad=True)
