<a href="https://colab.research.google.com/github/esraaelelimy/carbon_footprint/blob/main/q_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Environment Implementation

In [None]:
# ACTION Reduce by 10%, 20% etc -> increase action space
import argparse
import os
import random
import time
from distutils.util import strtobool

import gymnasium as gym
from gym.spaces import Discrete, Box
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter

# Labels for the seven discrete actions: three "increase" levels, "maintain",
# then three "decrease" levels.
_LEVELS = (1, 2, 3)
possible_actions = (
    [f"increase by level {level}" for level in _LEVELS]
    + ["maintain"]
    + [f"decrease by level {level}" for level in _LEVELS]
)

# Placeholder; currently an empty tuple.
action_for_each_state = tuple()

In [None]:
# OPEN QUESTION: should CO2 itself be considered part of the state space?
# state_space1 = (gamma, pue, tdp_watts, config, chips) 
# state_space2 = (gamma1, pue1, tdp_watts1, config1, chips1) 
# difference = ce.state1 - ce.state2 
# difference is positive -> + reward
# difference is negative -> - reward  



# goal -> 395-405 -> terminal state reached 
# budget limit = 100
# current e = 500

# diff = 400



# Environment Implementation 
class env():
    """Toy carbon-emission environment.

    A state is a sequence of numeric components, e.g.
    ``(gamma, pue, tdp_watts, config, chips)``. Its carbon emission is the
    product of the components. Each call to :meth:`step` adds the per-component
    deltas stored in ``self.info_action`` to the given state and rewards the
    move according to whether the emission got closer to ``termination_co2``.

    Attributes:
        action_space: ``Discrete(7)`` action space.
        curr_state: most recent state produced by ``__init__``/``reset``/``step``.
        termination_co2: target emission value.
        info_action: per-component additive deltas applied by ``step``
            (initialised to all zeros).
    """

    def __init__(self, curr_state, termination_co2):
        """Store the start state and the target emission value."""
        # 7 actions can be taken
        self.action_space = Discrete(7)
        self.curr_state = curr_state
        self.termination_co2 = termination_co2
        self.info_action = (0, 0, 0, 0, 0)
        super().__init__()

    def carbon_emissions(self, curr_state):
        """Return the product of all components of ``curr_state``."""
        product = 1
        for component in curr_state:
            product *= component
        return product

    def step(self, curr_state):
        """Apply ``self.info_action`` to ``curr_state`` and score the transition.

        Args:
            curr_state: state to step from (same layout as the constructor's).

        Returns:
            ``(next_state, reward, done, info)`` where ``next_state`` is a tuple,
            ``done`` is True when the new emission is within 10% of
            ``termination_co2`` (reward stays 0 in that case), otherwise
            ``reward`` is +1 if the distance to the target did not grow and
            -1 if it did. ``info`` is an empty dict.

        Raises:
            IndexError: if ``curr_state`` has more components than
                ``self.info_action``.
        """
        reward = 0
        prev_co2 = self.carbon_emissions(curr_state)

        # Each state component is shifted by its matching action delta.
        deltas = list(self.info_action)
        self.curr_state = tuple(
            value + deltas[i] for i, value in enumerate(curr_state)
        )

        # Score the state we just moved to. The previous code re-used the old
        # `curr_state` here, which made new_co2 == prev_co2 on every step.
        new_state = self.curr_state
        new_co2 = self.carbon_emissions(new_state)

        old_diff = abs(self.termination_co2 - prev_co2)
        new_diff = abs(self.termination_co2 - new_co2)

        if new_diff <= 0.1 * self.termination_co2:
            done = True
        else:
            # Example with target 100: moving from 500 to 300 gives
            # old_diff = 400, new_diff = 200, so new_diff - old_diff = -200
            # (closer to the target) and the reward is +1.
            if new_diff - old_diff > 0:
                reward -= 1
            else:
                reward += 1
            done = False

        info = {}
        return self.curr_state, reward, done, info

    def reset(self, curr_state, termination_co2):
        """Re-initialise the environment and return the given start state.

        Sets the same attributes as ``__init__``.
        """
        self.action_space = Discrete(7)
        self.curr_state = curr_state
        self.termination_co2 = termination_co2
        self.info_action = (0, 0, 0, 0, 0)
        return curr_state


In [None]:
# Smoke test: build an environment with target emission 100 and take one step
# from the start state. `prod` holds the (state, reward, done, info) tuple
# returned by step().
state = (1, 2, 3, 4, 5)
env1 = env(curr_state=state, termination_co2=100)
prod = env1.step(curr_state=state)
print(prod)

((1, 2, 3, 4, 5), 1, False, {})


# RL Model Implementation Using Keras

In [None]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [None]:
def build_model(states, actions):
    """Build the Keras Sequential Q-network.

    Args:
        states: number of state components; the input shape is ``(1, states)``.
        actions: number of units in the linear output layer.

    Returns:
        An uncompiled ``Sequential`` model: Flatten -> Dense(24, relu) ->
        Dense(24, relu) -> Dense(actions, linear).
    """
    network = Sequential()
    stack = (
        Flatten(input_shape=(1, states)),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    )
    for layer in stack:
        network.add(layer)
    return network

In [None]:
# 5 state components in, 7 action values out.
model = build_model(states=5, actions=7)

In [None]:
# Print the layer-by-layer architecture and parameter counts of `model`.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 5)                 0         
                                                                 
 dense_3 (Dense)             (None, 24)                144       
                                                                 
 dense_4 (Dense)             (None, 24)                600       
                                                                 
 dense_5 (Dense)             (None, 7)                 175       
                                                                 
Total params: 919
Trainable params: 919
Non-trainable params: 0
_________________________________________________________________


In [None]:
# class QNetwork(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.network = nn.Sequential(
#             nn.Linear(np.array(5).prod(), 120),
#             nn.ReLU(),
#             nn.Linear(120, 84),
#             nn.ReLU(),
#             nn.Linear(84, 3),
#         )
#     # x is state
#     def forward(self, x):
#         return self.network(x)


In [None]:
# 40 * 8 * 221 = 320*221 = 70,720 * 20 * 100 = 141,440,000

# Agent Implementation

In [None]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [None]:
def build_agent(model, actions):
    """Wrap ``model`` in a keras-rl ``DQNAgent``.

    Args:
        model: the Keras model passed to the agent as its ``model``.
        actions: number of discrete actions (``nb_actions``).

    Returns:
        A ``DQNAgent`` using a ``BoltzmannQPolicy``, a ``SequentialMemory`` of
        limit 50000 with ``window_length=1``, 10 warm-up steps and
        ``target_model_update=1e-2``. The agent is not compiled.
    """
    exploration_policy = BoltzmannQPolicy()
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    return DQNAgent(
        model=model,
        memory=replay_memory,
        policy=exploration_policy,
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [None]:
# Random-rollout sanity check of the environment.
#
# The earlier version of this cell never terminated: it stepped from the same
# `state` on every iteration and had no step limit, so `done` could stay False
# forever. Each episode is now capped and the state returned by step() is fed
# back in.
# NOTE(review): env.step() takes a state, not an action, so the sampled
# `action` still does not influence the transition -- confirm how actions
# should map onto env1.info_action.
MAX_STEPS_PER_EPISODE = 1000
episodes = 10
for episode in range(1, episodes + 1):
    # reset(curr_state, termination_co2) returns the start state
    obs = env1.reset(state, 100)
    done = False
    score = 0
    steps = 0

    while not done and steps < MAX_STEPS_PER_EPISODE:
        # Sample from every action id, not just the two endpoints 0 and 6.
        action = random.randrange(env1.action_space.n)
        obs, reward, done, info = env1.step(obs)
        score += reward
        steps += 1
    print('Episode:{} Score:{}'.format(episode, score))


KeyboardInterrupt: ignored

In [None]:
# Build the DQN agent around `model` with 7 actions, compile it and train.
dqn = build_agent(model, 7)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# triggers a warning on construction.
dqn.compile(tf.keras.optimizers.legacy.Adam(learning_rate=1e-3), metrics=['mae'])
# NOTE(review): the recorded run of this cell ended in a TypeError right after
# training started. Presumably the agent calls env1.reset() with no arguments
# and env1.step(action), which the env class here does not support -- confirm
# and adapt the environment before relying on this cell.
dqn.fit(env1, nb_steps=50000, visualize=False, verbose=1)

  super().__init__(name, **kwargs)


Training for 50000 steps ...


TypeError: ignored