In [None]:
import os
import subprocess
import sys
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorforce

from tensorforce.agents import Agent
from tensorforce.environments import Environment
import tensorflow as tf
# The star import stays at its original position relative to the other imports
# so that whatever it exports shadows (and is shadowed by) the same names as before.
from helper import *

from datetime import datetime
from time import sleep, time
from collections import deque

# Prometheus
from prometheus_api_client import PrometheusConnect

from metric import Metric


# Hide every CUDA device from this process (device list "-1").
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Record the TensorFlow version in the notebook output.
print('TF Version:', tf.__version__)

In [None]:
class CustomEnvironment(Environment):
    """Tensorforce environment whose actions set the pod count of each deployment.

    Observations are built from ``Metric`` snapshots obtained through a
    Prometheus connection; actions are applied by running
    ``./shellscripts/set_pods.sh`` once per deployment.

    NOTE(review): ``metrics2df`` is used by ``__init__`` (when ``deployments``
    is None), by ``reward`` and by the notebook cells, but it is not defined in
    this class as visible here -- confirm where it is meant to come from.
    """

    def __init__(self, nr_stored_states = 10, max_pods = 5,
                 reward_function = None, deployments = None,
                 prometheus_url = "http://127.0.0.1:8001/"):
        """
        Arguments:
            nr_stored_states - number of most recent metric snapshots that are kept
                               and returned together as one observation
            max_pods         - maximum number of pods (for actions)
            reward_function  - function that returns the reward. Should have one argument
                               (dataframe)
            deployments      - list of deployment names. If none, all the deployments from
                               metric agent are used
            prometheus_url   - address of the Prometheus endpoint (defaults to the URL that
                               used to be hard-coded here)
        """
        super().__init__()

        self.nr_stored_states = nr_stored_states  # How many last states should be stored
        self.max_pods = max_pods

        # Prometheus connection handle
        self.prom = PrometheusConnect(url=prometheus_url, disable_ssl=True)

        if reward_function is None:
            self.reward_function = self._reward_default
        else:
            self.reward_function = reward_function

        # Bounded history: appending to a full deque drops the oldest snapshot.
        self.metrics = deque(maxlen=nr_stored_states)

        ## For debugging
        self.number_resets = 0
        self._reset_episode_state()

        if deployments is None:
            self.deployments = list(self.metrics2df().controlled_deployment.unique())
        else:
            self.deployments = deployments

        # Pre-fill the history with one snapshot repeated so that the observation
        # already has its full width before the first real step.
        m = Metric(deployments = self.deployments, prom = self.prom)
        self.metrics.extend([m] * nr_stored_states)

    def _reset_episode_state(self):
        """Zero the per-episode counters and clear the recorded actions."""
        # Stop environment
        self.envDone = 0
        self.current_step = 0

        ### Used for rewards
        self.nrDone = 0
        self.nrDead = 0
        self.nrErr5xx = 0

        ## For debugging
        self.action_list = []

    def _stacked_history(self):
        """Return the stored snapshots side by side, oldest first, newest last.

        Each snapshot contributes the ``value`` column of its ``metricDF``.
        """
        return np.column_stack(
            [snapshot.metricDF.value.to_numpy() for snapshot in self.metrics]
        )

    def states(self):
        """Describe the observation: float array shaped like the stored history.

        The shape is read from the snapshots already held in ``self.metrics``,
        so asking for the specification neither queries Prometheus nor alters
        the history.
        """
        return dict(
            type='float',
            shape=self._stacked_history().shape
        )

    def actions(self):
        """Describe the action: one float per deployment within [1, max_pods]."""
        return dict(
            type='float',
            shape=(len(self.deployments), ),
            min_value = 1.0,
            max_value = self.max_pods
        )

    # Optional: should only be defined if environment has a natural fixed
    # maximum episode length; otherwise specify maximum number of training
    # timesteps via Environment.create(..., max_episode_timesteps=???)
    def max_episode_timesteps(self):
        """Defer to the base class implementation."""
        return super().max_episode_timesteps()

    # Optional additional steps to close environment
    def close(self):
        """Defer to the base class implementation."""
        super().close()

    def reset(self):
        """Start a new episode and return the current observation."""
        self.number_resets += 1
        self._reset_episode_state()
        return self.getState()

    def getState(self):
        """Record a fresh ``Metric`` snapshot and return the stacked history.

        The returned array has the shape announced by ``states()``: one column
        per stored snapshot, the newest one last.
        """
        self.metrics.append(Metric(deployments = self.deployments, prom = self.prom))
        return self._stacked_history()

    def execute(self, actions):
        """Apply one pod target per deployment and return (next_state, terminal, reward).

        ``actions`` is stored unmodified in ``self.action_list``; the values are
        rounded to integers before they are handed to the shell script. Entries
        beyond the number of deployments are ignored.
        """
        self.action_list.append(actions)

        # A single rounding pass is enough; the values are integers afterwards.
        pod_targets = np.round(actions).astype(int)

        for deployment, pods in zip(self.deployments, pod_targets):
            # NOTE(review): the script receives 100 * pods, exactly as before --
            # confirm that this is the unit set_pods.sh expects.
            result = subprocess.run(
                ['sh', './shellscripts/set_pods.sh', deployment, str(100 * int(pods))],
                text=True, capture_output = True)
            if result.returncode != 0:
                # Keep going (best effort) but make the failure visible.
                warnings.warn(
                    f"set_pods.sh failed for {deployment!r} "
                    f"(exit code {result.returncode}): {result.stderr.strip()}"
                )

        self.current_step += 1

        next_state = self.getState()
        terminal = False  # Always False if no "natural" terminal state
        reward = self.reward()
        return next_state, terminal, reward

    def _reward_default(self, df):
        """Fallback reward: a constant 1, whatever ``df`` contains."""
        return 1

    def reward(self):
        """Evaluate the reward function on the dataframe returned by ``metrics2df()``."""
        # Call metrics2df so the reward function receives the dataframe itself,
        # not the bound method.
        return self.reward_function(self.metrics2df())
    

In [None]:
# Deployments whose pod count the environment controls.
tracked_deployments = [
    'carts', 'catalogue', 'front-end', 'orders', 'payment', 'shipping', 'user',
]

# Build the environment with a 100-timestep episode limit.
environment = Environment.create(
    environment=CustomEnvironment(max_pods=5, deployments=tracked_deployments),
    max_episode_timesteps=100,
)

# Sanity check: observation shape and the deployments in use.
print (environment.getState().shape)
print (environment.deployments, len(environment.deployments))


In [None]:
# Most recent stored snapshot, narrowed to the 'pod' rows of the tracked deployments.
df = environment.metrics[-1].metricDF
pod_rows = df.resource == 'pod'
tracked_rows = df.deployment.isin(environment.deployments)
df[pod_rows & tracked_rows]

In [None]:
# Show the raw action vectors passed to execute() since the last reset.
environment.action_list

In [None]:
# Scale-up experiment: request 4 or 5 pods per deployment, then sample the pod
# rows once per second for 30 iterations.
df = environment.metrics2df().sort_values('controlled_deployment')
df = df[(df.resource == 'pod') & (df.controlled_deployment.isin(environment.deployments))]

# One column per tracked deployment, one row per sample; the first row is the
# state before the action is applied.
dfres = pd.DataFrame(columns = list(df.controlled_deployment))
dfres.loc[datetime.now(), :] = list(df.value)

# Exactly one target per deployment. The list used to be multiplied by 7,
# which produced 49 values of which only the first 7 were ever used.
environment.execute([4, 5, 4, 5, 4, 5, 4]);

for i in range(30):
    print (f'Iteration {i}/30, datetime: {datetime.now()}        \r', end='')
    df = environment.metrics2df().sort_values('controlled_deployment')
    df = df[(df.resource == 'pod') & (df.controlled_deployment.isin(environment.deployments))]
    dfres.loc[datetime.now(), :] = list(df.value)
    sleep(1)

# Keep a private copy: `dfres` is reused by the scale-down experiment.
dfres_up = dfres.copy()
dfres_up.head()

In [None]:
# Plot the first 20 samples of the scale-up trace.
dfres_up.iloc[:20].plot()

In [None]:
# Scale-down experiment: request a single pod for each deployment, then sample
# the pod rows once per second for 200 iterations.
df = environment.metrics2df().sort_values('controlled_deployment')
tracked_pod_rows = (df.resource == 'pod') & (df.controlled_deployment.isin(environment.deployments))
df = df[tracked_pod_rows]

# One column per tracked deployment; the first row is the state before the action.
dfres = pd.DataFrame(columns = list(df.controlled_deployment))
dfres.loc[datetime.now(), :] = list(df.value)

environment.execute([1] * 7 );

for i in range(200):
    print (f'Iteration {i}/200, datetime: {datetime.now()}        \r', end='')
    df = environment.metrics2df().sort_values('controlled_deployment')
    tracked_pod_rows = (df.resource == 'pod') & (df.controlled_deployment.isin(environment.deployments))
    df = df[tracked_pod_rows]
    dfres.loc[datetime.now(), :] = list(df.value)
    sleep(1)

# Snapshot the trace under its own name before `dfres` gets reused.
dfres_down = dfres.copy()
dfres_down.head()

In [None]:
# Plot the scale-down trace. `dfres_down` is already a copy made by the
# scale-down cell; re-copying from `dfres` here would silently replace it with
# whatever experiment last wrote to `dfres` (e.g. after re-running the scale-up cell).
dfres_down.plot()


In [None]:
# Switch for the Tensorforce Runner; it is off, so the manual loop further down is used.
USE_RUNNER = False

if USE_RUNNER:
    # Random agent driven by the Runner: 100 episodes, at most 100 timesteps each.
    agent = Agent.create(environment=environment, agent='random')
    runner = tensorforce.execution.Runner(
        max_episode_timesteps=100,
        environment=environment,
        agent=agent,
    )
    runner.run(num_episodes=100)

In [None]:
# Random agent for the manual act/execute loop.
agent = Agent.create(environment=environment, agent='random')

In [None]:
def _tracked_pod_rows():
    """Return the 'pod' rows of environment.metrics2df() for the tracked
    deployments, sorted by controlled_deployment."""
    frame = environment.metrics2df().sort_values('controlled_deployment')
    keep = (frame.resource == 'pod') & (frame.controlled_deployment.isin(environment.deployments))
    return frame[keep]


# Two tables with one column per tracked deployment: observed pod values and
# the actions that were requested.
df = _tracked_pod_rows()
df_pods = pd.DataFrame(columns = list(df.controlled_deployment))
df_actions = df_pods.copy()

for episode_nr in range(5):
    episode_states, episode_internals = [], []
    episode_actions, episode_terminal, episode_reward = [], [], []

    states = environment.reset()
    internals = agent.initial_internals()
    terminal = False
    while not terminal:
        episode_states.append(states)
        episode_internals.append(internals)
        actions, internals = agent.act(
            states=states, internals=internals, independent=True, deterministic=False
        )
        episode_actions.append(actions)
        states, terminal, reward = environment.execute(actions=actions)
        episode_terminal.append(terminal)
        episode_reward.append(reward)
        # Leave the loop after a single step: one action per episode.
        terminal = True

    if False: # No experience/update for random agent
        agent.experience(
            states=episode_states, internals=episode_internals,
            actions=episode_actions, terminal=episode_terminal,
            reward=episode_reward
        )

        agent.update()

    # First sample right after the action was applied.
    df = _tracked_pod_rows()
    t = datetime.now()
    df_pods.loc[t, :] = list(df.value)
    df_actions.loc[t, :] = list(actions)

    # Then sample the pod rows every 3 seconds, 10 times.
    for i in range(10):
        sleep(3)
        print (f'Episode {episode_nr} ({i+1}/10).         \r', end='')
        df = _tracked_pod_rows()
        t = datetime.now()
        df_pods.loc[t, :] = list(df.value)

    # Repeat the action at the last sample time so it spans the whole episode.
    df_actions.loc[t, :] = list(actions)
    

In [None]:
# Line plots of the requested actions and of the sampled pod values.
df_actions.plot.line()
df_pods.plot.line()