In [1]:
import tensorflow as tf
import pandas as pd
import requests
from sklearn import preprocessing

In [2]:
# Turn on memory growth for every GPU TensorFlow detects, so the runtime does not
# allocate all device memory at initialisation. Looping (instead of indexing [0])
# keeps this cell from raising IndexError on a machine without a GPU and applies
# the same setting to all devices when there are several.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

In [3]:
import os
import sys

# Make the parent directory importable so the `src.*` packages used below resolve.
module_path = os.path.abspath('..')
already_on_path = module_path in sys.path
if not already_on_path:
    sys.path.append(module_path)

In [4]:
from src.kube.pod import get_pod_names
from src.prometheus.time_utils import generate_time
from src.prometheus.metrics import metric_labels
from src.prometheus.constants import prometheus_endpoint, excluding_services, prometheus_query

In [5]:
# Length of each query window: the inclusive (start, end) range built below
# spans exactly this many integer values.
data_length = 420

# End timestamp of every scrape window.
# NOTE(review): the magnitudes look like Unix epoch seconds — confirm.
offsets = [
    1598877404.0419254, 1598878004.0544617, 1598878604.078713, 1598879204.093746,
    1598879804.1056745, 1598880404.122491, 1598881004.1488338, 1598881604.1680448,
    1598882204.1864324, 1598882804.1977358, 1598883404.219199, 1598884004.2493558,
    1598884604.270563, 1598885204.2820766, 1598885804.289007, 1598886404.3132436,
    1598913943.7459676, 1598914543.8185163, 1598915143.8391328, 1598915743.848063,
    1598916343.8584847, 1598916943.8682706, 1598917543.8892484, 1598918143.912858,
    1598918743.936255, 1598919343.9440253, 1598919943.9653447, 1598920543.972687,
    1598921143.98354, 1598921743.997125, 1598922344.0048857, 1598922944.029947,
    1598923544.0381558, 1598924144.0608115, 1598924744.0780828, 1598925344.0908113,
    1598925944.1309114, 1598926544.1630712, 1598927144.1790442, 1598927744.1974945,
    1598928344.2189867, 1598928944.2331138, 1598929544.2421143, 1598930144.2663388,
    1598930744.2883592, 1598931344.3096392, 1598931944.3326228, 1598932544.3605652,
    1598933144.381381, 1598933744.4063568, 1598934344.4200628, 1598934944.4510636,
    1598935544.4720018, 1598936144.4863994, 1598936744.500132, 1598937344.5239713,
    1598937944.5362685, 1598938544.5559564,
]

# One (start, end) pair per window, both truncated to integers.
# NOTE(review): despite the `_in_ms` suffix nothing here converts to milliseconds;
# the pairs stay in the same unit as `offsets` — confirm the intended unit.
offsets_in_ms = [
    (window_end - data_length + 1, window_end)
    for window_end in map(int, offsets)
]

In [6]:
# Services whose metrics are extracted; each is matched against the `app`
# label of the Prometheus results further down.
pods = [
    'vehicles-service',
    'identity-service',
    'customers-service',
    'deliveries-service',
    'orders-service',
    'availability-service',
    'parcels-service',
    'pricing-service',
]

In [7]:
# Report the GPU devices TensorFlow can see (an empty list means none were found).
# The trailing comma that used to follow the first print was a Python 2 idiom; in
# Python 3 it only built a throwaway tuple. The stable `tf.config` API is used to
# stay consistent with the memory-growth cell above.
print("Is there a GPU available: ")
print(tf.config.list_physical_devices("GPU"))

Is there a GPU available: 
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [8]:
# Print the query expression of every configured metric, grouped by job.
# Each metric entry is indexed here as [1] for the query text.
for job_name in metric_labels:
    for metric in metric_labels[job_name]:
        print(metric[1])

label_join(sum by (app, job) (aspnetcore_requests_per_second{app!~"api-gateway-service|order-maker-service|operations-service"}), "instance", "", "app")
label_join(max by (app, job) (rate(aspnetcore_requests_duration_seconds_sum{app!~"api-gateway-service|order-maker-service|operations-service", route=~".+"}[10s])), "instance", "", "app")
label_join(sum by (app, job) (rate(dotnet_gc_pause_ratio{app!~"api-gateway-service|order-maker-service|operations-service"}[10s])), "instance", "", "app")
label_join(sum by (app, job) (rate(dotnet_threadpool_adjustments_total{app!~"api-gateway-service|order-maker-service|operations-service"}[10s])), "instance", "", "app")
label_join(sum by (app, job) (rate(dotnet_threadpool_scheduled_total{app!~"api-gateway-service|order-maker-service|operations-service"}[5s])), "instance", "", "app")
label_join(sum by (app, job) (runtime_threadpool_threads_total{app!~"api-gateway-service|order-maker-service|operations-service"}), "instance", "", "app")
max by (app, in

In [10]:
# Build one DataFrame per scrape window. Every column holds one metric for one
# pod and is named "<metric[0]>_<pod>"; every row is one sample of the response.

# Upper bound (seconds) for a single HTTP call, so an unresponsive Prometheus
# fails the cell instead of hanging it indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

datasets = []

for offset in offsets_in_ms:
    window_start, window_end = offset
    prom_data_by_pods = {}
    for job_name in metric_labels:
        # Each metric entry is indexed as [0] = column-name prefix, [1] = query text.
        for metric in metric_labels[job_name]:
            response = requests.get(
                prometheus_endpoint + prometheus_query,
                params={'query': metric[1], 'start': window_start, 'end': window_end, 'step': 1},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            # Surface HTTP failures here rather than as a KeyError on the JSON body.
            response.raise_for_status()
            prometheus_data = response.json()['data']['result']

            for pod in pods:
                # First series whose `app` label matches the pod; None when absent.
                metric_data = next(
                    (series['values'] for series in prometheus_data
                     if series['metric']['app'] == pod),
                    None,
                )
                if metric_data is None:
                    raise LookupError(
                        f"No series for pod {pod!r} in the response to query {metric[1]!r} "
                        f"(window {window_start}-{window_end})"
                    )
                # Each sample is indexed as [1] for its value. Materialise a list
                # so the column is concrete data rather than a one-shot iterator.
                prom_data_by_pods[metric[0] + '_' + pod] = [float(sample[1]) for sample in metric_data]
    datasets.append(pd.DataFrame.from_dict(prom_data_by_pods))

In [11]:
# Min-max scale every batch on its own (a fresh scaler is fitted per frame) and
# put the scaled frame back into `datasets` at the same position.
for position in range(len(datasets)):
    batch = datasets[position]
    scaled_values = preprocessing.MinMaxScaler().fit_transform(batch)
    datasets[position] = pd.DataFrame(scaled_values, columns=batch.columns)

In [12]:
# Stack all batches into a single frame. ignore_index=True gives the rows one
# continuous 0..N-1 index instead of repeating each batch's own row labels,
# which would otherwise make label-based lookups ambiguous.
dataset = pd.concat(datasets, ignore_index=True)
dataset.shape

# Each batch 420 rows

(24360, 72)

In [13]:
# Write the combined frame to a CSV named after the integer part of the first offset.
# NOTE(review): the frames in `datasets` are min-max scaled earlier in this notebook
# before being concatenated, yet the target directory is called `not_scaled` —
# confirm which of the two is intended.
output_path = f'scrapped_data/not_scaled/{int(offsets[0])}.csv'
# Create the target directory on first use; to_csv does not create missing folders.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
dataset.to_csv(output_path, index=False, header=True)