# Connection to Prometheus

In [None]:
from prometheus_api_client import PrometheusConnect
from datetime import datetime, timedelta  
import pandas as pd 
import yaml
import json
# Open a client against the Prometheus HTTP endpoint used throughout this notebook.
# NOTE(review): disable_ssl=True presumably turns off certificate checks, which
# should be irrelevant for this plain-http URL — confirm against the client docs.
prom = PrometheusConnect(url ="http://192.168.56.10:30090", disable_ssl=True)
# Bare expression: echoes the connection object as the cell output.
prom

: 

# Fetch All Metric Names

In [None]:
# Ask Prometheus for the name of every metric it exposes
all_metrics = prom.all_metrics()
print(f'{len(all_metrics)} metrics found')

# Text file that receives the metric names
file_path = 'prometheus_metrics.txt'

# Emit the names in the order they were returned, one newline-terminated
# entry per metric
with open(file_path, 'w') as file:
    file.writelines(metric + '\n' for metric in all_metrics)

print(f"Metrics have been saved to {file_path}.")

# Query a Metric

In [None]:
# Define a starting point for data collection: 24 hours before now.
start_time = datetime.now() - timedelta(days=1)

# Current time for the end of data collection.
# This is a second datetime.now() call, so the window is marginally longer
# than exactly one day.
end_time = datetime.now()

# Step size for queries (e.g., "15m" for 15 minutes); here one sample per minute
step = '1m'

# Define the metric you want to query
metric = 'node_cpu_seconds_total'

# Perform a range query for the metric; the bare metric name is used as the
# query expression, so no label filter is applied.
result_range = prom.custom_query_range(
    query=metric,
    start_time=start_time,
    end_time=end_time,
    step=step
    )

# Pretty-print the raw result as indented JSON
print(json.dumps(result_range, indent=4))

## Backup Whole Database

In [None]:
import json
import gzip
import os
from prometheus_api_client import PrometheusConnect
from datetime import datetime, timedelta

# Setup connection
prom = PrometheusConnect(url ="http://192.168.56.10:30090", disable_ssl=True)

# Fetch all metric names
all_metrics = prom.all_metrics()

# Read the clock once so the query window and the dated output directory are
# derived from the same instant.
now = datetime.now()
start_time = now - timedelta(days=1)  # back up the last 24 hours
end_time = now
step = '2m'  # Step size

# Output directory named after today's date; created (with parents) if missing
path = f"../data_json/{now.strftime('%Y%m%d')}/"

os.makedirs(path, exist_ok=True)


for metric in all_metrics:
    # Perform a range query for each metric. A failure for one metric is
    # reported and skipped so that it does not abort the whole backup.
    try:
        result_range = prom.custom_query_range(
            query=metric,
            start_time=start_time,
            end_time=end_time,
            step=step
        )
    except Exception as e:
        print(f"Error fetching data for {metric}: {e}")
        continue

    # Define file path
    file_path = path+f'{metric.replace("/", "_")}.json'  # Replace '/' with '_' to avoid path issues

    # Write data to a plain (uncompressed) JSON file
    with open(file_path, 'wt', encoding='utf-8') as file:
        json.dump(result_range, file, indent=4)

    print(f"Data for {metric} has been saved to {file_path}.")


# Filter JSON

In [None]:
%reset -f
import importlib as imp
import helper as hp
imp.reload(hp)

# Select entries under the export directory via the project helper, passing
# both substrings together with and_or='and'.
# NOTE(review): presumably this keeps only file names containing BOTH 'cpu'
# and 'pod' — confirm against helper.filter_filenames.
cpu = hp.filter_filenames(path='../data_json/20240420/json/',
                    substrings=['cpu', 'pod'], 
                    and_or='and')

# Bare expression: shows the helper's result as the cell output.
cpu

# READ JSON

In [None]:
%reset -f
import json
import helper as hp
# Read one exported metric file through the project helper.
# NOTE(review): the path ends in .json; whether helper.read_json expects plain
# or gzipped content cannot be told from this notebook — confirm in helper.py.
file_path = '../data_json/20240420/json/node_cpu_seconds_total.json'
hp.read_json(file_path)

In [None]:
%reset -f
import os
import json
import pandas as pd
import helper as hp
from pandas import json_normalize
import importlib as imp
imp.reload(hp)
# List the entries of the export directory selected by the 'cpu' substring
path = '../data_json/20240420/json/'
files = hp.filter_filenames(path=path,
                            substrings=['cpu'],
                            and_or='and')



import time
# node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate.json

# Capturing the output
from io import StringIO
import sys

# Redirect stdout to capture print statements
old_stdout = sys.stdout
sys.stdout = mystdout = StringIO()

try:
    # First the list of file names, then each name followed by whatever
    # hp.read_json prints for that file.
    for f in files:
        print(f)
    print('\n\n')

    for f in files:
        print(str(f))
        hp.read_json(path+f)
finally:
    # Restore stdout even when a file fails to load; otherwise every later
    # print in the session would keep going into the in-memory buffer.
    sys.stdout = old_stdout

# Get the captured output
output = mystdout.getvalue()

# Writing the output to a text file
with open('output.txt', 'w') as out_file:
    out_file.write(output)

# JSON to Pandas DataFrame

In [None]:
# Scratch cell: two file names kept as bare string expressions. Nothing is
# assigned; only the last one is shown as the cell output.
"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate.json"
"container_cpu_usage_seconds_total.json"

In [None]:
%reset -f
import helper as hp
import importlib as imp
imp.reload(hp)

# Directory holding the exported JSON files
path = '../data_json/20240420/json/'

# Candidate input files; only f2 is used below, f1 and f3 are kept for reference.
f1 = "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests.json"
f2 = "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate.json"
f3 = "container_cpu_usage_seconds_total.json"

# Load f2 and reshape it with the project helper.
# NOTE(review): the variable is called dfs_memory although f2 is a CPU metric file.
dfs_memory = hp.transform_data(hp.read_json(path+f2))

# CPU Usage

In [None]:
%reset -f

from prometheus_api_client import PrometheusConnect
from datetime import datetime, timedelta  
import pandas as pd 
import yaml
import json

# Initialize Prometheus Connection (module-level client used by the query function below).
# NOTE(review): disable_ssl=True presumably only affects certificate checks and
# should not matter for this plain-http URL — confirm against the client docs.
prom = PrometheusConnect(url ="http://192.168.56.10:30090", disable_ssl=True)


def get_total_cpu_usage_for_namespace(metric_name, namespaces, aggregation_time, start_time=None, end_time=None, step='60s', per_pod=False):
    """Run a summed ``rate()`` range query for ``metric_name`` in each namespace.

    For every namespace the PromQL expression
    ``sum(rate(<metric_name>{namespace="<ns>"}[<aggregation_time>]))`` is sent
    through the module-level ``prom`` connection. With ``per_pod=True`` the
    sum is grouped ``by (pod)``.

    Args:
        metric_name: Name of the metric to query.
        namespaces: Iterable of namespace names; one query is issued per entry.
        aggregation_time: Range-vector window handed to ``rate()``, e.g. ``'1m'``.
        start_time: Start of the queried range as a ``datetime``. Defaults to
            one hour before ``end_time``.
        end_time: End of the queried range as a ``datetime``. Defaults to now.
        step: Query resolution step passed through to ``custom_query_range``.
        per_pod: When true, group the sum by the ``pod`` label.

    Returns:
        Dict mapping each namespace to the value returned by
        ``prom.custom_query_range``. Namespaces whose query raised are
        reported on stdout and left out of the dict.
    """
    # Resolve the default window once, before the loop, so every namespace is
    # queried over exactly the same range. ``datetime`` and ``timedelta`` are
    # the names imported via ``from datetime import datetime, timedelta``.
    if not end_time:
        end_time = datetime.now()
    if not start_time:
        start_time = end_time - timedelta(hours=1)

    # Initialize the result dictionary
    results = {}

    # Sum across the namespace, optionally keeping one series per pod
    grouping = ' by (pod)' if per_pod else ''

    # Loop through each namespace and fetch metrics
    for namespace in namespaces:
        label_part = f'namespace="{namespace}"'
        query = f'sum(rate({metric_name}{{{label_part}}}[{aggregation_time}])){grouping}'

        # Fetch metrics from Prometheus; a failing namespace is reported and
        # skipped so that the remaining namespaces are still collected.
        try:
            results[namespace] = prom.custom_query_range(
                query=query,
                start_time=start_time,
                end_time=end_time,
                step=step
            )
        except Exception as e:
            print(f"Error fetching data for namespace {namespace}: {e}")

    return results


import os

# Define the namespaces and the time range
namespaces = ['default', 'kube-node-lease', 'kube-public', 'kube-system', 'kubernetes-dashboard', 'prometheus', 'ros']
start_time = datetime(2024, 4, 20, 0, 0, 0)
end_time = datetime(2024, 4, 20, 23, 59, 59)
step = '60s'  # Step size
metric_name = "container_cpu_usage_seconds_total"
path = "./dataset/"

# open() does not create missing directories, so make sure the target exists
os.makedirs(path, exist_ok=True)

# Export the namespace-level totals first, then the per-pod breakdown
# (written to "<metric>.json" and "<metric>_pod.json" respectively).
for per_pod, suffix in ((False, ""), (True, "_pod")):
    results = get_total_cpu_usage_for_namespace(metric_name=metric_name, namespaces=namespaces, aggregation_time='1m', start_time=start_time, end_time=end_time, step=step, per_pod=per_pod)
    file_name = f"{metric_name}{suffix}.json"
    file_path = path+file_name
    with open(file_path, 'wt', encoding='utf-8') as file:
        json.dump(results, file, indent=4)



# Memory Usage

In [4]:
%reset -f

from prometheus_api_client import PrometheusConnect
from datetime import datetime, timedelta  
import pandas as pd 
import yaml
import json

# Initialize Prometheus Connection (module-level client used by the query function below).
# NOTE(review): disable_ssl=True presumably only affects certificate checks and
# should not matter for this plain-http URL — confirm against the client docs.
prom = PrometheusConnect(url ="http://192.168.56.10:30090", disable_ssl=True)


def get_total_memory_usage_for_namespace(metric_name, namespaces, aggregation_time, start_time=None, end_time=None, step='60s', per_pod=False):
    """Run a summed range query for ``metric_name`` in each namespace.

    For every namespace the PromQL expression
    ``sum(<metric_name>{namespace="<ns>"})`` is sent through the module-level
    ``prom`` connection. With ``per_pod=True`` the sum is grouped ``by (pod)``.
    No ``rate()`` is applied: the metric values are summed as they are.

    Args:
        metric_name: Name of the metric to query.
        namespaces: Iterable of namespace names; one query is issued per entry.
        aggregation_time: Unused by this query (no ``rate()`` window is
            needed); kept so the signature matches the sibling query helpers.
        start_time: Start of the queried range as a ``datetime``. Defaults to
            one hour before ``end_time``.
        end_time: End of the queried range as a ``datetime``. Defaults to now.
        step: Query resolution step passed through to ``custom_query_range``.
        per_pod: When true, group the sum by the ``pod`` label.

    Returns:
        Dict mapping each namespace to the value returned by
        ``prom.custom_query_range``. Namespaces whose query raised are
        reported on stdout and left out of the dict.
    """
    # Resolve the default window once, before the loop, so every namespace is
    # queried over exactly the same range. ``datetime`` and ``timedelta`` are
    # the names imported via ``from datetime import datetime, timedelta``.
    if not end_time:
        end_time = datetime.now()
    if not start_time:
        start_time = end_time - timedelta(hours=1)

    # Initialize the result dictionary
    results = {}

    # Sum across the namespace, optionally keeping one series per pod
    grouping = ' by (pod)' if per_pod else ''

    # Loop through each namespace and fetch metrics
    for namespace in namespaces:
        label_part = f'namespace="{namespace}"'
        query = f'sum({metric_name}{{{label_part}}}){grouping}'

        # Fetch metrics from Prometheus; a failing namespace is reported and
        # skipped so that the remaining namespaces are still collected.
        try:
            results[namespace] = prom.custom_query_range(
                query=query,
                start_time=start_time,
                end_time=end_time,
                step=step
            )
        except Exception as e:
            print(f"Error fetching data for namespace {namespace}: {e}")

    return results


import os

# Define the namespaces and the time range
namespaces = ['default', 'kube-node-lease', 'kube-public', 'kube-system', 'kubernetes-dashboard', 'prometheus', 'ros']
start_time = datetime(2024, 4, 20, 0, 0, 0)
end_time = datetime(2024, 4, 20, 23, 59, 59)
step = '60s'  # Step size
metric_name = "container_memory_usage_bytes"
path = "./dataset/"

# open() does not create missing directories, so make sure the target exists
os.makedirs(path, exist_ok=True)

# Export the namespace-level totals first, then the per-pod breakdown
# (written to "<metric>.json" and "<metric>_pod.json" respectively).
for per_pod, suffix in ((False, ""), (True, "_pod")):
    results = get_total_memory_usage_for_namespace(metric_name=metric_name, namespaces=namespaces, aggregation_time='1m', start_time=start_time, end_time=end_time, step=step, per_pod=per_pod)
    file_name = f"{metric_name}{suffix}.json"
    file_path = path+file_name
    with open(file_path, 'wt', encoding='utf-8') as file:
        json.dump(results, file, indent=4)



# Network Usage

In [6]:
%reset -f

from prometheus_api_client import PrometheusConnect
from datetime import datetime, timedelta  
import pandas as pd 
import yaml
import json

# Initialize Prometheus Connection (module-level client used by the query function below).
# NOTE(review): disable_ssl=True presumably only affects certificate checks and
# should not matter for this plain-http URL — confirm against the client docs.
prom = PrometheusConnect(url ="http://192.168.56.10:30090", disable_ssl=True)


def get_network_for_namespace(metric_name, namespaces, aggregation_time, start_time=None, end_time=None, step='60s', per_pod=False):
    """Run a summed ``rate()`` range query for ``metric_name`` in each namespace.

    For every namespace the PromQL expression
    ``sum(rate(<metric_name>{namespace="<ns>"}[<aggregation_time>]))`` is sent
    through the module-level ``prom`` connection. With ``per_pod=True`` the
    sum is grouped ``by (pod)``.

    Args:
        metric_name: Name of the metric to query.
        namespaces: Iterable of namespace names; one query is issued per entry.
        aggregation_time: Range-vector window handed to ``rate()``, e.g. ``'1m'``.
        start_time: Start of the queried range as a ``datetime``. Defaults to
            one hour before ``end_time``.
        end_time: End of the queried range as a ``datetime``. Defaults to now.
        step: Query resolution step passed through to ``custom_query_range``.
        per_pod: When true, group the sum by the ``pod`` label.

    Returns:
        Dict mapping each namespace to the value returned by
        ``prom.custom_query_range``. Namespaces whose query raised are
        reported on stdout and left out of the dict.
    """
    # Resolve the default window once, before the loop, so every namespace is
    # queried over exactly the same range. ``datetime`` and ``timedelta`` are
    # the names imported via ``from datetime import datetime, timedelta``.
    if not end_time:
        end_time = datetime.now()
    if not start_time:
        start_time = end_time - timedelta(hours=1)

    # Initialize the result dictionary
    results = {}

    # Sum across the namespace, optionally keeping one series per pod
    grouping = ' by (pod)' if per_pod else ''

    # Loop through each namespace and fetch metrics
    for namespace in namespaces:
        label_part = f'namespace="{namespace}"'
        query = f'sum(rate({metric_name}{{{label_part}}}[{aggregation_time}])){grouping}'

        # Fetch metrics from Prometheus; a failing namespace is reported and
        # skipped so that the remaining namespaces are still collected.
        try:
            results[namespace] = prom.custom_query_range(
                query=query,
                start_time=start_time,
                end_time=end_time,
                step=step
            )
        except Exception as e:
            print(f"Error fetching data for namespace {namespace}: {e}")

    return results

import os

# ============================================
# Export receive and transmit throughput, each as a namespace-level total
# ("<metric>.json") and as a per-pod breakdown ("<metric>_pod.json").

# Define the namespaces and the time range
namespaces = ['default', 'kube-node-lease', 'kube-public', 'kube-system', 'kubernetes-dashboard', 'prometheus', 'ros']
start_time = datetime(2024, 4, 20, 0, 0, 0)
end_time = datetime(2024, 4, 20, 23, 59, 59)
step = '60s'  # Step size
path = "./dataset/"

# open() does not create missing directories, so make sure the target exists
os.makedirs(path, exist_ok=True)

for metric_name in ("container_network_receive_bytes_total",
                    "container_network_transmit_bytes_total"):
    for per_pod, suffix in ((False, ""), (True, "_pod")):
        results = get_network_for_namespace(metric_name=metric_name, namespaces=namespaces, aggregation_time='1m', start_time=start_time, end_time=end_time, step=step, per_pod=per_pod)

        file_name = f"{metric_name}{suffix}.json"
        file_path = path+file_name
        with open(file_path, 'wt', encoding='utf-8') as file:
            json.dump(results, file, indent=4)




# Disk Usage

In [7]:
%reset -f

from prometheus_api_client import PrometheusConnect
from datetime import datetime, timedelta  
import pandas as pd 
import yaml
import json

# Initialize Prometheus Connection (module-level client used by the query function below).
# NOTE(review): disable_ssl=True presumably only affects certificate checks and
# should not matter for this plain-http URL — confirm against the client docs.
prom = PrometheusConnect(url ="http://192.168.56.10:30090", disable_ssl=True)


def get_total_fs_usage_for_namespace(metric_name, namespaces, aggregation_time, start_time=None, end_time=None, step='60s', per_pod=False):
    """Run a summed range query for ``metric_name`` in each namespace.

    For every namespace the PromQL expression
    ``sum(<metric_name>{namespace="<ns>"})`` is sent through the module-level
    ``prom`` connection. With ``per_pod=True`` the sum is grouped ``by (pod)``.
    No ``rate()`` is applied: the metric values are summed as they are.

    Args:
        metric_name: Name of the metric to query.
        namespaces: Iterable of namespace names; one query is issued per entry.
        aggregation_time: Unused by this query (no ``rate()`` window is
            needed); kept so the signature matches the sibling query helpers.
        start_time: Start of the queried range as a ``datetime``. Defaults to
            one hour before ``end_time``.
        end_time: End of the queried range as a ``datetime``. Defaults to now.
        step: Query resolution step passed through to ``custom_query_range``.
        per_pod: When true, group the sum by the ``pod`` label.

    Returns:
        Dict mapping each namespace to the value returned by
        ``prom.custom_query_range``. Namespaces whose query raised are
        reported on stdout and left out of the dict.
    """
    # Resolve the default window once, before the loop, so every namespace is
    # queried over exactly the same range. ``datetime`` and ``timedelta`` are
    # the names imported via ``from datetime import datetime, timedelta``.
    if not end_time:
        end_time = datetime.now()
    if not start_time:
        start_time = end_time - timedelta(hours=1)

    # Initialize the result dictionary
    results = {}

    # Sum across the namespace, optionally keeping one series per pod
    grouping = ' by (pod)' if per_pod else ''

    # Loop through each namespace and fetch metrics
    for namespace in namespaces:
        label_part = f'namespace="{namespace}"'
        query = f'sum({metric_name}{{{label_part}}}){grouping}'

        # Fetch metrics from Prometheus; a failing namespace is reported and
        # skipped so that the remaining namespaces are still collected.
        try:
            results[namespace] = prom.custom_query_range(
                query=query,
                start_time=start_time,
                end_time=end_time,
                step=step
            )
        except Exception as e:
            print(f"Error fetching data for namespace {namespace}: {e}")

    return results


import os

# Define the namespaces and the time range
namespaces = ['default', 'kube-node-lease', 'kube-public', 'kube-system', 'kubernetes-dashboard', 'prometheus', 'ros']
start_time = datetime(2024, 4, 20, 0, 0, 0)
end_time = datetime(2024, 4, 20, 23, 59, 59)
step = '60s'  # Step size
metric_name = "container_fs_usage_bytes"
path = "./dataset/"

# open() does not create missing directories, so make sure the target exists
os.makedirs(path, exist_ok=True)

# Export the namespace-level totals first, then the per-pod breakdown
# (written to "<metric>.json" and "<metric>_pod.json" respectively).
for per_pod, suffix in ((False, ""), (True, "_pod")):
    results = get_total_fs_usage_for_namespace(metric_name=metric_name, namespaces=namespaces, aggregation_time='1m', start_time=start_time, end_time=end_time, step=step, per_pod=per_pod)
    file_name = f"{metric_name}{suffix}.json"
    file_path = path+file_name
    with open(file_path, 'wt', encoding='utf-8') as file:
        json.dump(results, file, indent=4)



# Plots

In [None]:
%reset -f 
import json
import pandas as pd
import helper as hp
import importlib as imp
from pandas import json_normalize
from matplotlib import pyplot as plt
from matplotlib import ticker
from matplotlib import dates as mdates
imp.reload(hp)

def transform_data(data):
    """Reshape a frame with a list-valued 'values' column into a timestamp-indexed wide frame.

    Each entry of ``data['values']`` is exploded into its own row and split
    into a ``timestamp`` and a ``value``. Every other column of ``data`` is
    then used as a pivot key, so the result has one column per distinct
    combination of those remaining columns.

    Args:
        data: DataFrame with a 'values' column whose cells are lists of
            two-item ``[timestamp, value]`` entries (presumably a normalized
            Prometheus range-query result — confirm against the caller).

    Returns:
        DataFrame indexed by timestamp, sorted ascending, with index and
        column names cleared. Non-numeric columns are converted with
        ``pd.to_numeric(errors='coerce')``, so unparseable entries become NaN.
    """
    # Explode the 'values' column into multiple rows (one row per list entry;
    # the other columns are repeated for each of them)
    exploded_data = data.explode('values')

    # Split the 'values' column into 'timestamp' and 'value' columns
    exploded_data[['timestamp', 'value']] = pd.DataFrame(exploded_data['values'].tolist(), index=exploded_data.index)


    # Timestamps are interpreted as seconds since the Unix epoch
    exploded_data['timestamp'] = pd.to_datetime(exploded_data['timestamp'], unit='s')
    exploded_data.drop(columns=['values'], inplace=True, axis=0)
    # Replace missing entries in every column with "_"
    # (presumably so that absent labels still form a usable pivot key — confirm)
    exploded_data.fillna("_", inplace=True)
    # Wide layout: one row per timestamp, one column per combination of the
    # remaining (non-timestamp, non-value) columns
    pivoted_data = exploded_data.pivot(index='timestamp', columns=[col for col in exploded_data.columns if col not in ['value', 'timestamp']], values='value')

    # If the pivot produced a Series, wrap it in a frame with a single 'val'
    # column and put the timestamp back as the index
    if isinstance(pivoted_data, pd.Series):
        pivoted_data = pd.DataFrame(pivoted_data, columns=['val'])
        pivoted_data.reset_index(drop=False, inplace=True)
        pivoted_data.set_index('timestamp', inplace=True) 
    
    pivoted_data.sort_index(inplace=True) 

    # Remove the name of the column index
    pivoted_data.columns.name = None    
    pivoted_data.index.name = None

    # Identify non-numeric columns
    non_numeric_columns = pivoted_data.select_dtypes(exclude=['int', 'float']).columns

    # Convert non-numeric columns to float (unparseable entries become NaN)
    for col in non_numeric_columns:
        pivoted_data[col] = pd.to_numeric(pivoted_data[col], errors='coerce')

    return pivoted_data

def json_transform(file_path, namespace):
    """Load one namespace's records from an exported JSON file as a wide DataFrame.

    Args:
        file_path: Path of a UTF-8 JSON file whose top level is a mapping
            keyed by namespace.
        namespace: Key of the entry to load.

    Returns:
        The result of ``transform_data`` applied to the normalized records of
        ``namespace``.

    Raises:
        KeyError: If ``namespace`` is not a key of the JSON document.
    """
    with open(file_path, 'rt', encoding='utf-8') as file:
        data = json.load(file)

    # Flatten the namespace's records exactly once before reshaping
    return transform_data(json_normalize(data[namespace]))

def convert_bytes_to_readable(bytes, unit='MB'):
    """Scale a byte quantity down to KB, MB, GB or TB in 1024-based steps.

    Any other ``unit`` returns the input unchanged.
    """
    # Power of 1024 that corresponds to each supported unit
    exponents = {'KB': 1, 'MB': 2, 'GB': 3, 'TB': 4}
    if unit not in exponents:
        return bytes
    return bytes / (1024 ** exponents[unit])

# Per-pod memory export that feeds this figure
file_path = "./dataset/container_memory_usage_bytes_pod.json"
# file_path = "./dataset/container_cpu_usage_seconds_total_pod.json"


namespaces = [ 'kube-system', 'kubernetes-dashboard', 'prometheus', 'ros']

# One frame per namespace, rescaled from bytes to MB as soon as it is loaded
df_ros = convert_bytes_to_readable(json_transform(file_path, namespace= 'ros'), unit='MB')
df_kube_system = convert_bytes_to_readable(json_transform(file_path, namespace='kube-system'), unit='MB')
df_kubernetes_dashboard = convert_bytes_to_readable(json_transform(file_path, namespace='kubernetes-dashboard'), unit='MB')
df_prometheus = convert_bytes_to_readable(json_transform(file_path, namespace='prometheus'), unit='MB')

# Frames and panel titles, listed in the same order
dfs_memory = [df_ros, df_kube_system, df_kubernetes_dashboard, df_prometheus]
titles = ['ROS2', 'kube-system', 'kubernetes-dashboard', 'Prometheus']

# Time window shown in the figure
start = "2024-04-20 09:10:00"
end =   "2024-04-20 15:00:00"

# One stacked panel per namespace, all sharing the time axis
rows = len(dfs_memory)
fig, ax = plt.subplots(rows, 1, figsize=(20, 5*rows), sharex=True)

# Plain decimal tick labels on the y-axes: no offset, no scientific notation
formatter = ticker.ScalarFormatter(useOffset=False)
formatter.set_scientific(False)

for panel, frame, title in zip(ax, dfs_memory, titles):
    # One line per column of the namespace's frame
    for column in frame.columns:
        panel.plot(frame.loc[start:end, column], label=column, linewidth=3)
    # Legend sits outside the panel, to the right
    panel.legend(title=title, loc='upper left', bbox_to_anchor=(1,1))
    panel.grid()
    panel.yaxis.set_major_formatter(formatter)
    panel.set_ylabel('Memory Usage (MB)',fontsize=20)
    panel.set_title(f"Memory Usage for {title}", fontsize=20, y=0.98, ha='center', color='black', backgroundcolor='lightgrey', weight='bold')
    panel.set_ylim(bottom=0, top=1200)
    panel.tick_params(axis='y', labelsize=15)

# Date/time labels are configured on the bottom panel
bottom_panel = ax[-1]
bottom_panel.tick_params(axis='x', labelsize=15)
date_format = mdates.DateFormatter('%Y-%m-%d %H:%M')
bottom_panel.xaxis.set_major_formatter(date_format)

plt.subplots_adjust(hspace=0.05)
plt.savefig('memory_usage.pdf', format='pdf', bbox_inches='tight')
plt.show()

In [None]:
%reset -f 
import json
import pandas as pd
import helper as hp
import importlib as imp
from pandas import json_normalize
from matplotlib import pyplot as plt
from matplotlib import ticker
from matplotlib import dates as mdates
imp.reload(hp)

def transform_data(data):
    """Reshape a frame with a list-valued 'values' column into a timestamp-indexed wide frame.

    Each entry of ``data['values']`` is exploded into its own row and split
    into a ``timestamp`` and a ``value``. Every other column of ``data`` is
    then used as a pivot key, so the result has one column per distinct
    combination of those remaining columns.

    Args:
        data: DataFrame with a 'values' column whose cells are lists of
            two-item ``[timestamp, value]`` entries (presumably a normalized
            Prometheus range-query result — confirm against the caller).

    Returns:
        DataFrame indexed by timestamp, sorted ascending, with index and
        column names cleared. Non-numeric columns are converted with
        ``pd.to_numeric(errors='coerce')``, so unparseable entries become NaN.
    """
    # Explode the 'values' column into multiple rows (one row per list entry;
    # the other columns are repeated for each of them)
    exploded_data = data.explode('values')

    # Split the 'values' column into 'timestamp' and 'value' columns
    exploded_data[['timestamp', 'value']] = pd.DataFrame(exploded_data['values'].tolist(), index=exploded_data.index)


    # Timestamps are interpreted as seconds since the Unix epoch
    exploded_data['timestamp'] = pd.to_datetime(exploded_data['timestamp'], unit='s')
    exploded_data.drop(columns=['values'], inplace=True, axis=0)
    # Replace missing entries in every column with "_"
    # (presumably so that absent labels still form a usable pivot key — confirm)
    exploded_data.fillna("_", inplace=True)
    # Wide layout: one row per timestamp, one column per combination of the
    # remaining (non-timestamp, non-value) columns
    pivoted_data = exploded_data.pivot(index='timestamp', columns=[col for col in exploded_data.columns if col not in ['value', 'timestamp']], values='value')

    # If the pivot produced a Series, wrap it in a frame with a single 'val'
    # column and put the timestamp back as the index
    if isinstance(pivoted_data, pd.Series):
        pivoted_data = pd.DataFrame(pivoted_data, columns=['val'])
        pivoted_data.reset_index(drop=False, inplace=True)
        pivoted_data.set_index('timestamp', inplace=True) 
    
    pivoted_data.sort_index(inplace=True) 

    # Remove the name of the column index
    pivoted_data.columns.name = None    
    pivoted_data.index.name = None

    # Identify non-numeric columns
    non_numeric_columns = pivoted_data.select_dtypes(exclude=['int', 'float']).columns

    # Convert non-numeric columns to float (unparseable entries become NaN)
    for col in non_numeric_columns:
        pivoted_data[col] = pd.to_numeric(pivoted_data[col], errors='coerce')

    return pivoted_data

def json_transform(file_path, namespace):
    """Load one namespace's records from an exported JSON file as a wide DataFrame.

    Args:
        file_path: Path of a UTF-8 JSON file whose top level is a mapping
            keyed by namespace.
        namespace: Key of the entry to load.

    Returns:
        The result of ``transform_data`` applied to the normalized records of
        ``namespace``.

    Raises:
        KeyError: If ``namespace`` is not a key of the JSON document.
    """
    with open(file_path, 'rt', encoding='utf-8') as file:
        data = json.load(file)

    # Flatten the namespace's records exactly once before reshaping
    return transform_data(json_normalize(data[namespace]))

def convert_bytes_to_readable(bytes, unit='MB'):
    """Divide a byte quantity by the power of 1024 matching ``unit``.

    Supported units are KB, MB, GB and TB; any other value of ``unit``
    returns the input as it was given.
    """
    # Units in ascending order: position + 1 is the power of 1024 to divide by
    units = ('KB', 'MB', 'GB', 'TB')
    if unit in units:
        return bytes / (1024 ** (units.index(unit) + 1))
    return bytes



# Namespaces to plot and the matching panel titles, in the same order
namespaces = ['ros', 'kube-system', 'kubernetes-dashboard', 'prometheus']
titles = ['ROS2', 'kube-system', 'kubernetes-dashboard', 'Prometheus']

# Per-pod memory frames, rescaled from bytes to MB as they are loaded
dfs_memory = [
    convert_bytes_to_readable(
        json_transform(file_path="./dataset/container_memory_usage_bytes_pod.json", namespace=namespace),
        unit='MB')
    for namespace in namespaces
]
# Per-pod CPU frames, used exactly as exported
dfs_cpu = [
    json_transform(file_path="./dataset/container_cpu_usage_seconds_total_pod.json", namespace=namespace)
    for namespace in namespaces
]

# Time window shown in the figure
start = "2024-04-20 09:10:00"
end =   "2024-04-20 15:00:00"

# One row per namespace: CPU in the left column, memory in the right column
rows = len(dfs_memory)
fig, ax = plt.subplots(rows, 2, figsize=(30, 5*rows), sharex=True)

# Plain decimal tick labels on the y-axes: no offset, no scientific notation
formatter = ticker.ScalarFormatter(useOffset=False)
formatter.set_scientific(False)

for (cpu_panel, memory_panel), cpu_frame, memory_frame, title in zip(ax, dfs_cpu, dfs_memory, titles):
    # Left column: CPU usage, drawn without a legend
    for column in cpu_frame.columns:
        cpu_panel.plot(cpu_frame.loc[start:end, column], label=column, linewidth=3)
    cpu_panel.grid()
    cpu_panel.yaxis.set_major_formatter(formatter)
    cpu_panel.set_ylabel('CPU Usage',fontsize=20)
    cpu_panel.set_title(f"CPU Usage for {title}", fontsize=20, y=0.98, ha='center', color='black', backgroundcolor='lightgrey', weight='bold')
    cpu_panel.set_ylim(bottom=0, top=0.26)
    cpu_panel.tick_params(axis='y', labelsize=15)

    # Right column: memory usage, with the legend placed outside the panel
    for column in memory_frame.columns:
        memory_panel.plot(memory_frame.loc[start:end, column], label=column, linewidth=3)
    memory_panel.legend(title=title, loc='upper left', bbox_to_anchor=(1,1))
    memory_panel.grid()
    memory_panel.yaxis.set_major_formatter(formatter)
    memory_panel.set_ylabel('Memory Usage (MB)',fontsize=20)
    memory_panel.set_title(f"Memory Usage for {title}", fontsize=20, y=0.98, ha='center', color='black', backgroundcolor='lightgrey', weight='bold')
    memory_panel.set_ylim(bottom=0, top=1200)
    memory_panel.tick_params(axis='y', labelsize=15)

# Date/time labels are configured on the two bottom panels
for bottom_panel in ax[-1]:
    bottom_panel.tick_params(axis='x', labelsize=15)
    date_format = mdates.DateFormatter('%Y-%m-%d %H:%M')
    bottom_panel.xaxis.set_major_formatter(date_format)



plt.subplots_adjust(hspace=0.05, wspace=0.1)
plt.savefig('cpu_memory_usage.pdf', format='pdf', bbox_inches='tight')
plt.show()

In [None]:
%reset -f 
import json
import pandas as pd
import helper as hp
import importlib as imp
from pandas import json_normalize
from matplotlib import pyplot as plt
from matplotlib import ticker
from matplotlib import dates as mdates
imp.reload(hp)

def transform_data(data):
    """Reshape a frame with a list-valued 'values' column into a timestamp-indexed wide frame.

    Each entry of ``data['values']`` is exploded into its own row and split
    into a ``timestamp`` and a ``value``. Every other column of ``data`` is
    then used as a pivot key, so the result has one column per distinct
    combination of those remaining columns.

    Args:
        data: DataFrame with a 'values' column whose cells are lists of
            two-item ``[timestamp, value]`` entries (presumably a normalized
            Prometheus range-query result — confirm against the caller).

    Returns:
        DataFrame indexed by timestamp, sorted ascending, with index and
        column names cleared. Non-numeric columns are converted with
        ``pd.to_numeric(errors='coerce')``, so unparseable entries become NaN.
    """
    # Explode the 'values' column into multiple rows (one row per list entry;
    # the other columns are repeated for each of them)
    exploded_data = data.explode('values')

    # Split the 'values' column into 'timestamp' and 'value' columns
    exploded_data[['timestamp', 'value']] = pd.DataFrame(exploded_data['values'].tolist(), index=exploded_data.index)


    # Timestamps are interpreted as seconds since the Unix epoch
    exploded_data['timestamp'] = pd.to_datetime(exploded_data['timestamp'], unit='s')
    exploded_data.drop(columns=['values'], inplace=True, axis=0)
    # Replace missing entries in every column with "_"
    # (presumably so that absent labels still form a usable pivot key — confirm)
    exploded_data.fillna("_", inplace=True)
    # Wide layout: one row per timestamp, one column per combination of the
    # remaining (non-timestamp, non-value) columns
    pivoted_data = exploded_data.pivot(index='timestamp', columns=[col for col in exploded_data.columns if col not in ['value', 'timestamp']], values='value')

    # If the pivot produced a Series, wrap it in a frame with a single 'val'
    # column and put the timestamp back as the index
    if isinstance(pivoted_data, pd.Series):
        pivoted_data = pd.DataFrame(pivoted_data, columns=['val'])
        pivoted_data.reset_index(drop=False, inplace=True)
        pivoted_data.set_index('timestamp', inplace=True) 
    
    pivoted_data.sort_index(inplace=True) 

    # Remove the name of the column index
    pivoted_data.columns.name = None    
    pivoted_data.index.name = None

    # Identify non-numeric columns
    non_numeric_columns = pivoted_data.select_dtypes(exclude=['int', 'float']).columns

    # Convert non-numeric columns to float (unparseable entries become NaN)
    for col in non_numeric_columns:
        pivoted_data[col] = pd.to_numeric(pivoted_data[col], errors='coerce')

    return pivoted_data

def json_transform(file_path, namespace):
    """Load one namespace's records from an exported JSON file as a wide DataFrame.

    Args:
        file_path: Path of a UTF-8 JSON file whose top level is a mapping
            keyed by namespace.
        namespace: Key of the entry to load.

    Returns:
        The result of ``transform_data`` applied to the normalized records of
        ``namespace``.

    Raises:
        KeyError: If ``namespace`` is not a key of the JSON document.
    """
    with open(file_path, 'rt', encoding='utf-8') as file:
        data = json.load(file)

    # Flatten the namespace's records exactly once before reshaping
    return transform_data(json_normalize(data[namespace]))

def convert_bytes_to_readable(bytes, unit='MB'):
    """Express a byte quantity in KB, MB, GB or TB (each step is a factor of 1024).

    An unrecognised ``unit`` hands the input back unchanged.
    """
    divisor = 1
    # Walk up the unit ladder, growing the divisor by 1024 at every rung
    for candidate in ('KB', 'MB', 'GB', 'TB'):
        divisor *= 1024
        if unit == candidate:
            return bytes / divisor
    return bytes



# Namespaces to plot and the matching panel titles, in the same order
namespaces = ['ros', 'kube-system', 'kubernetes-dashboard', 'prometheus']
titles = ['ROS2', 'kube-system', 'kubernetes-dashboard', 'Prometheus']

# Per-pod memory frames, rescaled from bytes to GB as they are loaded
dfs_memory = [
    convert_bytes_to_readable(
        json_transform(file_path="./dataset/container_memory_usage_bytes_pod.json", namespace=namespace),
        unit='GB')
    for namespace in namespaces
]

# Per-pod CPU frames, multiplied by 100 for the percentage axis
dfs_cpu = [
    json_transform(file_path="./dataset/container_cpu_usage_seconds_total_pod.json", namespace=namespace) * 100
    for namespace in namespaces
]

# Network frames: receive columns next to negated transmit columns, so the
# two directions are drawn on opposite sides of zero; rescaled to KB.
dfs_net = []
for namespace in namespaces:
    received = json_transform(file_path="./dataset/container_network_receive_bytes_total_pod.json", namespace=namespace).add_suffix("_receive")
    transmitted = -json_transform(file_path="./dataset/container_network_transmit_bytes_total_pod.json", namespace=namespace).add_suffix("_transmit")
    dfs_net.append(convert_bytes_to_readable(pd.concat([received, transmitted], axis=1), unit='KB'))

# Time window shown in the figure
start = "2024-04-20 09:10:00"
end =   "2024-04-20 15:00:00"

# One row per namespace; columns are network, memory and CPU
rows = len(dfs_memory)
fig, ax = plt.subplots(rows, 3, figsize=(30, 5*rows), sharex=True)

# Plain decimal tick labels on the y-axes: no offset, no scientific notation
formatter = ticker.ScalarFormatter(useOffset=False)
formatter.set_scientific(False)


def _draw_panel(panel, frame, ylabel, heading, legend_title=None):
    """Plot every column of ``frame`` within the start/end window on ``panel`` and style it."""
    for column in frame.columns:
        panel.plot(frame.loc[start:end, column], label=column, linewidth=3)
    if legend_title is not None:
        # Legend sits outside the panel, to the right
        panel.legend(title=legend_title, loc='upper left', bbox_to_anchor=(1,1))
    panel.grid()
    panel.yaxis.set_major_formatter(formatter)
    panel.set_ylabel(ylabel,fontsize=20)
    panel.set_title(heading, fontsize=20, y=0.98, ha='center', color='black', backgroundcolor='lightgrey', weight='bold')
    panel.tick_params(axis='y', labelsize=15)


# Only the CPU column (right-most) carries a legend
for (net_panel, memory_panel, cpu_panel), net_frame, memory_frame, cpu_frame, title in zip(ax, dfs_net, dfs_memory, dfs_cpu, titles):
    _draw_panel(net_panel, net_frame, 'Network (kB/s)', f"Network Receive/Transmit- {title}")
    _draw_panel(memory_panel, memory_frame, 'Memory Usage (GB)', f"Memory Usage - {title}")
    _draw_panel(cpu_panel, cpu_frame, 'CPU Usage %', f"CPU Usage - {title}", legend_title=title)


# Set the date format on the x-axis of the bottom row, with rotated labels
date_format = mdates.DateFormatter('%Y-%m-%d %H:%M')

for bottom_panel in ax[-1]:
    bottom_panel.tick_params(axis='x', labelsize=15, rotation=60)
    bottom_panel.xaxis.set_major_formatter(date_format)



plt.subplots_adjust(hspace=0.05, wspace=0.2)
plt.savefig('cpu_memory_net.pdf', format='pdf', bbox_inches='tight')
plt.show()