In [27]:
import requests
import json
import logging
from datetime import datetime, timedelta
from dateutil.parser import parse
import pandas as pd

def query_prometheus_range(metric, namespace, pod_name, start_time, end_time, step, server_url="localhost", port=30000):
    """
    Query Prometheus metrics over a given time range.

    Args:
        metric (str): The metric name to query.
        namespace (str): The Kubernetes namespace.
        pod_name (str): The pod name.
        start_time (datetime): The start time for the query.
        end_time (datetime): The end time for the query.
        step (str): The step duration (e.g., '15s', '1m').
        server_url (str): The Prometheus server URL.
        port (int): The Prometheus server port.

    Returns:
        dict: The JSON response from Prometheus.
    """
    query = f'{metric}{{namespace="{namespace}", pod="{pod_name}"}}'

    start_timestamp = int(start_time.timestamp())
    end_timestamp = int(end_time.timestamp())
    bottleneck_start = datetime.fromtimestamp(1716223121.8107705)
    bottleneck_end = datetime.fromtimestamp(1716223362.8116345)
    
    print(start_timestamp, end_timestamp, bottleneck_start, bottleneck_end)
    params = {
        'query': query,
        'start': start_timestamp,
        'end': end_timestamp,
        'step': step
    }

    url = f"http://{server_url}:{port}/api/v1/query_range"
    response = requests.get(url, params=params)

    # if response.status_code == 200:
    #     return response.json()
    # else:
    #     logging.error(f"Error querying Prometheus: {response.status_code} - {response.text}")
    #     return None

    # Process and Format Data
    data = response.json()['data']['result'][0]['values']  # Assuming single metric result
    df = pd.DataFrame(data, columns=['timestamp', 'value'])
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
    df['label'] = df['timestamp'].apply(lambda x: 1 if bottleneck_start <= x <= bottleneck_end else 0)
    df['Date'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
    
    # Output
    for index, row in df.iterrows():
        print(f"{row['Date']},{row['value']},{row['label']}")

# Example usage
if __name__ == "__main__":
    metric = "container_cpu_usage_seconds_total"
    namespace = "social-network"
    pod_name = "nginx-thrift-5676c48695-nnknw"
    start_time = datetime.now() - timedelta(hours=1)
    end_time = datetime.now()
    step = "1s"

    result = query_prometheus_range(metric, namespace, pod_name, start_time, end_time, step)
    if result:
        print(json.dumps(result, indent=4))


1716221901 1716225501 2024-05-20 16:38:41.810771 2024-05-20 16:42:42.811635
2024-05-20 16:31:00,0.101008,0
2024-05-20 16:31:01,0.101008,0
2024-05-20 16:31:02,0.101008,0
2024-05-20 16:31:03,0.101008,0
2024-05-20 16:31:04,0.101008,0
2024-05-20 16:31:05,0.101008,0
2024-05-20 16:31:06,0.101008,0
2024-05-20 16:31:07,0.101008,0
2024-05-20 16:31:08,0.101008,0
2024-05-20 16:31:09,0.101008,0
2024-05-20 16:31:10,0.101008,0
2024-05-20 16:31:11,0.101008,0
2024-05-20 16:31:12,0.101008,0
2024-05-20 16:31:13,0.101008,0
2024-05-20 16:31:14,0.101008,0
2024-05-20 16:31:15,0.101008,0
2024-05-20 16:31:16,0.101008,0
2024-05-20 16:31:17,0.101008,0
2024-05-20 16:31:18,0.101008,0
2024-05-20 16:31:19,0.101801,0
2024-05-20 16:31:20,0.101801,0
2024-05-20 16:31:21,0.101801,0
2024-05-20 16:31:22,0.101801,0
2024-05-20 16:31:23,0.101801,0
2024-05-20 16:31:24,0.101801,0
2024-05-20 16:31:25,0.101801,0
2024-05-20 16:31:26,0.101801,0
2024-05-20 16:31:27,0.101801,0
2024-05-20 16:31:28,0.101801,0
2024-05-20 16:31:29,0.101

In [4]:
def get_prometheus_pod_metrics(pod_list, namespace, server_url="localhost", port = 30000, duration=1200):
    metrics = ["container_cpu_usage_seconds_total", 
    "container_memory_usage_bytes", 
    "container_memory_cache"
    "container_memory_failcnt",
    "container_memory_failures_total",
    "container_oom_events_total",
    "container_network_receive_bytes_total", 
    "container_network_receive_errors_total",
    "container_network_receive_packets_dropped_total",
    "container_network_transmit_bytes_total",
    "container_fs_writes_bytes_total",
    "container_fs_reads_bytes_total",
    "container_llc_occupancy_bytes",
    "container_processes",
    "container_sockets",
    "container_threads",
    ]

    metrics_with_no_container_label = ["container_network_receive_bytes_total", 
    "container_network_transmit_bytes_total",
    ]


    for pod in pod_list.items:
        for metric in metrics:
            container = pod.spec.containers[0].name
            pod_name = pod.metadata.name
            #logging.debug(f"Saving metric {metric} of container: {container}")
            query = f'{metric}{{namespace="{namespace}", pod="{pod_name}"}} offset {duration}s'
            # if metric in metrics_with_no_container_label:
            #     query = f'{metric}{{namespace="{namespace}", pod="{pod_name}"}}[{duration}s]'
            # else:
            #     query = f'{metric}{{namespace="{namespace}", container="{container}", pod="{pod_name}"}}[{duration}s]'                
            response =requests.get(f"http://{server_url}:{port}" + '/api/v1/query', params={'query': query})

            print(response.json()['data']['result'][0]["values"], f, indent=4)


In [11]:
import requests
import pandas as pd
from datetime import datetime, timedelta
from dateutil.parser import parse

# Prometheus Configuration
server_url= 'http://localhost'  # Replace with your Prometheus URL
port = '30000'
metric_name = 'container_cpu_usage_seconds_total'  # Replace with the actual metric name

# Time Range & Bottleneck Period (Adjust as needed)
start_time = parse("2024-05-14 22:00:00")
end_time = parse("2024-05-15 00:00:00")
bottleneck_start = parse("2024-05-14 22:48:57")
bottleneck_end = parse("2024-05-14 22:59:11")
pod_name = "nginx-thrift-5676c48695-nnknw"
namespace = "social-network"
query = f'{metric}{{namespace="{namespace}", pod="{pod_name}"}}'

start_timestamp = int(start_time.timestamp())
end_timestamp = int(end_time.timestamp())
params = {
    'query': query,
    'start': start_timestamp,
    'end': end_timestamp,
    'step': step
}
url = f"http://{server_url}:{port}/api/v1/query_range"
response = requests.get(url, params=params)


# Check for errors
if response.status_code != 200:
    print("Error fetching data from Prometheus:", response.json()['error'])
    exit(1)

# Process and Format Data
data = response.json()['data']['result'][0]['values']  # Assuming single metric result
df = pd.DataFrame(data, columns=['timestamp', 'value'])
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
df['label'] = df['timestamp'].apply(lambda x: 1 if bottleneck_start <= x <= bottleneck_end else 0)
df['Date'] = df['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')

# Output
for index, row in df.iterrows():
    print(f"{row['Date']},{row['value']},{row['label']}Add comment")


ConnectionError: HTTPConnectionPool(host='http', port=80): Max retries exceeded with url: //localhost:30000/api/v1/query_range?query=container_cpu_usage_seconds_total%7Bnamespace%3D%22social-network%22%2C+pod%3D%22nginx-thrift-5676c48695-nnknw%22%7D&start=1715724000&end=1715731200&step=1s (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7a3080f60550>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))