# Bandwidth data extraction

The data is stored in Prometheus. This script queries Prometheus for the specific time slots in which a successful call was made, calculates the average bandwidth used during each call, and saves that as a data point in a CSV file.

In [1]:
import sys
from datetime import datetime, timedelta
from pymongo import MongoClient
import pandas as pd
import os
import json
import logging
import helperFunctions as hf
import numpy as np
import requests

# Run the shared notebook initialisation provided by helperFunctions.
hf.setup()

# Location of the CSV this notebook produces.
outputFolder = "output_folder/"
outputFile = outputFolder + "SuccessfulCallsUsedBandwidth.csv"

# Create the output directory on first use and record that it was created.
folderIsMissing = not os.path.exists(outputFolder)
if folderIsMissing:
    os.makedirs(outputFolder)
    logging.info(f"The directory \"{outputFolder}\" is created!")

In [2]:
# Offset subtracted from the recorded call times to obtain UTC.
# NOTE(review): a fixed one-hour offset ignores daylight-saving changes —
# confirm that every call was recorded in a UTC+1 timezone.
localToUtcOffset = timedelta(hours=1)

# Calls whose (converted) start lies before this instant are skipped.
# NOTE(review): presumably no usable Prometheus data exists before this
# date — confirm.
earliestStart = datetime(2023, 1, 6)

prometheusUrl = "http://localhost:9090/api/v1/query_range"
requestTimeoutSeconds = 30
inputTimeFormat = "%Y-%m-%d %H:%M:%S.%f"
prometheusTimeFormat = "%Y-%m-%dT%H:%M:%S.%fZ"


def toUtc(localTimestamp):
    """Parse a timestamp string from the calls CSV and shift it to UTC."""
    return datetime.strptime(localTimestamp, inputTimeFormat) - localToUtcOffset


def averageReceiveBps(session, instance, start, end):
    """Return the mean receive rate in bits per second for one call.

    Queries Prometheus for the irate of received bytes on eth0 of
    ``instance`` between ``start`` and ``end`` (datetimes, already in UTC)
    at a 15 second step, multiplied by 8 to convert bytes to bits.

    Returns None when the request fails, Prometheus answers with a
    non-200 status, or the response holds no usable series; each of these
    cases is logged so that skipped calls stay visible.
    """
    params = {
        "query": (
            "irate(node_network_receive_bytes_total"
            f'{{instance="{instance}", device="eth0"}}[1m]) * 8'
        ),
        "start": start.strftime(prometheusTimeFormat),
        "end": end.strftime(prometheusTimeFormat),
        "step": "15s",
    }

    # Let requests URL-encode the PromQL expression, and bound the wait so
    # one stalled query cannot hang the whole run.
    try:
        response = session.get(
            prometheusUrl, params=params, timeout=requestTimeoutSeconds
        )
    except requests.RequestException as error:
        logging.error(f"Prometheus request failed for instance {instance}: {error}")
        return None

    if response.status_code != 200:
        logging.warning(
            f"Prometheus returned status {response.status_code} for instance {instance}"
        )
        return None

    try:
        values = response.json()["data"]["result"][0]["values"]
        samples = pd.DataFrame(values, columns=["time", "bps"])
        # Prometheus delivers sample values as strings.
        return samples["bps"].astype(float).mean()
    except (KeyError, IndexError, TypeError, ValueError) as error:
        logging.warning(
            f"No usable bandwidth data for instance {instance} "
            f"between {params['start']} and {params['end']}: {error!r}"
        )
        return None


callsDf = pd.read_csv(outputFolder + "SuccessfulCallsStartAndEnd.csv")

# Collect one [scenario, client, bps] row per call and build the frame once
# at the end instead of concatenating inside the loop.
rows = []

scenarios = callsDf["scenario"].unique()

# A single session reuses the connection to Prometheus across all queries.
with requests.Session() as session:
    for scenario in scenarios:
        scenarioDf = callsDf.loc[callsDf["scenario"] == scenario]
        logging.info(f"scenarioDf {scenario} has {len(scenarioDf)} records")

        for _, row in scenarioDf.iterrows():
            client = row["client"]

            # Get the Prometheus instance label for the client
            instance = hf.getInstance(client)
            if instance is None:
                logging.error(f"Could not find instance for client {client}")
                continue

            # Convert format and localtime to UTC
            start = toUtc(row["start"])
            if start < earliestStart:
                continue
            end = toUtc(row["end"])

            average = averageReceiveBps(session, instance, start, end)
            if average is None:
                continue

            rows.append([scenario, client, average])

df = pd.DataFrame(rows, columns=["scenario", "client", "bps"])
df.to_csv(outputFile, index=False, mode="w", header=True)


      


2023-02-05 15:10:38 INFO     scenarioDf 1 has 2474 records 
2023-02-05 15:57:59 INFO     scenarioDf 8 has 2402 records 
2023-02-05 16:43:45 INFO     scenarioDf 9 has 2356 records 
2023-02-05 17:28:59 INFO     scenarioDf 10 has 1784 records 
2023-02-05 18:02:03 INFO     scenarioDf 11 has 1398 records 
