In [47]:
import json
import os
import csv
from google.colab import drive

In [48]:
# Mount Google Drive at /content/drive; the trace directory read below and the
# Edges.csv / Nodes.csv outputs written below all live under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [49]:
import json
import os

def calculate_average_durations(span, parent_service=None, averages=None, counts=None, delays=None):
    """Accumulate per-edge duration and delay totals while walking a span's references.

    When both ``parent_service`` and this span's ``process.serviceName`` are
    truthy, the span's ``duration`` and ``relativeStartTime`` are added to the
    running totals stored under the key ``(parent_service, service_name)`` and
    the key's occurrence count is incremented. The function then recurses into
    the ``span`` object embedded in every entry of this span's ``references``
    list, passing this span's service name down as ``parent_service``.

    Despite the name, the returned dictionaries hold running *sums*; dividing
    by ``counts`` to obtain averages is left to the caller.

    NOTE(review): the span reached through a reference is treated here as the
    child of the current span. Confirm this matches the direction of
    ``references`` in the trace format being loaded.

    Args:
        span: Span dictionary. Anything that is not a dict is ignored.
        parent_service: Service name of the span that led to this one, or
            ``None`` for a top-level call.
        averages: Dict of summed durations per edge, updated in place. A new
            dict is created when ``None``.
        counts: Dict of occurrence counts per edge, updated in place. A new
            dict is created when ``None``.
        delays: Dict of summed ``relativeStartTime`` values per edge, updated
            in place. A new dict is created when ``None``.

    Returns:
        The tuple ``(averages, counts, delays)``.
    """
    if averages is None:
        averages = {}
    if counts is None:
        counts = {}
    if delays is None:
        delays = {}

    # Nothing to record or walk when the span itself is missing or malformed.
    if not isinstance(span, dict):
        return averages, counts, delays

    process = span.get("process") or {}
    service_name = process.get("serviceName")

    if parent_service and service_name:
        key = (parent_service, service_name)
        duration = span.get("duration", 0)
        delay = span.get("relativeStartTime", 0)
        averages[key] = averages.get(key, 0) + duration
        counts[key] = counts.get(key, 0) + 1
        delays[key] = delays.get(key, 0) + delay

    # "references" may be absent or null, and a reference may carry only IDs
    # without an embedded span object; skip those instead of crashing.
    for reference in span.get("references") or []:
        child_span = reference.get("span") if isinstance(reference, dict) else None
        if isinstance(child_span, dict):
            calculate_average_durations(child_span, service_name, averages, counts, delays)

    return averages, counts, delays

# Running totals keyed by (parent service, child service); these are filled in
# place by calculate_average_durations while the trace files are scanned.
averages = {}
counts = {}
delays = {}

dir_path = '/content/drive/My Drive/FYP/media-microservices/traces'

# Feed every span of every trace found in the directory's .json files to the accumulator
trace_file_names = [name for name in os.listdir(dir_path) if name.endswith(".json")]
for trace_file_name in trace_file_names:
    with open(os.path.join(dir_path, trace_file_name), 'r') as trace_file:
        traces = json.load(trace_file).get('data', [])
    for trace in traces:
        for span in trace.get('spans', []):
            calculate_average_durations(span, None, averages, counts, delays)

# Drop every edge whose parent or child service name contains "nginx"
keys_to_exclude = {edge for edge in counts if "nginx" in edge[0] or "nginx" in edge[1]}
for edge in keys_to_exclude:
    for totals in (averages, counts, delays):
        del totals[edge]

# Turn the summed durations and delays into per-edge means
final_averages = {edge: averages[edge] / counts[edge] for edge in averages}
final_delays = {edge: delays[edge] / counts[edge] for edge in delays}

In [50]:
csv_file_path = '/content/drive/My Drive/FYP/media-microservices/Edges.csv'  # Adjust to your Drive layout

# One row per (parent, child) edge: mean duration, mean delay and number of observations
with open(csv_file_path, 'w', newline='') as edges_file:
    edge_writer = csv.writer(edges_file)
    edge_writer.writerow(['Parent Service', 'Child Service', 'Average Duration', 'Delay', 'Count'])
    edge_writer.writerows(
        [parent, child, mean_duration, final_delays[(parent, child)], counts[(parent, child)]]
        for (parent, child), mean_duration in final_averages.items()
    )

In [51]:
# Every service name that appears on either end of a retained edge in `counts`
unique_service_names = set()
unique_service_names.update(service_name for key in counts for service_name in key)

# A set has no defined iteration order, and for strings that order can change
# between interpreter sessions. Sort so Nodes.csv is identical on every run.
unique_service_list = sorted(unique_service_names)

# Write the service names to their own CSV file, one per row
unique_csv_file_path = '/content/drive/My Drive/FYP/media-microservices/Nodes.csv'

with open(unique_csv_file_path, 'w', newline='') as unique_csv_file:
    csv_writer = csv.writer(unique_csv_file)
    csv_writer.writerow(['Service Name'])
    csv_writer.writerows([service_name] for service_name in unique_service_list)