In [1]:
import os
import re
import csv
from collections import defaultdict

In [2]:
# --- CONFIGURATION ---
# These constants determine which log files are read: each file is looked up as
# LOG_DIR/R<range>_Run<run>_Node<node>.log.
LOG_DIR = "benchmark"  # Log directory; a relative path is resolved against the current working directory
RANGES = [10, 100, 1000, 10000]  # Values used for the R<range> filename part; written to the 'KeyRange' CSV column
RUNS = 3  # Runs per range; run numbers in filenames are 1-based (Run1..Run3)
NODES = 3  # Node 0, 1, 2

In [3]:
# One compiled regex per metric; capture group 1 holds the numeric value as text.
patterns = {
    "server_puts": re.compile(r"Total Local PUTs:\s+(\d+)"),
    "server_gets": re.compile(r"Total Local GETs:\s+(\d+)"),
    "remote_puts": re.compile(r"Remote PUTs Sent:\s+(\d+)"),
    "remote_gets": re.compile(r"Remote GETs Sent:\s+(\d+)"),
    "total_ops": re.compile(r"Total Ops:\s+(\d+)"),
    "time": re.compile(r"Time Elapsed:\s+([\d\.]+)\s+s"),
    "latency": re.compile(r"Average Latency:\s+([\d\.]+)\s+us"),
}

# Metrics stored as floats; every other metric is an integer counter.
_FLOAT_KEYS = frozenset({"time", "latency"})


def parse_file(filepath):
    """Extract the benchmark metrics from a single log file.

    Args:
        filepath: Path of the log file to read.

    Returns:
        A dict with one entry per key in ``patterns``. ``time`` and
        ``latency`` are floats, all other metrics are ints. A metric whose
        line is not found in the file is reported as 0. Returns ``None``
        (after printing a warning) when the file does not exist.
    """
    try:
        with open(filepath, 'r') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Warning: Missing file {filepath}")
        return None

    data = {}
    for key, pattern in patterns.items():
        match = pattern.search(content)
        if match is None:
            # Missing metrics default to 0 so every returned dict has the same keys.
            data[key] = 0
            continue
        raw = match.group(1)
        # Convert counters straight from the matched digits: going through
        # float() first silently rounds integers larger than 2**53.
        data[key] = float(raw) if key in _FLOAT_KEYS else int(raw)
    return data

In [4]:
# Collects one CSV row per node log, plus one "System" row per complete run.
all_rows = []

print(f"Parsing logs in {os.path.abspath(LOG_DIR)}...")

for r in RANGES:
    for run in range(1, RUNS + 1):
        # Metrics of every node whose log was found for this (range, run) pair.
        run_nodes = []

        for n in range(NODES):
            filename = f"R{r}_Run{run}_Node{n}.log"
            filepath = os.path.join(LOG_DIR, filename)
            node_data = parse_file(filepath)

            # parse_file returns None (and prints a warning) for a missing log.
            if not node_data:
                continue

            run_nodes.append(node_data)
            all_rows.append({
                'KeyRange': r,
                'Run': run,
                'RecordType': f"Node{n}",
                # Throughput is only computed for the System row; node rows carry 0.
                'Throughput': 0,
                'Latency': node_data['latency'],
                'TotalOps': node_data['total_ops'],
                'ServerGets': node_data['server_gets'],
                'ServerPuts': node_data['server_puts'],
                'RemoteGets': node_data['remote_gets'],
                'RemotePuts': node_data['remote_puts'],
                'Duration': node_data['time']
            })

        # The system aggregate is only emitted when every node reported.
        if len(run_nodes) != NODES:
            print(f"Skipping System Aggregate for Range {r} Run {run} (Missing Logs)")
            continue

        total_sys_ops = sum(d['total_ops'] for d in run_nodes)
        total_server_gets = sum(d['server_gets'] for d in run_nodes)
        total_server_puts = sum(d['server_puts'] for d in run_nodes)
        total_remote_puts = sum(d['remote_puts'] for d in run_nodes)
        total_remote_gets = sum(d['remote_gets'] for d in run_nodes)

        # System Throughput = Total Ops / elapsed time of the slowest node
        max_time = max(d['time'] for d in run_nodes)
        if max_time > 0:
            sys_throughput = total_sys_ops / max_time
        else:
            # A log without a "Time Elapsed" line is parsed as time 0; avoid
            # dividing by zero and make the gap visible instead of crashing.
            print(f"Warning: No elapsed time for Range {r} Run {run}; Throughput set to 0")
            sys_throughput = 0

        # System Latency = unweighted average of the node latencies
        avg_sys_latency = sum(d['latency'] for d in run_nodes) / len(run_nodes)

        all_rows.append({
            'KeyRange': r,
            'Run': run,
            'RecordType': "System",
            'Throughput': sys_throughput,
            'Latency': avg_sys_latency,
            'TotalOps': total_sys_ops,
            'ServerGets': total_server_gets,
            'ServerPuts': total_server_puts,
            'RemoteGets': total_remote_gets,
            'RemotePuts': total_remote_puts,
            'Duration': max_time
        })

# Column order of the output file; the names match the keys of the row dicts in all_rows.
fieldnames = [
    'KeyRange', 'Run', 'RecordType',
    'Throughput', 'Latency',
    'TotalOps', 'ServerGets', 'ServerPuts',
    'RemoteGets', 'RemotePuts', 'Duration',
]
csv_filename = 'benchmark_results.csv'

# newline='' leaves line-ending handling to the csv module.
with open(csv_filename, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for row in all_rows:
        writer.writerow(row)

print(f"Done! Generated '{csv_filename}'.")

Parsing logs in /home/rishad/DHT/logs/benchmark...
Done! Generated 'benchmark_results.csv'.
