In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker # For formatting ticks
import numpy as np
import os

# --- User-configurable settings ---

# Path to the CSV file produced by the previous processing script,
# e.g. 'transactions_processed_span_100ms.csv'.
input_file = 'your_processed_file.csv'  # <--- *** SET THIS TO YOUR FILENAME ***

# Number of consecutive spans (rows of the input CSV) merged into one unit.
# With 100ms spans and unit_size = 5, every plotted unit covers
# 5 * 100ms = 500ms.
unit_size = 5  # <--- *** ADJUST AS NEEDED ***

# --- Validate the settings before any work is done ---

# Fail early with a clear message when the configured path does not exist.
if not os.path.exists(input_file):
    raise FileNotFoundError(
        f"Error: Input file '{input_file}' not found. Please check the path."
    )

# unit_size is later used as an integer divisor, so it has to be an int > 0.
if not (isinstance(unit_size, int) and unit_size > 0):
    raise ValueError("Error: 'unit_size' must be a positive integer.")

# Echo the effective configuration so the run is self-describing.
print(
    "\n".join(
        [
            "Configuration:",
            f"- Input file: {input_file}",
            f"- Unit size: {unit_size} spans per unit",
        ]
    )
)

In [None]:
# Load the processed span data with pandas.
# Only the read itself sits inside the try block, so that unrelated failures
# are not misreported as CSV read errors.
try:
    df = pd.read_csv(input_file)
except Exception as e:
    # Chain the original exception so the underlying cause stays in the traceback.
    raise RuntimeError(f"Error reading CSV file '{input_file}': {e}") from e

print(f"Successfully loaded {input_file}. Shape: {df.shape}")
# Display the first few rows so the input can be verified by eye.
print("\nFirst 5 rows of the input data:")
print(df.head())

# The aggregation below reads these columns; stop with a clear message if any
# of them is missing.
required_columns = ['from_start', 'ok', 'error']
if not all(col in df.columns for col in required_columns):
    raise ValueError(f"Error: Input CSV must contain columns: {required_columns}. Found: {df.columns.tolist()}")

# Assign every row to a unit by integer-dividing its row position by unit_size:
# rows 0 .. unit_size-1 form unit 0, rows unit_size .. 2*unit_size-1 form
# unit 1, and so on. The position (not the index label) is used so the grouping
# does not depend on how the DataFrame happens to be indexed.
df['unit_index'] = np.arange(len(df)) // unit_size

# Aggregate per unit:
#   - 'ok' and 'error' are counts, so they are summed;
#   - 'from_start' is taken from the last span of the unit. Summing it (as was
#     done before) adds time offsets together and does not yield the end time
#     the unit is meant to carry.
# NOTE(review): assumes rows are in chronological order, so the last row of a
# unit holds its latest 'from_start' value - confirm against the producer script.
aggregated_df = df.groupby('unit_index').agg(
    {'ok': 'sum', 'error': 'sum', 'from_start': 'last'}
)

# 1-based unit number for plotting (unit_index itself starts at 0).
aggregated_df['unit_number'] = aggregated_df.index + 1

# unit_number is intentionally kept as a regular column; call
# aggregated_df.set_index('unit_number') if an index is preferred.

print("\nAggregated data per unit:")
print(aggregated_df.head())
print(f"\nTotal units created: {len(aggregated_df)}")

In [None]:
print("\nGenerating plot...")

# Set a nice visual style. Seaborn is optional (pip install seaborn); without
# it, fall back to one of Matplotlib's bundled seaborn-like style sheets.
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid")
    print("Using Seaborn style.")
except ImportError:
    print("Seaborn not found, using default Matplotlib style.")
    # The bundled whitegrid sheet is named 'seaborn-v0_8-whitegrid' in recent
    # Matplotlib releases and 'seaborn-whitegrid' in older ones. Pick whichever
    # is installed; if neither is, keep the default style instead of raising
    # (the previous name 'seaborn-v0_8-gird' was not a valid style).
    for fallback_style in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
        if fallback_style in plt.style.available:
            plt.style.use(fallback_style)
            break


# Create the figure and a single axes; adjust figsize as needed.
fig, ax = plt.subplots(figsize=(12, 6))

# Plot the 'ok' counts per unit.
ax.plot(aggregated_df['unit_number'], aggregated_df['ok'],
        marker='o', linestyle='-', color='green', linewidth=2, markersize=6, label='OK Count')

# Plot the 'error' counts per unit.
ax.plot(aggregated_df['unit_number'], aggregated_df['error'],
        marker='s', linestyle='--', color='red', linewidth=2, markersize=6, label='Error Count')

# --- Customize Appearance ---

# Title and axis labels.
ax.set_title(f'Transaction Counts per Unit (Unit Size = {unit_size} spans)', fontsize=16)
ax.set_xlabel('Unit Number', fontsize=12)
ax.set_ylabel('Count', fontsize=12)

# Legend identifying the two series.
ax.legend(fontsize=10)

# Unit numbers are integers, so restrict x-axis ticks to integer positions.
ax.xaxis.set_major_locator(mticker.MaxNLocator(integer=True))
# Optionally, format y-axis ticks if numbers get very large:
# ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, p: format(int(x), ',')))

# Explicit grid call for full control over its look (the style may already
# draw one). linewidth is passed as a number rather than a string.
ax.grid(True, which='major', linestyle='--', linewidth=0.5, color='grey', alpha=0.7)

# Avoid clipped labels and titles.
plt.tight_layout()

# Show the plot
plt.show()

print("Plot generation complete.")