
# ParTES plotting notebook

## V-1 Raw Data
### V-1.1 Single rank inverted CDF 

Plot the inverse CDF (quantile function) of a single rank's measured times, in nanoseconds, with one curve per `ng` value found in the folder.

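**Usage**:
- folder: the directory containing the `meas_r<rank>_ng<ng>.csv` files
- rank: the MPI rank whose measurements are plotted
- ntiles: the number of quantile points sampled from each file
- cut_tile: the fraction (0 to 1) of the quantile points to draw, counted from the lowest; points above this fraction are dropped
- y_scale: the y-axis scaling, one of `'linear'`, `'log10'`, `'log2'`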

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import glob
import os

# Plot the inverse CDF of one rank's measured times, one curve per ng value.
folder = '.'
rank = 2
# Number of quantile points sampled per file, and the fraction of them
# (counted from the lowest quantile) that is actually drawn.
ntiles = 100
cut_tile = 0.99

# Choose y-axis scale, available: 'log10', 'log2', 'linear'
y_scale = 'linear'

# Each supported scale maps to (transform applied to the times, y-axis label).
# The label names the transform so log-scaled values are not labeled as raw ns.
scale_options = {
    'linear': (None, 'Time (ns)'),
    'log10': (np.log10, 'log10(Time (ns))'),
    'log2': (np.log2, 'log2(Time (ns))'),
}
if y_scale not in scale_options:
    # A misspelled scale used to fall back to linear without any notice.
    raise ValueError(
        f"Unknown y_scale {y_scale!r}; expected one of {sorted(scale_options)}"
    )
transform, y_label = scale_options[y_scale]


def _ng_from_path(path):
    """Return the text between '_ng' and '.csv' in the file name of *path*."""
    return os.path.basename(path).split('_ng')[1].split('.csv')[0]


def _ng_sort_key(path):
    """Order files numerically by ng; non-numeric ng values sort last, as text."""
    ng = _ng_from_path(path)
    return (0, int(ng), '') if ng.isdigit() else (1, 0, ng)


pattern = os.path.join(folder, f'meas_r{rank}_ng*.csv')
# glob returns files in arbitrary order; sort so the legend is stable.
csv_files = sorted(glob.glob(pattern), key=_ng_sort_key)
if not csv_files:
    print(f'No files match {pattern}')

# Number of quantile points kept. round() avoids losing a tile to float
# error (e.g. 0.29 * 100 == 28.999...), and the clamp keeps the slice valid.
max_tiles = min(ntiles, max(0, round(cut_tile * ntiles)))

plt.figure(figsize=(10, 6))
n_plotted = 0
for csv_file in csv_files:
    # atleast_1d: a file holding a single value loads as a 0-d array.
    data = np.atleast_1d(np.loadtxt(csv_file))
    n = len(data)
    if n == 0:
        print(f'Skipping {csv_file}: no measurements')
        continue
    sorted_t = np.sort(data)

    # Pick ntiles evenly spaced indices into the sorted samples and keep
    # only the lowest max_tiles of them.
    q_ids = np.linspace(0, n - 1, ntiles, dtype=int)[:max_tiles]
    # Quantile of each selected sample: (index + 1) / sample count.
    qs = (q_ids + 1) / n
    filt_t = sorted_t[q_ids]

    # Apply y-axis scaling ('linear' has no transform).
    if transform is not None:
        filt_t = transform(filt_t)

    ng = _ng_from_path(csv_file)
    plt.plot(qs, filt_t, label=f'ng={ng}', marker='x', markersize=1, linestyle='--')
    n_plotted += 1

plt.xlabel('Quantile')
plt.ylabel(y_label)
plt.title(f'ICDF Comparison for Rank {rank}')
if n_plotted:
    # legend() warns when there are no labeled curves to list.
    plt.legend()
plt.grid(True, alpha=0.3)
plt.show()


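### V-1.2 Single-rank normalized box plot

Draw a box plot of a single rank's measured times, one box per `ng` value. Each series is divided by its own minimum, so the y-axis shows runtime relative to the fastest measurement.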

In [None]:
# V-1.2
# Box plot of one rank's measured times, one box per ng value, with each
# series divided by its own minimum.
# Parameters for box plot
folder = '.'  # Folder containing CSV files
rank = 0         # MPI rank to analyze
ntiles = 100     # Number of quantiles (not used for box plot but kept for consistency)
cut_tile = 1.0   # Cut-off for tiles (not used for box plot but kept for consistency)

# Load data for box plot
pattern = os.path.join(folder, f'meas_r{rank}_ng*.csv')
csv_files = glob.glob(pattern)
if not csv_files:
    print(f'No files match {pattern}')

# Measurements and their minimum, keyed by the ng string taken from the file name
data_dict = {}
min_times = {}

# First pass: collect all data and find minimum times
for csv_file in csv_files:
    # atleast_1d: a file holding a single value loads as a 0-d array.
    data = np.atleast_1d(np.loadtxt(csv_file))
    ng = os.path.basename(csv_file).split('_ng')[1].split('.csv')[0]
    if data.size == 0:
        # np.min raises on an empty array.
        print(f'Skipping {csv_file}: no measurements')
        continue
    shortest = np.min(data)
    if shortest <= 0:
        # Dividing by a non-positive minimum would give inf/NaN or flipped boxes.
        print(f'Skipping {csv_file}: minimum time {shortest} is not positive')
        continue
    data_dict[ng] = data
    min_times[ng] = shortest

# Prepare data for box plot, ordered by numeric ng
box_data = []
labels = []
for ng in sorted(data_dict, key=int):
    # Normalize data to minimum measured runtime
    box_data.append(data_dict[ng] / min_times[ng])
    labels.append(f'ng={ng}')

# Create normalized box plot
plt.figure(figsize=(12, 6))
if box_data:
    plt.boxplot(box_data, patch_artist=True)
    # Boxes sit at positions 1..N by default. Labeling the ticks here avoids
    # boxplot's `labels` keyword, which newer Matplotlib releases deprecate.
    plt.xticks(range(1, len(labels) + 1), labels, rotation=45)
plt.xlabel('ng values')
plt.ylabel('Normalized Time (relative to minimum)')
plt.title(f'Normalized Runtime Distribution for Rank {rank}')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## V2 Derived/Statistical Metrics
### V2.1 