In [7]:
import re
import pickle  # if you need it later; not used here

from collections import Counter

# Path to the log file
log_file_path = '../femnist_logging'

# Regex patterns
# "client <id> finished" -> group 1 is the numeric client ID.
finished_pattern = re.compile(r'client (\d+) finished')
# "Selected <n> participants to run: [<id>, <id>, ...]" -> group 1 is the
# comma-separated text between the square brackets.
selected_pattern = re.compile(r'Selected \d+ participants to run: \[([0-9,\s]+)\]')


def collect_ids(lines):
    """Scan log lines and collect the client IDs they mention.

    Args:
        lines: Iterable of strings (e.g. an open text file), one log line each.

    Returns:
        A ``(finished_ids, selected_ids)`` tuple of lists of ints, in the
        order encountered and with duplicates preserved. ``finished_ids``
        holds the ID from every line matching ``finished_pattern``;
        ``selected_ids`` holds every ID listed inside the brackets of lines
        matching ``selected_pattern``.

    Raises:
        ValueError: If a bracketed entry is not a single integer
            (e.g. two numbers separated only by whitespace).
    """
    finished = []
    selected = []
    for line in lines:
        # A line is checked against both patterns independently.
        m_fin = finished_pattern.search(line)
        if m_fin:
            finished.append(int(m_fin.group(1)))

        m_sel = selected_pattern.search(line)
        if m_sel:
            # Split the bracket contents on commas; skip empty entries such as
            # those produced by a trailing comma.
            tokens = (tok.strip() for tok in m_sel.group(1).split(','))
            selected.extend(int(tok) for tok in tokens if tok)
    return finished, selected


def find_mismatches(finished_counts, selected_counts):
    """Return the IDs whose occurrence counts differ between two tallies.

    Args:
        finished_counts: Mapping of client ID -> number of "finished" lines.
        selected_counts: Mapping of client ID -> number of times selected.

    Returns:
        Dict mapping each differing client ID to a
        ``(finished_count, selected_count)`` tuple. An ID missing from one
        mapping is treated as having a count of 0 there.
    """
    mismatches = {}
    for cid in set(finished_counts) | set(selected_counts):
        fin_cnt = finished_counts.get(cid, 0)
        sel_cnt = selected_counts.get(cid, 0)
        if fin_cnt != sel_cnt:
            mismatches[cid] = (fin_cnt, sel_cnt)
    return mismatches


# Runs when executed as a script or notebook cell, but not when the helpers
# above are imported from elsewhere.
if __name__ == "__main__":
    # Lists to collect IDs (with duplicates).
    # Decode explicitly so the result does not depend on the platform's
    # default encoding; undecodable bytes are replaced rather than aborting
    # the scan, since the patterns only match ASCII text.
    with open(log_file_path, 'r', encoding='utf-8', errors='replace') as f:
        finished_ids, selected_ids = collect_ids(f)

    # Count occurrences
    finished_counts = Counter(finished_ids)
    selected_counts = Counter(selected_ids)

    # Find IDs with mismatched counts
    all_ids = set(finished_counts) | set(selected_counts)
    mismatches = find_mismatches(finished_counts, selected_counts)

    # Print results
    if mismatches:
        print("IDs with different occurrence counts between 'finished' and 'selected':")
        for cid, (fin_cnt, sel_cnt) in sorted(mismatches.items()):
            print(f"  Client {cid}: finished={fin_cnt}, selected={sel_cnt}")
    else:
        print("All client IDs appear the same number of times in both collections.")

IDs with different occurrence counts between 'finished' and 'selected':
  Client 1292: finished=0, selected=1
