In [28]:
import re
import csv
from collections import defaultdict

In [29]:
def parse_claims(file_path):
    """Parse a claims text file into a list of structured claim records.

    Each line is matched from its start against the shape
    ``Claim <n>: |{<specifications>}, <metric>, <value>|``; lines that do not
    match are skipped silently. Inside the braces, every ``|<name>, <value>|``
    pair becomes one entry of the specifications dict (if a name is repeated
    within one claim, the last value wins).

    Args:
        file_path: Path of the claims file. It is decoded as UTF-8 and a
            leading byte-order mark, if present, is ignored, so the result
            does not depend on the platform's default locale encoding.

    Returns:
        A list of dicts in file order, each with the keys
        ``'specifications'`` (dict of name -> value), ``'metric'`` and
        ``'value'``. All extracted strings are stripped of surrounding
        whitespace.
    """
    # Compile once instead of looking the patterns up for every line.
    claim_pattern = re.compile(r'Claim \d+: \|\{(.*?)\}, (.*?), (.*?)\|')
    spec_pattern = re.compile(r'\|([^|]+), ([^|]+)\|')

    claims = []
    # 'utf-8-sig' reads plain UTF-8 and also drops a BOM; with a BOM left in
    # place the first line would not start with "Claim" and would be skipped.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line in file:
            match = claim_pattern.match(line)
            if not match:
                continue
            specifications_raw, metric, value = match.groups()
            specifications = spec_pattern.findall(specifications_raw)
            claims.append({
                'specifications': {k.strip(): v.strip() for k, v in specifications},
                'metric': metric.strip(),
                'value': value.strip()
            })
    return claims

In [30]:
def profile_claims(claims):
    """Tally specification names, specification values and metrics.

    Args:
        claims: Iterable of claim dicts carrying a ``'specifications'``
            mapping and a ``'metric'`` entry; ``'value'`` is read once per
            specification entry.

    Returns:
        A 3-tuple of defaultdicts:
        * specification name -> number of claims mentioning it,
        * specification name -> specification value -> list of the claim
          values recorded under that pair,
        * metric -> number of claims using it.
    """
    spec_name_counts = defaultdict(int)
    claim_values_by_spec = defaultdict(lambda: defaultdict(list))
    metric_counts = defaultdict(int)

    for record in claims:
        for spec_name, spec_value in record['specifications'].items():
            spec_name_counts[spec_name] = spec_name_counts[spec_name] + 1
            values_for_name = claim_values_by_spec[spec_name]
            values_for_name[spec_value].append(record['value'])

        metric_name = record['metric']
        metric_counts[metric_name] = metric_counts[metric_name] + 1

    return spec_name_counts, claim_values_by_spec, metric_counts

In [31]:
def save_profiling_to_csv_and_log(name_distribution, value_distribution, metric_distribution, output_filename):
    """Write the three profiling distributions into a single CSV file.

    The file holds three sections separated by an empty row, each made of a
    title row, a header row and one row per entry:
    specification-name counts, per-name value counts (the length of each
    occurrence list), and metric counts.

    Args:
        name_distribution: Mapping of specification name -> count.
        value_distribution: Mapping of specification name -> mapping of
            specification value -> sized collection of occurrences.
        metric_distribution: Mapping of metric -> count.
        output_filename: Destination path; it is overwritten and encoded as
            UTF-8.
    """
    def report_rows():
        # Section 1: how often each specification name appears.
        yield ['Distributions of name in specifications']
        yield ['Key', 'Number of Items']
        for spec_name, total in name_distribution.items():
            yield [spec_name, total]

        # Section 2: how often each value appears under each name.
        yield []  # empty row acts as a separator
        yield ['Distributions of values for each name of each specification']
        yield ['Name', 'Value', 'Occurrences']
        for spec_name, per_value in value_distribution.items():
            for spec_value, hits in per_value.items():
                yield [spec_name, spec_value, len(hits)]

        # Section 3: how often each metric appears.
        yield []  # empty row acts as a separator
        yield ['Distributions of metrics']
        yield ['Metric', 'Count']
        for metric_name, total in metric_distribution.items():
            yield [metric_name, total]

    with open(output_filename, 'w', newline='', encoding='utf-8') as csv_file:
        # The generator is consumed lazily, so rows reach the file one by one.
        csv.writer(csv_file).writerows(report_rows())

In [32]:
def main(input_file='claims_test.txt', output_file='NAME_PROFILING.CSV'):
    """Run the whole profiling pipeline: parse, profile, print and save.

    Args:
        input_file: Path of the claims text file to parse. Defaults to the
            file name this notebook has always used.
        output_file: Path of the CSV report to write. Defaults to the file
            name this notebook has always used.

    The three distributions are printed to stdout and then written to
    ``output_file``; nothing is returned.
    """
    # Parse claims
    claims = parse_claims(input_file)

    # Profile claims
    name_distribution, value_distribution, metric_distribution = profile_claims(claims)

    # Print and save distributions
    print("Distributions of 'name' in specifications:")
    for name, count in name_distribution.items():
        print(f"{name}: {count}")

    print("\nDistributions of 'values' for each name of each specification:")
    for name, values in value_distribution.items():
        print(f"{name}:")
        for value, occurrences in values.items():
            print(f"  {value}: {len(occurrences)} occurrences")

    print("\nDistributions of metrics:")
    for metric, count in metric_distribution.items():
        print(f"{metric}: {count}")

    save_profiling_to_csv_and_log(name_distribution, value_distribution, metric_distribution, output_file)
    print(f"\nProfiling results saved to {output_file}")

# Entry point: run the profiling pipeline only when this code is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()



Distributions of 'name' in specifications:
Method: 90
Dataset: 96
Model: 6
Task: 42

Distributions of 'values' for each name of each specification:
Method:
  Pop: 9 occurrences
  BPRMF: 9 occurrences
  SASRec: 9 occurrences
  BM25: 9 occurrences
  UniSRec: 9 occurrences
  VQ-Rec: 9 occurrences
  HaluEval: 12 occurrences
  FACTOOL: 12 occurrences
  Ours: 12 occurrences
Dataset:
  ML-1M: 18 occurrences
  Games: 18 occurrences
  Lastfm: 18 occurrences
  N/A: 6 occurrences
  HaluEval Li et al. (2023a): 36 occurrences
Model:
  Speech-GPT3.5: 2 occurrences
  PerceptiveAgent: 2 occurrences
  PerceptiveAgent -w/o captions: 2 occurrences
Task:
  Performance evaluation: 6 occurrences
  QA: 12 occurrences
  Summarization: 12 occurrences
  Dialogue: 12 occurrences

Distributions of metrics:
N@1: 18
N@5: 18
N@10: 18
BERTScore: 3
Accuracy: 3
Acc.: 9
R: 9
P: 9
F1: 9

Profiling results saved to CLAIMS_PROFILING.CSV
