In [None]:
from datasets import load_dataset
from evaluate import load
from pathlib import Path
import os
import yaml
import re
import math

In [None]:
# Input: location of the predictions file, handed to load_dataset('json', data_files=...)
path_to_predictions = r''  # path to .json file
# Output: directory that receives 'metrics.yml' (the directory is created if it is missing)
path_to_save_metrics = r''

In [None]:
# Load the predictions file and keep only the 'train' entry of what load_dataset returns.
# Later cells read the 'labels', 'prediction' and 'input_code' fields of every example.
dataset = load_dataset('json', data_files=path_to_predictions)['train']

In [None]:
# Tokenizes an identifier into its words. One match is either:
#   - a lowercase run with optional trailing digits            ("get", "value2")
#   - a capital followed by lowercase letters, optional digits ("Name", "Item3")
#   - a run of capitals with optional digits that ends right before the next
#     capital or at the end of the string                      ("HTTP", "ID42")
pattern = r'[a-z]+[0-9]*|[A-Z](?:[a-z]+[0-9]*|[A-Z]*[0-9]*(?=[A-Z]|$))'

def split(example):
    """Add space-separated, lower-cased word versions of the label and the prediction.

    Reads ``example['labels']`` and ``example['prediction']``, tokenizes each
    with ``pattern`` and stores the words joined by single spaces under
    ``'words_labels'`` and ``'words_predictions'``.

    The example is modified in place and also returned.
    """
    column_pairs = (
        ('labels', 'words_labels'),
        ('prediction', 'words_predictions'),
    )
    for source_key, target_key in column_pairs:
        words = re.findall(pattern, example[source_key])
        example[target_key] = ' '.join(words).lower()
    return example

In [None]:
# Run split() over every example to add the 'words_labels' / 'words_predictions' fields
predictions_dataset = dataset.map(split)

In [None]:
# for each example we extract the body from code (string between { and })
def extract_body(example):
    """Store the text between the first '{' and the last '}' of the code as 'body'.

    Reads ``example['input_code']`` and writes ``example['body']``. The braces
    themselves are not part of the body.

    When the code has no '{', or has no '}' after the first '{', there is no
    body to extract and ``'body'`` is set to the empty string. (Slicing with
    the -1 that ``find``/``rfind`` return for "not found" would otherwise yield
    an arbitrary piece of the code, e.g. everything but its last character.)

    The example is modified in place and also returned.
    """
    code = example['input_code']
    opening = code.find('{')
    closing = code.rfind('}')
    if opening == -1 or closing < opening:
        # covers: no '{' at all, no '}' at all (closing == -1), '}' only before '{'
        example['body'] = ''
    else:
        example['body'] = code[opening + 1:closing]
    return example

In [None]:
# Run extract_body() over every example to add the 'body' field
predictions_dataset = predictions_dataset.map(extract_body)

In [None]:
# count the number of lines for each body
def count_lines(example):
    """Store the number of lines of ``example['body']`` as ``example['nb_lines']``.

    The body is split on newlines and 2 is subtracted from the number of
    pieces, i.e. the piece before the first newline and the piece after the
    last newline are not counted.

    For a body with fewer than two newline-separated pieces (empty body, or a
    body written on a single line) that subtraction would be negative; the
    result is clamped to 0 so that every example has a non-negative line count.

    The example is modified in place and also returned.
    """
    raw_count = len(example['body'].split('\n')) - 2
    example['nb_lines'] = max(raw_count, 0)
    return example

In [None]:
# Run count_lines() over every example to add the 'nb_lines' field
predictions_dataset = predictions_dataset.map(count_lines)

In [None]:
# Body-length buckets. Each entry is a mutable triple
#   [lower bound (inclusive), upper bound (exclusive), number of examples counted so far]
# Consecutive buckets share a boundary and the last bucket has no upper limit.
ranges = [
    [lower, upper, 0]
    for lower, upper in zip(
        (0, 5, 10, 20, 50, 100, 200, 500),
        (5, 10, 20, 50, 100, 200, 500, math.inf),
    )
]

In [None]:
# count the number of examples for each range
def count_examples(example):
    """Increment the counter of the bucket in ``ranges`` that contains this example.

    The bucket is the first one whose bounds satisfy
    ``lower <= example['nb_lines'] < upper``; its third element is increased
    by one. If no bucket matches, nothing is counted.

    Works purely through its side effect on the global ``ranges`` list and
    returns ``None``.
    """
    n_lines = example['nb_lines']
    for bucket in ranges:
        lower, upper = bucket[0], bucket[1]
        if not (lower <= n_lines < upper):
            continue
        bucket[2] += 1
        return

In [None]:
# The counters stored in `ranges` are incremented in place, so they are reset
# first: re-running this cell then recounts instead of adding every example to
# the tallies a second time.
for bucket in ranges:
    bucket[2] = 0

# count_examples returns nothing; map() is used here only to call it once per example
predictions_dataset.map(count_examples)

In [None]:
# Collects every computed metric; this dict is what gets dumped to 'metrics.yml'
metrics = {}

In [None]:
# for each range calculate the metrics
rouge = load('rouge')
# Layout: metrics['rouge'][<rouge variant>][<'lower-upper' bucket label>] -> score
metrics['rouge'] = {
    'rouge1': {},
    'rouge2': {},
    'rougeL': {},
    'rougeLsum': {}
}

# The loop variable is deliberately not named `range`: at notebook top level
# that would replace the builtin `range` for every cell executed afterwards.
for line_range in ranges:
    lower, upper = line_range[0], line_range[1]
    bucket_key = f'{lower}-{upper}'

    # Keep only the examples whose body length falls into [lower, upper)
    filtered_dataset = predictions_dataset.filter(
        lambda example: lower <= example['nb_lines'] < upper)

    if len(filtered_dataset) == 0:
        # Nothing to score in this bucket: record 0 for every ROUGE variant
        for scores_by_bucket in metrics['rouge'].values():
            scores_by_bucket[bucket_key] = 0
        continue

    results = rouge.compute(
        predictions=filtered_dataset['words_predictions'],
        references=filtered_dataset['words_labels'])

    for k, v in results.items():
        # float(): presumably the scores are numpy scalars, which would not be
        # written by yaml.dump as plain numbers -- TODO confirm
        metrics['rouge'][k][bucket_key] = float(v)

In [None]:
# Make sure the output directory exists: missing parent directories are created
# and an already existing directory is not treated as an error
Path(path_to_save_metrics).mkdir(parents=True, exist_ok=True)

In [None]:
# Write the metrics dict as YAML to 'metrics.yml' inside the output directory
# (mode 'w': an existing file with that name is overwritten)
with open(os.path.join(path_to_save_metrics, 'metrics.yml'), 'w') as f:
    yaml.dump(metrics, f)