In [2]:
import json
import pandas as pd
import zipfile
from transformers import AutoTokenizer
from sklearn.preprocessing import LabelEncoder
import numpy as np
import re

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
import os

# Dynamically set scratch folder to the current working directory
scratch_folder = os.path.join(os.getcwd(), "cache")  # Create a 'cache' folder in your current working directory

# Set environment variables for caching
os.environ["TRANSFORMERS_CACHE"] = os.path.join(scratch_folder, "transformers")
os.environ["TORCH_HOME"] = os.path.join(scratch_folder, "torch")
os.environ["HF_HOME"] = os.path.join(scratch_folder, "huggingface")

# Ensure the cache directories exist
os.makedirs(os.environ["TRANSFORMERS_CACHE"], exist_ok=True)
os.makedirs(os.environ["TORCH_HOME"], exist_ok=True)
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

print(f"Transformers cache directory: {os.environ['TRANSFORMERS_CACHE']}")
print(f"Torch cache directory: {os.environ['TORCH_HOME']}")
print(f"HuggingFace cache directory: {os.environ['HF_HOME']}")

Transformers cache directory: /storage/ice1/4/0/akumar978/BDA Project/cache/transformers
Torch cache directory: /storage/ice1/4/0/akumar978/BDA Project/cache/torch
HuggingFace cache directory: /storage/ice1/4/0/akumar978/BDA Project/cache/huggingface


In [3]:
with open('release_evidences.json', 'r') as f:
    evidence_dict = json.load(f)

with open('release_conditions.json', 'r') as f:
    condition_dict = json.load(f)
    
with open('sample_train_data_200k.json', 'r') as f:
    train_data = json.load(f)
    
with open('sampled_test_combined_data.json','r') as f:
    test_data = json.load(f)

In [5]:
print(train_data[50])

{'instruction': 'Provide Diagnosis', 'input': 'Patient age is 84, sex is M.  Antecedents: Do you have severe Chronic Obstructive Pulmonary Disease (COPD)? Y ; Have you had one or several flare ups of chronic obstructive pulmonary disease (COPD) in the past year? Y ; Do you smoke cigarettes? Y ; Do you have a chronic obstructive pulmonary disease (COPD)? Y ; Have you ever been diagnosed with gastroesophageal reflux? Y ; Do you work in agriculture? Y ; Do you work in construction? Y ; Have you traveled out of the country in the last 4 weeks? N . Symptoms: Do you have a cough that produces colored or more abundant sputum than usual? Y ; Are you experiencing shortness of breath or difficulty breathing in a significant way? Y ; Do you have a cough that produces colored or more abundant sputum than usual? Y ; Do you have a cough? Y ; Have you noticed a wheezing sound when you exhale? Y . ', 'output': ' Differential diagnosis is: Acute COPD exacerbation / infection, Bronchitis, Bronchiectasis

In [19]:
def load_patients(file_path):
    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        with zip_ref.open(zip_ref.namelist()[0]) as f:
            return pd.read_csv(f)

In [20]:
train_patients = load_patients('release_train_patients.zip')
val_patients = load_patients('release_validate_patients.zip')
test_patients = load_patients('release_test_patients.zip')

In [48]:
# test_

In [27]:
def create_text_representation(row, output_path):
    # Gather patient information
    age = row['AGE']
    sex = row['SEX']
    pathology = row['PATHOLOGY']
    initial_evidence = row['INITIAL_EVIDENCE']
    evidences = eval(row['EVIDENCES'])
    evidences = [initial_evidence] + evidences
#     For differential diagnosis 
    data = eval(row['DIFFERENTIAL_DIAGNOSIS'])
    differential_diseases = [item[0] for item in data]
    diseases = ', '.join(differential_diseases)

    description = f"Age: {age}, Sex: {sex}. "
    # Add detailed symptoms and antecedents
    symptom_texts = []
    antecedents = []
    for evidence_code in evidences:
        # Separate multi-choice evidence by value
        if "_@_" in evidence_code:
            evidence, value = evidence_code.split('_@_')
            evidence_text = evidence_dict[evidence]['question_en']
            value_text = evidence_dict[evidence]['value_meaning'].get(value)
            value_text = value_text['en'] if value_text is not None else value
            if value_text=='N':
                value_text = 'No'
            if value_text =='Y':
                value_text = 'Yes'
            if value_text == 'NA':
                value_text = 'Not Applicable'
                
            if evidence_dict[evidence]['is_antecedent']:
                antecedents.append(f"{evidence_text}: {value_text}")
            else:
                symptom_texts.append(f"{evidence_text}: {value_text}")
        else:
            if evidence_dict[evidence_code]['is_antecedent']:
                antecedents.append(evidence_dict[evidence_code]['question_en']+'Yes')
            else:
                symptom_texts.append(evidence_dict[evidence_code]['question_en']+'Yes')

    description += "History:" + "; ".join(antecedents) + ". Symptoms: " + "; ".join(symptom_texts) + "."
    label = pathology
    
    with open(output_path, 'a', encoding='utf-8') as f:
        data = {
            "most_likely_disease": label,
            "differential_diseases": differential_diseases
            
        }
        # Write each JSON object on a new line without pretty printing
        #json.dump(chat_format, f, ensure_ascii=False)
        f.write(json.dumps(data)+"\n")  # Add newline after each JSON object
        

In [34]:
test_results = test_patients.apply(create_text_representation,output_path ='test_result.csv', axis=1)

In [49]:
# #train_results = pd.DataFrame(train_patients.apply(create_text_representation, axis=1).toList(), columns=['text', 'label'])
# val_patient = val_patient
# val_result = val_patient.apply(create_text_representation, output_path ='val_result.jsonl', axis=1 )
# #val_results = pd.DataFrame(val_results.tolist(), columns=['text', 'label'])

# test_patient = test_patient
# test_result = test_patient.apply(create_text_representation,output_path ='test_result.jsonl', axis=1)
# #test_results = pd.DataFrame(test_results.tolist(), columns=['text', 'label'])

# # train_patient = train_patient
# train_result = train_patient.apply(create_text_representation,output_path ='train_result.jsonl', axis=1)

In [50]:
# val_result.to_csv('val_patients_with_text.csv', index=False)
# test_result.to_csv('test_patients_with_text.csv', index=False)
# train_result.to_csv('train_patients_with_text.csv', index=False)

In [51]:
# all_labels = pd.concat([train_patients['PATHOLOGY'], val_patients['PATHOLOGY'], test_patients['PATHOLOGY']])

# # Fit Label Encoder
# label_encoder = LabelEncoder()
# label_encoder.fit(all_labels)

# # Transform pathologies to numerical labels
# train_patients_label = label_encoder.transform(train_patients['PATHOLOGY'])
# val_patients_label = label_encoder.transform(val_patients['PATHOLOGY'])
# test_patients_label = label_encoder.transform(test_patients['PATHOLOGY'])

In [47]:
# # Load and display the first few entries
# def inspect_dataset(file_path, num_entries=5):
#     try:
#         with open(file_path, 'r', encoding='utf-8') as f:
#             print(f"Showing first {num_entries} entries of the dataset:\n")
#             for i, line in enumerate(f):
#                 if i >= num_entries:
#                     break
#                 # Parse the JSON object from each line
#                 chat_format = json.loads(line.strip())
#                 print(json.dumps(chat_format, indent=4, ensure_ascii=False))
#                 print("-" * 80)  # Separator for clarity
#     except Exception as e:
#         print(f"Error while reading the dataset: {e}")

# # Inspect the dataset
# inspect_dataset("test_result.jsonl")

In [4]:
import os
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2024-11-25 20:56:35.817266: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-25 20:56:35.829125: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732586195.843877 3263751 cuda_dnn.cc:8321] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732586195.848641 3263751 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-25 20:56:35.865475: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [5]:
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Phi-3.5-mini-instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2024.11.7: Fast Llama patching. Transformers = 4.46.3.
   \\   /|    GPU: NVIDIA H100 80GB HBM3. Max memory: 79.097 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu124. CUDA = 9.0. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.dev941. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [6]:
from datasets import Dataset
# Load your dataset (assuming it is in JSONL format)
data = []
with open("train_result.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        data.append(json.loads(line))

# Convert to Hugging Face Dataset format
hf_dataset = Dataset.from_list(data)

In [7]:
from unsloth.chat_templates import get_chat_template
from datasets import Dataset

# Initialize the chat template with your tokenizer
tokenizer = get_chat_template(
    tokenizer,
    chat_template="phi-3",  # Specify your chat template
    mapping={
        "role": "from",
        "content": "value",
        "user": "human",
        "assistant": "gpt"
    }
)

# Define the formatting function
def formatting_prompts_func(examples):
    convos = examples["messages"]  # Replace with your column name
    texts = [
        tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
        for convo in convos
    ]
    return {"text": texts}

# Apply the formatting function to your dataset
formatted_dataset = hf_dataset.map(formatting_prompts_func, batched=True)

Map: 100%|██████████| 6000/6000 [00:00<00:00, 34155.48 examples/s]


In [8]:
# print(formatted_dataset[0]["text"])

In [9]:
dataset = load_dataset("json", data_files={"train": "sample_train_data_200k.json", "test": "sampled_test_combined_data.json"})

Generating train split: 200026 examples [00:02, 96340.54 examples/s]
Generating test split: 9062 examples [00:00, 105260.95 examples/s]


In [10]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Phi-3.5-mini-instruct",
    max_seq_length = 1024,
    dtype = None,
    load_in_4bit = True,
)

==((====))==  Unsloth 2024.11.7: Fast Llama patching. Transformers = 4.46.3.
   \\   /|    GPU: NVIDIA H100 80GB HBM3. Max memory: 79.097 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu124. CUDA = 9.0. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.dev941. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [12]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

In [13]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "o_proj","gate_proj","k_proj", "v_proj","up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, 
    bias = "none",    
    use_gradient_checkpointing = True,
    random_state = 3411,
    max_seq_length = 1024,
    use_rslora = False,  # Rank stabilized LoRA
    loftq_config = None, # LoftQ
)

Unsloth 2024.11.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [14]:
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):

    instruction = 'Perform Diagnosis'
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for inputx, output in zip(inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, inputx, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }

pass

dataset = dataset.map(formatting_prompts_func, batched = True,)

Map: 100%|██████████| 200026/200026 [00:01<00:00, 160184.13 examples/s]
Map: 100%|██████████| 9062/9062 [00:00<00:00, 125440.70 examples/s]


In [None]:
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset['train'],
    #eval_dataset = dataset['validation'],
    dataset_text_field = "text",
    max_seq_length = 1024,
    tokenizer = tokenizer,
    args = TrainingArguments(
        per_device_train_batch_size = 16,
        gradient_accumulation_steps = 4,
        warmup_steps = 100,
        num_train_epochs = 1,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        output_dir = "gemma_outputs",
        lr_scheduler_type = "linear",
        optim = "adamw_8bit",
        seed = 3411,
    ),
)
trainer.train()

In [1]:
model.save_pretrained("phi_lora_model")
model.save_pretrained_merged("phi_merged_model", tokenizer, save_method = "merged_16bit",)

NameError: name 'model' is not defined

In [15]:
def generate_text(model,tokenizer, text, max_length):
    inputs = tokenizer(text, return_tensors="pt").to("cuda:0")
    outputs = model.generate(**inputs, max_new_tokens=300)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [16]:
def load_unsloth_model_and_tokenizer(model_path):
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = model_path,
        max_seq_length = 1024,
        dtype = None,
        load_in_4bit = True,
        local_files_only = True,
    )
    return model, tokenizer

In [44]:
import json

def generate_predictions(test_file, model, tokenizer, output_file, max_new_tokens=200):
    """
    Generate predictions for test samples and save to a JSONL file.

    Args:
        test_file (str): Path to the test samples JSON file containing an array of objects.
        model: Loaded Unsloth model.
        tokenizer: Tokenizer for the model.
        output_file (str): Path to save the model predictions JSONL file.
        max_new_tokens (int): Maximum number of new tokens to generate.
    """
    predictions = []

    # Load the entire JSON array from the file
    with open(test_file, 'r') as f:
        data = json.load(f)  # Parse JSON as an array of objects

    for item in data:
        try:
            instruction = item.get("instruction", "Perform Diagnosis")
            human_input = item["input"]
            #print(f"Processing input: {human_input}")

            # Format input for the model using the Alpaca-style template
            prompt = alpaca_prompt.format(instruction, human_input, "")

            # Tokenize the input
            inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

            # Generate predictions
            outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, use_cache=True)
            prediction_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
            print(prediction_text)

            # Save the prediction
            predictions.append({
                "instruction": instruction,
                "input": human_input,
                "output": prediction_text.strip()
            })
        except KeyError as e:
            print(f"Skipping item due to missing key: {e}")
            continue

    # Write all predictions to the output file in JSONL format
    with open(output_file, 'w') as f:
        for prediction in predictions:
            f.write(json.dumps(prediction) + '\n')

    print(f"Predictions saved to {output_file}")


In [45]:
model_path = "phi_outputs/checkpoint-3125"
test_file = "sampled_test_combined_data.json"  # Test samples JSONL file
model_output_file = "model_predictions.json"  # Output predictions JSONL file
model, tokenizer = load_unsloth_model_and_tokenizer(model_path)

==((====))==  Unsloth 2024.11.7: Fast Llama patching. Transformers = 4.46.3.
   \\   /|    GPU: NVIDIA H100 80GB HBM3. Max memory: 79.097 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu124. CUDA = 9.0. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.dev941. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
FastLanguageModel.for_inference(model)
# Generate predictions
generate_predictions(test_file, model, tokenizer, model_output_file)
print(f"Predictions saved to {model_output_file}")

In [27]:
FastLanguageModel.for_inference(model)
# Generate predictions
inp = "Age: 70, Sex: F. History:Have you been in contact with a person with similar symptoms in the past 2 weeks?Yes; Have you traveled out of the country in the last 4 weeks?: No. Symptoms: Do you have a cough?Yes; Have you had significantly increased sweating?Yes; Do you have pain somewhere, related to your reason for consulting?Yes; Characterize your pain:: sensitive; Characterize your pain:: heavy; Do you feel pain somewhere?: top of the head; Do you feel pain somewhere?: forehead; Do you feel pain somewhere?: cheek(R); Do you feel pain somewhere?: cheek(L); Do you feel pain somewhere?: occiput; How intense is the pain?: 7; Does the pain radiate to another location?: nowhere; How precisely is the pain located?: 3; How fast did the pain appear?: 0; Do you have a fever (either felt or measured with a thermometer)?Yes; Do you have a sore throat?Yes; Do you have diffuse (widespread) muscle pain?Yes; Do you have nasal congestion or a clear runny nose?Yes; Do you have a cough?Yes."
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Perform Diagnosis in the format: Differential Diagnosis is: a,b,c... and Disease can be X",
        inp, # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 512, use_cache = True)
tokenizer.batch_decode(outputs)

['Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPerform Diagnosis in the format: Differential Diagnosis is: a,b,c... and Disease can be X\n\n### Input:\nAge: 70, Sex: F. History:Have you been in contact with a person with similar symptoms in the past 2 weeks?Yes; Have you traveled out of the country in the last 4 weeks?: No. Symptoms: Do you have a cough?Yes; Have you had significantly increased sweating?Yes; Do you have pain somewhere, related to your reason for consulting?Yes; Characterize your pain:: sensitive; Characterize your pain:: heavy; Do you feel pain somewhere?: top of the head; Do you feel pain somewhere?: forehead; Do you feel pain somewhere?: cheek(R); Do you feel pain somewhere?: cheek(L); Do you feel pain somewhere?: occiput; How intense is the pain?: 7; Does the pain radiate to another location?: nowhere; How precisely is the pain locate

In [7]:
import json
import re

def extract_disease(output_text):
    """
    Extracts the disease mentioned after 'Disease can be' in the output text.
    """
    match = re.search(r"Disease can be (.*)", output_text)
    return match.group(1).strip() if match else None

def calculate_accuracy_from_files(test_file, prediction_file):
    """
    Calculates accuracy by comparing the test outputs to prediction responses from two JSON files.

    Args:
        test_file (str): Path to the test data JSON file.
        prediction_file (str): Path to the prediction data JSON file.

    Returns:
        float: Accuracy percentage.
    """
    # Load test data
    with open(test_file, "r") as test_f:
        test_data = json.load(test_f)

    # Load prediction data
    with open(prediction_file, "r") as pred_f:
        prediction_data = json.load(pred_f)

    if len(test_data) != len(prediction_data):
        raise ValueError("Test data and prediction data must have the same number of entries.")

    correct_count = 0


    for test_entry, pred_entry in zip(test_data, prediction_data):
        # Extract diseases
        test_disease = extract_disease(test_entry["output"])
        pred_disease = extract_disease(pred_entry["output"])

        # Compare diseases
        if test_disease and pred_disease and test_disease == pred_disease:
            correct_count += 1

    # Calculate accuracy
    accuracy = (correct_count / len(test_data)) * 100
    return accuracy

# Example usage
test_file = "sampled_test_combined_data.json"  # Replace with the path to your test JSON file
prediction_file = "formatted_data.json"  # Replace with the path to your prediction JSON file

accuracy = calculate_accuracy_from_files(test_file, prediction_file)
print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 97.69%


In [8]:
from sklearn.metrics import precision_score, recall_score, f1_score
from rouge_score import rouge_scorer

def extract_differential_diagnoses(output_text):
    """
    Extracts the differential diagnoses from the output text.
    """
    match = re.search(r"Differential diagnosis is: (.*?) and the Disease can be", output_text)
    if match:
        diagnoses = match.group(1).strip()
        return set(diagnoses.split(", "))  # Split diagnoses into a set
    return set()

def calculate_metrics(test_file, prediction_file):
    """
    Calculates metrics like precision, recall, F1-score, and ROUGE for differential diagnoses.

    Args:
        test_file (str): Path to the test data JSON file.
        prediction_file (str): Path to the prediction data JSON file.

    Returns:
        dict: Dictionary containing accuracy, precision, recall, F1-score, and ROUGE metrics.
    """
    # Load test data
    with open(test_file, "r") as test_f:
        test_data = json.load(test_f)

    # Load prediction data
    with open(prediction_file, "r") as pred_f:
        prediction_data = json.load(pred_f)

    if len(test_data) != len(prediction_data):
        raise ValueError("Test data and prediction data must have the same number of entries.")

    precision_list, recall_list, f1_list, rouge_list = [], [], [], []
    correct_count = 0

    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

    for test_entry, pred_entry in zip(test_data, prediction_data):
        # Extract differential diagnoses
        test_differential = extract_differential_diagnoses(test_entry["output"])
        pred_differential = extract_differential_diagnoses(pred_entry["output"])

        # Calculate precision, recall, and F1
        tp = len(test_differential & pred_differential)  # True Positives
        fp = len(pred_differential - test_differential)  # False Positives
        fn = len(test_differential - pred_differential)  # False Negatives

        precision = tp / (tp + fp) if tp + fp > 0 else 0
        recall = tp / (tp + fn) if tp + fn > 0 else 0
        f1 = (2 * precision * recall) / (precision + recall) if precision + recall > 0 else 0

        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)

        # Calculate ROUGE-L
        test_diagnoses_str = ", ".join(test_differential)
        pred_diagnoses_str = ", ".join(pred_differential)
        rouge = scorer.score(test_diagnoses_str, pred_diagnoses_str)['rougeL'].fmeasure
        rouge_list.append(rouge)

    # Calculate averages
    avg_precision = sum(precision_list) / len(precision_list)
    avg_recall = sum(recall_list) / len(recall_list)
    avg_f1 = sum(f1_list) / len(f1_list)
    avg_rouge = sum(rouge_list) / len(rouge_list)

    return {
        "precision_differential": avg_precision,
        "recall_differential": avg_recall,
        "f1_differential": avg_f1,
        "rouge_differential": avg_rouge,
    }

# Example usage
test_file = "sampled_test_combined_data.json"  # Replace with the path to your test JSON file
prediction_file = "formatted_data.json"  # Replace with the path to your prediction JSON file

metrics = calculate_metrics(test_file, prediction_file)
print(metrics)

{'precision_differential': 0.9176261585108249, 'recall_differential': 0.9267435226579821, 'f1_differential': 0.9121278857091725, 'rouge_differential': 0.8773032723351539}
