In [13]:
import json
import numpy as np
from scipy.stats import pearsonr

def calculate_metrics(file_path):
    """
    Compute RMSE and Pearson correlation between 'grade' and 'bert_grade'.

    The JSON file is iterated as a sequence of question entries. For every
    entry that has a 'responses_students' key, each response that carries
    both a 'grade' and a 'bert_grade' contributes one (grade, bert_grade)
    pair. Responses missing either key, or whose value for either key is
    null, are skipped.

    Args:
        file_path (str): The path to the JSON file containing the data.

    Returns:
        tuple: A tuple containing:
            - float: The RMSE value.
            - float: The Pearson correlation coefficient.
            - float: The p-value for the Pearson correlation.

    Raises:
        ValueError: If no usable (grade, bert_grade) pair is found.
            Errors raised by NumPy while converting the collected values
            to floats, or by ``pearsonr`` itself, propagate unchanged.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    grades = []
    bert_grades = []

    for question_data in data:
        if 'responses_students' not in question_data:
            continue
        for response in question_data['responses_students']:
            if 'grade' not in response or 'bert_grade' not in response:
                continue
            grade = response['grade']
            bert_grade = response['bert_grade']
            # A JSON null means "no grade recorded"; treat it like a missing
            # key instead of letting None reach the NumPy arithmetic below.
            if grade is None or bert_grade is None:
                continue
            grades.append(grade)
            bert_grades.append(bert_grade)

    # Both lists grow in lockstep, so checking one of them is sufficient.
    if not grades:
        raise ValueError("No 'grade' or 'bert_grade' data found in the file with the expected structure.")

    # Convert once and reuse the arrays for both metrics.
    grade_values = np.asarray(grades, dtype=float)
    bert_values = np.asarray(bert_grades, dtype=float)

    # Root mean squared error between the two sets of grades.
    rmse = np.sqrt(np.mean((grade_values - bert_values) ** 2))

    # pearsonr returns (correlation_coefficient, p-value).
    correlation_coefficient, p_value = pearsonr(grade_values, bert_values)

    return rmse, correlation_coefficient, p_value

if __name__ == "__main__":
    # Script entry point: compute the metrics for a fixed input file and
    # print them, turning the expected failure modes into readable messages.
    json_file_path = 'correcao_bertEN.json'

    try:
        rmse_value, pearson_corr, p_value = calculate_metrics(json_file_path)
        # One print call, one metric per line.
        # NOTE: with the .4f format, any p-value below 0.00005 is shown as 0.0000.
        print(
            f"RMSE: {rmse_value:.4f}",
            f"Pearson Correlation Coefficient: {pearson_corr:.4f}",
            f"P-value for Pearson Correlation: {p_value:.4f}",
            sep="\n",
        )
    except FileNotFoundError:
        # The input file does not exist at the given path.
        print(f"Error: The file '{json_file_path}' was not found.")
    except ValueError as err:
        # Raised by calculate_metrics when no usable data is found; malformed
        # JSON also lands here because json.JSONDecodeError is a ValueError.
        print(f"Error: {err}")
    except Exception as exc:
        # Last-resort handler so the script reports the problem instead of
        # ending with a traceback.
        print(f"An unexpected error occurred: {exc}")

RMSE: 1.7516
Pearson Correlation Coefficient: 0.4594
P-value for Pearson Correlation: 0.0000
