In [31]:
# I had to do this adaptation to make rpy2 work:
# https://github.com/rpy2/rpy2/issues/1018
# C:\Users\pedro\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\rpy2

from rpy2 import situation
import os

print("Testing that R works from within Python using rpy2...")

# First try a plain import. On some Windows installs this fails with an OSError
# because the R DLLs cannot be located (see https://github.com/rpy2/rpy2/issues/1018).
try:
    import rpy2.robjects as robjects
except OSError as e:
    try:
        # Fallback: look up the R installation via rpy2's registry helper, expose
        # its 64-bit binaries to the process, and retry the import once.
        r_home = situation.r_home_from_registry()
        if not r_home:
            # The lookup may come back empty when no R install is registered.
            # Re-raise the original import error instead of failing below with
            # an unrelated TypeError while building the path.
            raise e

        r_bin = os.path.join(r_home, 'bin', 'x64')
        os.environ['R_HOME'] = r_home
        os.environ['PATH'] = r_bin + os.pathsep + os.environ.get('PATH', '')
        # Python 3.8+ on Windows no longer searches PATH for dependent DLLs,
        # so the directory must also be registered explicitly.
        os.add_dll_directory(r_bin)

        print('R_HOME', r_home)
        import rpy2.robjects as robjects
    except OSError:
        # The workaround did not help: surface the error from the first attempt.
        raise e

print("R Worked from within Python using rpy2! You're good to go.")

Testing that R works from within Python using rpy2...
R Worked from within Python using rpy2! You're good to go.


In [32]:
import os

# Tell the user what is needed before checking that it is actually there.
print("To compute IRT thetas, we need to have the original ENEM microdata data available.")
print("Download ENEM microdata from https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/microdados/enem.")

# Root folder holding one `microdados_enem_<year>` directory per ENEM edition.
enem_microdata_path = "C:/Users/pedro/Downloads/TRI/microdados"
years = [2019, 2020, 2021, 2022]

# Stop at the first missing edition; later cells read files from these folders.
for year in years:
    year_directory = f"{enem_microdata_path}/microdados_enem_{year}"
    year_is_available = os.path.exists(year_directory)
    assert year_is_available, f"ENEM microdata for year {year} NOT found!"
    print(f"ENEM microdata for year {year} found!")


To compute IRT thetas, we need to have the original ENEM microdata data available.
Download ENEM microdata from https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/microdados/enem.
ENEM microdata for year 2019 found!
ENEM microdata for year 2020 found!
ENEM microdata for year 2021 found!
ENEM microdata for year 2022 found!


In [33]:
import pandas as pd

# This is the function you want to call.
# 'response_pattern_filepath' must have a column RESPONSE_PATTERN containing 0s and 1s,
# and also a column CO_PROVA containing the ENEM exam code.
# 'itens_prova_filepath' must point to ENEM's official file that contains the IRT params, e.g., ITENS_PROVA_2022.csv

def run_R_IRT_script(response_pattern_filepath, itens_prova_filepath):
    """Evaluate the ``fit_irt.R`` script in the embedded R session.

    The script is read from the current working directory. Before it is
    evaluated, the two arguments are assigned to R variables so the script
    can pick them up:

    * ``response_pattern_filepath`` -> R variable ``response_pattern_filepath``
    * ``itens_prova_filepath``      -> R variable ``file_itens_prova``

    Returns ``None``; whatever the R script produces is a side effect of
    evaluating it.
    """
    # Load the R source first, so a missing script fails before R is touched.
    with open("fit_irt.R", 'r') as script_handle:
        script_source = script_handle.read()

    # Publish the inputs under the names assigned here, then run the script.
    r_session = robjects.r
    r_session.assign('response_pattern_filepath', response_pattern_filepath)
    r_session.assign('file_itens_prova', itens_prova_filepath)
    r_session(script_source)



In [34]:

# ENEM editions this notebook knows about, in the order they are probed.
_SUPPORTED_ENEM_YEARS = ("2022", "2021", "2020", "2019")


def _detect_enem_year(filepath):
    """Return the ENEM year (as a string) encoded in ``filepath``.

    A year that appears as a whole path component or as a whole
    underscore-delimited token wins. Only when no such token exists does the
    function fall back to a plain substring search, so a year embedded in
    another field (e.g. a prompt called ``paper-2022-zero-shot``) cannot
    override the real year of the experiment.

    Raises:
        ValueError: if none of the supported years occurs in ``filepath``.
    """
    path_text = str(filepath)

    # Split on both path separators and on the '_' used in experiment names.
    normalized = path_text.replace("\\", "/").replace("_", "/")
    tokens = {token for token in normalized.split("/") if token}
    for year in _SUPPORTED_ENEM_YEARS:
        if year in tokens:
            return year

    # Fallback: same substring probing (and same order) as before.
    for year in _SUPPORTED_ENEM_YEARS:
        if year in path_text:
            return year

    raise ValueError(f"YEAR not found in {filepath}")


# This function reads an extracted experiment such as
# exp_MT_2022_mistral_simple-zero-shot_bits_4_count_24.zip and gets theta IRT params.
def run_R_IRT_from(filepath):
    """Run the IRT R script over one extracted experiment directory.

    Args:
        filepath: directory containing an ``aggregated`` sub-folder with
            ``majority_sample.csv``, ``samples.csv`` and ``random_samples.csv``.
            The ENEM year is inferred from this path.

    Returns:
        A 4-tuple ``(thetas_df, average_df, thetas_random_df, average_random_df)``:
        the frames read from ``samples_with_theta.csv`` and
        ``random_samples_with_theta.csv`` (in file order, not re-sorted), plus
        the mean ``IRT_SCORE`` per ``CTT_SCORE`` for each of them.

    Raises:
        ValueError: if no supported year can be found in ``filepath``.

    Side effects:
        Rewrites both ``*_with_theta.csv`` files sorted by IRT then CTT score,
        highest first.
    """
    year = _detect_enem_year(filepath)

    file_itens_prova = f"{enem_microdata_path}/microdados_enem_{year}/DADOS/ITENS_PROVA_{year}.csv"
    aggregated_dir = f"{filepath}/aggregated"

    # One R run per input file; the *_with_theta.csv files read below are
    # presumably written by fit_irt.R next to each input (not visible here).
    for input_name in ("majority_sample.csv", "samples.csv", "random_samples.csv"):
        run_R_IRT_script(f"{aggregated_dir}/{input_name}", file_itens_prova)

    sort_columns = ['IRT_SCORE', 'CTT_SCORE']

    thetas_path = f"{aggregated_dir}/samples_with_theta.csv"
    thetas_df = pd.read_csv(thetas_path)
    # The sorted copy goes back to disk only; the returned frame keeps file order.
    thetas_df.sort_values(by=sort_columns, ascending=False).to_csv(thetas_path)

    thetas_random_path = f"{aggregated_dir}/random_samples_with_theta.csv"
    thetas_random_df = pd.read_csv(thetas_random_path)
    thetas_random_df.sort_values(by=sort_columns, ascending=False).to_csv(thetas_random_path)
    print("thetas.csv finished! rows:", thetas_df.shape[0])

    # Mean IRT score for every raw (CTT) score.
    average_df = thetas_df.groupby('CTT_SCORE')['IRT_SCORE'].mean().reset_index()
    average_random_df = thetas_random_df.groupby('CTT_SCORE')['IRT_SCORE'].mean().reset_index()

    return thetas_df, average_df, thetas_random_df, average_random_df

In [35]:
import os
import zipfile
import tempfile
import random
import string
from pathlib import Path

def _parse_experiment_zip_name(file_name):
    """Split an ``exp_*.zip`` file name into its ten underscore-separated fields.

    Expected layout (extension removed before splitting):
    ``exp_<exam>_<language>_<year>_<llm>_<prompt>_bits_<bits>_count_<shuffle count>``

    Raises:
        ValueError: if the name does not have exactly ten fields.
    """
    stem, _extension = os.path.splitext(file_name)
    fields = stem.split("_")
    if len(fields) != 10:
        raise ValueError(
            f"Unexpected experiment file name {file_name!r}: "
            f"expected 10 '_'-separated fields, got {len(fields)}"
        )
    return fields


def compute_everything(source_directory, target_directory):
    """Extract every experiment zip and compute its IRT scores.

    For each ``exp_*.zip`` in ``source_directory`` the archive is extracted to
    ``<target_directory>/<exam>/<year>/<llm>/<prompt>``, ``run_R_IRT_from`` is
    run on that folder, and the two per-score averages are written to its
    ``aggregated`` sub-folder. Experiments whose
    ``aggregated/samples_with_theta.csv`` already exists are skipped.

    Args:
        source_directory: folder containing the experiment zip files.
        target_directory: root folder for the extracted experiments.

    Raises:
        ValueError: if a matching zip has a name that cannot be parsed.
    """
    # Sorted so that runs are processed in a reproducible order.
    for file_name in sorted(os.listdir(source_directory)):
        if not (file_name.startswith("exp_") and file_name.endswith(".zip")):
            continue

        zip_path = os.path.join(source_directory, file_name)

        # exp_LC_pt-br_2022_mistral_simple-zero-shot_bits_4_count_14.zip
        (_exp, exam, language, year, llm, prompt, _b, bits, _c, shuffle_count) = _parse_experiment_zip_name(file_name)
        print(file_name, ':', 'exam', exam, 'language', language, 'year', year, 'prompt', prompt, 'bits', bits, 'shuffle count', shuffle_count)

        # NOTE(review): language, bits and shuffle count are not part of the
        # target path, so experiments differing only in those share a folder.
        full_target_directory = f"{target_directory}/{exam}/{year}/{llm}/{prompt}"
        # parents=True creates every missing intermediate level in one call.
        Path(full_target_directory).mkdir(parents=True, exist_ok=True)

        if os.path.exists(f"{full_target_directory}/aggregated/samples_with_theta.csv"):
            print(full_target_directory, ' already exists, skipping.\n')
            continue

        # Unpack the archive; it only needs to stay open during extraction.
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(full_target_directory)
        print(f"Files extracted to: {full_target_directory}")

        thetas_df, average_df, thetas_random_df, average_random_df = run_R_IRT_from(full_target_directory)
        average_df.to_csv(full_target_directory + "/aggregated/average_theta_by_score_sample.csv")
        average_random_df.to_csv(full_target_directory + "/aggregated/average_theta_by_score_random_sample.csv")


In [36]:
# Where the experiment zips live, and where the extracted results are written.
source_directory_path = "C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL"
target_directory_path = "C:/Users/pedro/Downloads/TRI/test_responses_llms/EXP"

banner = f"Reading LLM responses from {source_directory_path} and generating IRT scores into {target_directory_path}\n"
print(banner)

# Process every experiment archive; already-computed experiments are skipped.
compute_everything(
    source_directory_path,
    target_directory_path,
)

print("FINISHED!")

Reading LLM responses from C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL and generating IRT scores into C:/Users/pedro/Downloads/TRI/test_responses_llms/EXP

exp_CH_pt-br_2020_llama2_simple-zero-shot_bits_4_count_1.zip : exam CH language pt-br year 2020 prompt simple-zero-shot bits 4 shuffle count 1.zip
C:/Users/pedro/Downloads/TRI/test_responses_llms/EXP/CH/2020/llama2/simple-zero-shot  already exists, skipping.

exp_CH_pt-br_2020_mistral_paper-nunes-2023-zero-shot_bits_4_count_10.zip : exam CH language pt-br year 2020 prompt paper-nunes-2023-zero-shot bits 4 shuffle count 10.zip
C:/Users/pedro/Downloads/TRI/test_responses_llms/EXP/CH/2020/mistral/paper-nunes-2023-zero-shot  already exists, skipping.

exp_CH_pt-br_2020_mistral_simple-zero-shot_bits_4_count_10.zip : exam CH language pt-br year 2020 prompt simple-zero-shot bits 4 shuffle count 10.zip
C:/Users/pedro/Downloads/TRI/test_responses_llms/EXP/CH/2020/mistral/simple-zero-shot  already exists, skipping.

exp_CH_pt-br

R[write to console]: Error in dimnames(x) <- dn : 
  length of 'dimnames' [2] not equal to array extent



RRuntimeError: Error in dimnames(x) <- dn : 
  length of 'dimnames' [2] not equal to array extent


In [None]:
#df = pd.read_parquet("C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL/GABRIEL/enem-experiments-results-processed.parquet")

#df.sort_values(by='CTT_SCORE', ascending=False)[['ENEM_EXAM','MODEL_NAME','MODEL_SIZE','LANGUAGE','CTT_SCORE']]

#df.to_csv("C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL/enem-experiments-results-processed.csv")

#df.head()

In [None]:
#df = pd.read_csv("C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL/GABRIEL/enem-experiments-results-processed.csv")
#theta_df = pd.read_csv("C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL/GABRIEL/thetas-enem-experiments-results-processed.csv")

#result_df = pd.concat([df, theta_df], axis=1, verify_integrity=False)
#result_df = result_df.T.drop_duplicates().T

#result_df.to_csv("C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL/GABRIEL/experiments-with-irt.csv")

In [None]:
# 1062 CH
# 1082 MT
# 1092 CN


#result_df[result_df['CO_PROVA'] == 1092][['ENEM_EXAM', 'MODEL_NAME', 'LANGUAGE', 'MODEL_SIZE','CTT_SCORE','IRT_SCORE', 'MEAN_CORRECT_B', 'MEAN_INCORRECT_B ']].sort_values(by='CTT_SCORE', ascending=False)

