In [8]:
import csv
import json
import os

def process_enem_csv(file_path):
    """Convert an ENEM CSV file into a nested, JSON-serializable dict.

    The file is read as a ';'-delimited, latin-1 encoded CSV with a header
    row. For every data row, up to one entry is produced per exam type code
    (CN, CH, LC, MT), built from that code's ``CO_PROVA_<code>`` and
    ``TX_RESPOSTAS_<code>`` columns.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A dict ``{"name": "ENEM-samples", "exams": [...]}``. Each element of
        ``exams`` has the keys ``user_id`` (``NU_INSCRICAO`` as an int),
        ``exam_id`` (the ``CO_PROVA_<code>`` value, or ``None`` when empty)
        and ``user_exam`` (one ``{"question_id", "answer"}`` dict per
        character of the answer string, numbered from 1). Exam types whose
        answer string is empty are omitted.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If a required column is missing from the header.
        ValueError: If ``NU_INSCRICAO`` is not a valid integer.
    """
    exam_type_codes = ('CN', 'CH', 'LC', 'MT')
    result = {
        "name": "ENEM-samples",
        "exams": []
    }

    # newline='' lets the csv module interpret line endings itself, as its
    # documentation requires; otherwise the text layer rewrites '\r\n' inside
    # quoted fields before the parser sees them.
    with open(file_path, 'r', encoding='latin-1', newline='') as file:
        reader = csv.DictReader(file, delimiter=';')

        for row in reader:
            user_id = int(row['NU_INSCRICAO'])

            for code in exam_type_codes:
                exam_id = row[f'CO_PROVA_{code}']
                answers = row[f'TX_RESPOSTAS_{code}']

                # Every character of the answer string becomes one question
                # entry; no character (blank included) is filtered out.
                user_exam = [
                    {"question_id": position, "answer": answer}
                    for position, answer in enumerate(answers, start=1)
                ]

                # An empty answer string yields no questions: skip that exam.
                if not user_exam:
                    continue

                result["exams"].append({
                    "user_id": user_id,
                    "exam_id": exam_id or None,
                    "user_exam": user_exam,
                })

    return result

def save_json(data, output_file):
    """Write ``data`` as indented UTF-8 JSON to ``output_file``.

    Missing parent directories of ``output_file`` are created first, so a
    target such as ``./outputs/result.json`` works even when ``outputs``
    does not exist yet.

    Args:
        data: JSON-serializable object to write.
        output_file: Destination path; an existing file is overwritten.

    Raises:
        OSError: If the directory or the file cannot be created or written.
        TypeError: If ``data`` contains a value ``json`` cannot serialize.
    """
    parent_dir = os.path.dirname(output_file)
    # A bare file name has no directory component; nothing to create then.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_file, 'w', encoding='utf-8') as f:
        # ensure_ascii=False writes non-ASCII characters as-is, not escaped.
        json.dump(data, f, indent=2, ensure_ascii=False)

def main():
    """Convert the hard-coded sample CSV into the hard-coded JSON output.

    Any exception raised while reading, converting or writing is caught and
    reported on standard output instead of propagating.
    """
    source_csv = 'samples/ENEM-samples/mini_enem.csv'
    target_json = './outputs/enem_converted_data.json'

    try:
        converted = process_enem_csv(source_csv)
        save_json(converted, target_json)
    except Exception as error:
        # Top-level boundary: report the failure rather than show a traceback.
        print(f"Error processing file: {error}")

# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()