# In [2]:
import os
import csv
import json
import re

def _read_user_exam(file_path):
    """Return the question responses recorded in one user's CSV file.

    Only rows whose ``action_type`` is ``respond`` and whose ``item_id`` is
    ``q`` followed by an integer are kept. Malformed rows are skipped so a
    single bad line does not abort the whole conversion.

    Args:
        file_path: Path to a CSV file with at least the columns
            ``action_type``, ``item_id``, ``user_answer`` and ``timestamp``.

    Returns:
        A list of dicts with keys ``question_id`` (int), ``answer`` and
        ``timestamp`` (the raw CSV strings, or ``None`` when empty).

    Raises:
        KeyError: If one of the expected columns is absent from the header.
    """
    user_exam = []

    # utf-8-sig drops a leading BOM so the first header name is read intact;
    # newline='' is what the csv module asks for when it is given a file.
    with open(file_path, 'r', encoding='utf-8-sig', newline='') as file:
        for row in csv.DictReader(file):
            # DictReader fills missing trailing fields with None on short
            # rows, so normalise before calling string methods.
            item_id = row['item_id'] or ''
            if row['action_type'] != 'respond' or not item_id.startswith('q'):
                continue

            try:
                question_id = int(item_id[1:])
            except ValueError:
                # e.g. "q" or "qabc": not a usable question id, skip the row.
                continue

            user_exam.append({
                "question_id": question_id,
                "answer": row['user_answer'] or None,
                "timestamp": row['timestamp'] or None,
            })

    return user_exam


def process_csv_files(folder_path):
    """Collect the question responses of every ``u<id>.csv`` file in a folder.

    Files are recognised only when the whole name is ``u<digits>.csv``; any
    other entry in the folder is ignored. Users are emitted in ascending
    ``user_id`` order so the result does not depend on directory listing
    order. Users without any matching response are left out.

    Args:
        folder_path: Directory containing the per-user CSV files.

    Returns:
        A dict ``{"name": "EdNet-KT2-samples", "exams": [...]}`` where each
        exam is ``{"user_id": int, "user_exam": [...]}``, or ``None`` (after
        printing a message) when ``folder_path`` is not an existing directory.
    """
    result = {
        "name": "EdNet-KT2-samples",
        "exams": [],
    }

    # isdir (not exists) so that a regular file passed by mistake is reported
    # the same way as a missing folder instead of crashing in os.listdir.
    if not os.path.isdir(folder_path):
        print(f"Folder {folder_path} not found")
        return None

    user_files = []
    for filename in os.listdir(folder_path):
        # fullmatch: names such as "menu5.csv" or "u1.csv.bak.csv" must not
        # be mistaken for user files.
        user_id_match = re.fullmatch(r'u(\d+)\.csv', filename)
        if user_id_match:
            user_files.append((int(user_id_match.group(1)), filename))

    # os.listdir returns entries in arbitrary order; sort for stable output.
    user_files.sort()

    for user_id, filename in user_files:
        user_exam = _read_user_exam(os.path.join(folder_path, filename))
        if user_exam:
            result["exams"].append({
                "user_id": user_id,
                "user_exam": user_exam,
            })

    return result

# In the future I am going to use this JSON to send data to the backend (mainly as JSON).
def save_json(data, output_file):
    """Write ``data`` to ``output_file`` as JSON indented by two spaces.

    The parent directory of ``output_file`` is created when it does not
    exist yet, so a fresh checkout without the output folder still works.

    Args:
        data: Any JSON-serialisable object.
        output_file: Destination path; an existing file is overwritten.
    """
    parent_dir = os.path.dirname(output_file)
    # dirname is '' for a bare file name, which makedirs would reject.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)

def main():
    """Convert the sample EdNet-KT2 CSV folder into a single JSON file.

    Prints "No data processed" instead of writing anything when
    ``process_csv_files`` returns a falsy value.
    """
    source_dir = './samples/EdNet-KT2-samples'
    target_path = './outputs/kt_json_data.json'

    parsed = process_csv_files(source_dir)

    # Guard clause: nothing to write when processing produced no result.
    if not parsed:
        print("No data processed")
        return

    save_json(parsed, target_path)

# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()