# Process

This notebook processes the data from the level 3 assessment. It splits a single CSV file into multiple files, one for each distinct value of the `level3` column.

In [6]:
import os

# Directory holding the input CSV files.
# NOTE: the constant name contains a typo ("INPTUT"); it is kept as-is because
# other cells reference it by this exact name.
DATA_INPTUT_ROOT = "../../data/csv_oct15_24"

# Directory the per-level3 CSV files are written to: the input root with a
# "_level3" suffix appended.
DATA_OUTPUT_ROOT = f"{DATA_INPTUT_ROOT}_level3"

In [11]:
import re
import pandas as pd


def extract_level_2_name(path: str) -> str:
    """
    Extract the level 2 name from the path.

    The file name is expected to look like
    ``responses_assessment_level2_1.1.csv``; the level 2 name is the
    ``<digits>.<digits>`` part, i.e. ``"1.1"`` in this example. Any leading
    directory components are ignored.

    Args:
        path: Path (or bare file name) of a level 2 responses CSV file.

    Returns:
        The level 2 name as a string, e.g. ``"1.1"``.

    Raises:
        ValueError: If the path does not follow the expected naming pattern.
    """
    # The leading ".*" swallows any directory prefix; the capture group is the
    # "<digits>.<digits>" level 2 identifier right before the ".csv" suffix.
    pattern = re.compile(r".*responses_assessment_level2_(\d+\.\d+)\.csv")

    match = pattern.match(path)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            f"Cannot extract level 2 name from path {path!r}: expected a file "
            "named like 'responses_assessment_level2_<N>.<M>.csv'"
        )
    return match.group(1)


def process_data(path: str) -> None:
    """
    Split the CSV file at ``path`` into one CSV file per ``level3`` value.

    The file is read with its first column as the index, grouped by the
    ``level3`` column, and each group is written to
    ``{DATA_OUTPUT_ROOT}/level3_{name}.csv`` (index included). The output
    directory is created if it does not exist yet. An existing output file
    with the same name is overwritten.

    NOTE: with pandas' default ``groupby`` settings, rows whose ``level3``
    value is missing are expected to be dropped and therefore not written to
    any output file -- confirm this is intended for the data at hand.

    Args:
        path: Path of the input CSV file; it must contain a ``level3`` column.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test and is a no-op when the directory already exists.
    os.makedirs(DATA_OUTPUT_ROOT, exist_ok=True)

    # low_memory=False makes pandas read the file in one pass rather than in
    # chunks, which avoids mixed-dtype inference within a column.
    data = pd.read_csv(path, index_col=0, low_memory=False)

    for name, group in data.groupby("level3"):
        group.to_csv(f"{DATA_OUTPUT_ROOT}/level3_{name}.csv")

In [12]:
# Split the level 2 responses file for 2.1 into one CSV per level3 value.
process_data(f"{DATA_INPTUT_ROOT}/responses_assessment_level2_2.1.csv")

In [13]:
# Split the level 2 responses file for 3.1 into one CSV per level3 value.
process_data(f"{DATA_INPTUT_ROOT}/responses_assessment_level2_3.1.csv")

In [14]:
# Split the level 2 responses file for 4.1 into one CSV per level3 value.
process_data(f"{DATA_INPTUT_ROOT}/responses_assessment_level2_4.1.csv")