In [None]:
from typing import List

from parser.model.page_model import ExtractionType, Metadata, SubQuestion, Text

In [None]:
# Regular expression as defined above
import regex

# Prototype pattern for locating C-style function definitions in free text.
# Named groups: return_type, function_name, arguments, body.
#   * `arguments` accepts parentheses nested up to two levels deep inside the
#     argument list (e.g. function-pointer parameters).
#   * `body` recurses into itself through (?&body) so nested brace pairs stay
#     balanced; stdlib `re` has no (?&name) recursion, hence the `regex` module.
#   * MULTILINE and DOTALL are no-ops as written: the pattern contains no
#     ^ / $ anchors and no `.`.
pattern = regex.compile(
    r"""
    (?P<return_type>
        (?:[\w\s\*\&\[\]]+?\s+)+
    )
    (?P<function_name>\w+)\s*
    \(
        (?P<arguments>
            (?:[^()]*|\((?:[^()]|\([^()]*\))*\))*
        )
    \)\s*
    (?P<body>
        \{
            (?>
                [^{}]+
                |
                (?&body)
            )*
        \}
    )
    """,
    regex.VERBOSE | regex.MULTILINE | regex.DOTALL,
)

# Sample input for the extraction patterns: a series of C function definitions
# followed by an exam-style prompt whose percolateUp body contains fill-in
# blanks (runs of underscores).
code = """
int add(int a, int b) {
    return a + b;
}

void swap(int *a, int *b) {
    int temp = *a;
    *a = *b;
    *b = temp;
}

static inline int max(int a, int b) {
    return a > b ? a : b;
}

struct node* create_node(int data) {
    struct node* new_node = (struct node*)malloc(sizeof(struct node));
    new_node->data = data;
    new_node->next = NULL;
    return new_node;
}

void (*signal(int sig, void (*func)(int)))(int) {
    // function body
}

void complex_function(
    int a,
    char *b,
    double (*c)(int)
) {
    // code
}

void process_array(int arr[], size_t size) {
    for (size_t i = 0; i < size; ++i) {
        // process arr[i]
    }
}

void no_arguments(void) {
    // code
}

const char* const* get_strings(void) {
    static const char* strs[] = {"hello", "world", NULL};
    return strs;
}

int compute(int **matrix, int rows, int cols) {
    int sum = 0;
    for(int i = 0; i < rows; ++i) {
        for(int j = 0; j < cols; ++j) {
            sum += matrix[i][j];
        }
    }
    return sum;
}

Consider a minheap stored in an integer array int heaparray[100], which is globally declared.
Complete the percolateUp function below that takes an index and perform the full percolate up operation
for the item at that index. While writing the code, you can assume that there is a swap function available
for you that is described below.

   // swap(int* ptrA, int* ptrB) – swaps the contents in the variables
   //                              pointed to by ptrA and ptrB.
   void percolateUp(int idx){


       if ( ________ > 1) {

           if ( ______________________________________________ ) {

               swap( ________________________ , _______________________ )

               percolateUp( ______________ );
           }
       }
   }
"""

# Walk every function definition the prototype pattern finds in the sample
# and print its name, argument list and brace-delimited body.
matches = pattern.finditer(code)

for match in matches:
    function_name, arguments, body = match.group(
        "function_name", "arguments", "body"
    )
    # One print call with a newline separator emits the same four lines.
    print(
        f"Function name: {function_name}",
        f"Arguments: {arguments.strip()}",
        f"Body:\n{body.strip()}",
        "-" * 50,
        sep="\n",
    )

In [None]:
# Pattern used by extract_code_free_response_sub_questions to find C-style
# function definitions. Named groups: return_type, function_name, arguments,
# body.
#   * `return_type` is one or more tokens, each followed by whitespace and/or
#     /* ... */ comments; DOTALL lets `.` span newlines inside such a comment.
#   * `arguments` accepts parentheses nested up to two levels deep inside the
#     argument list.
#   * `body` recurses into itself through (?&body) to keep nested braces
#     balanced, which requires the `regex` module rather than stdlib `re`.
#   * MULTILINE is a no-op as written: the pattern contains no ^ / $ anchors.
FUNCTION_EXTRACTION_PATTERN = regex.compile(
    r"""
    (?P<return_type>
        (?:
            [\w\*\&\[\]]+          # Match return type characters
            (?:\s+|/[*].*?[*]/)+    # Match whitespace or comments
        )+
    )
    (?P<function_name>\w+)\s*
    \(
        (?P<arguments>
            (?:[^()]*|\((?:[^()]|\([^()]*\))*\))*
        )
    \)\s*
    (?P<body>
        \{
            (?:
                [^{}]+
                |
                (?&body)
            )*
        \}
    )
    """,
    regex.MULTILINE | regex.DOTALL | regex.VERBOSE,
)


def extract_code_free_response_sub_questions(text: str) -> List[SubQuestion]:
    """
    Extracts code-free-response sub-questions from a given text.

    Every span of ``text`` matched by ``FUNCTION_EXTRACTION_PATTERN`` (a
    C-style function definition: return type, name, parenthesised arguments
    and a brace-balanced body) becomes one sub-question identified by the
    function's name.

    Args:
        text (str): The text from which to extract code-free-response
            sub-questions.

    Returns:
        List[SubQuestion]: One SubQuestion per matched function definition, in
        order of appearance. Each uses the full matched span as both its
        original and filtered text and is tagged with
        ExtractionType.CODE_FREE_RESPONSE.
    """
    # Trace output, kept so the notebook cell still shows what was processed.
    print(f"Called extract_code_free_response_sub_questions with text={text}")

    sub_questions: List[SubQuestion] = []

    for match in FUNCTION_EXTRACTION_PATTERN.finditer(text):
        raw = match.group(0)
        function_name = match.group("function_name")
        arguments = match.group("arguments")
        body = match.group("body")

        print(f"Function name: {function_name}")
        print(f"Arguments: {arguments.strip()}")
        print(f"Body:\n{body.strip()}")
        print("-" * 50)

        # Disabled filter on the amount of blank space in the body:
        # if largest_subsequence_of_empty_strings(body.split("\n")) < 5:
        #    print(f"Skipping code-free-response sub-question: {raw}")
        #    continue

        # Anchor the span at the match's real offset in `text` (it used to be
        # hard-coded to 0), matching how the fill-in-the-blank extractor
        # passes the start index of its slice.
        question_text: Text = Text.from_string(raw, text, match.start())

        sub_questions.append(
            SubQuestion(
                identifier=function_name,
                points=None,
                original_text=question_text,
                filtered_text=question_text,
                sub_questions=[],
                metadata=Metadata(extraction_type=ExtractionType.CODE_FREE_RESPONSE),
            )
        )

    print(f"Returning sub_questions={sub_questions}")

    return sub_questions


# Smoke-run the extractor on the sample `code` string; as the last expression
# of the cell, the returned list is also displayed.
extract_code_free_response_sub_questions(code)

In [None]:
from parser.dataset.dataloader import DataLoader
from parser.model.page_model import Semester

# Try the prototype pattern on real data: the last question of the last
# section of the Fall 2017 exam.
data_loader = DataLoader("../fe_files/exams/", None)
data_loader.load_data()

exam = data_loader.get_exam(semester=Semester.FALL, year=2017)

# Negative indexing selects the final element without len(...) - 1 arithmetic.
section = exam.sections[-1]

# NOTE(review): sub-questions elsewhere in this notebook expose their string
# via `original_text.text`; confirm that a question's `original_text` is a
# plain str here, because `finditer` needs a string.
text = section.questions[-1].original_text

matches = pattern.finditer(text)

for match in matches:
    function_name = match.group("function_name")
    arguments = match.group("arguments")
    body = match.group("body")
    print(f"Function name: {function_name}")
    print(f"Arguments: {arguments.strip()}")
    print(f"Body:\n{body.strip()}")
    print("-" * 50)

In [None]:
def _build_fill_in_sub_question(text: str, start: int, end: int) -> SubQuestion:
    """Wrap ``text[start:end]`` in a FILL_IN_THE_BLANKS SubQuestion."""
    span = Text.from_string(text[start:end], text, start)
    return SubQuestion(
        identifier="",
        points=None,
        original_text=span,
        filtered_text=span,
        sub_questions=[],
        metadata=Metadata(extraction_type=ExtractionType.FILL_IN_THE_BLANKS),
    )


def extract_fill_in_the_blank_sub_questions(text: str) -> List[SubQuestion]:
    """
    Extracts fill-in-the-blank sub-questions from a given text, handling each
    independently, including multi-line underscores.

    The text is scanned line by line:

    * A line containing ``:``, ``=`` or ``;`` opens a new sub-question and
      closes the one currently open, if any.
    * While a sub-question is open, empty lines and lines containing a blank
      extend it. A blank is five or more consecutive underscores once spaces
      are removed.
    * Any other line closes the open sub-question and is not part of it.
    * A closed sub-question is emitted only if at least one of its lines
      contained a blank; otherwise it is dropped.

    Args:
        text (str): The text to scan.

    Returns:
        List[SubQuestion]: One SubQuestion per emitted span, in order of
        appearance. Each has an empty identifier, uses the exact slice of
        ``text`` (line endings included) as both original and filtered text,
        and is tagged with ExtractionType.FILL_IN_THE_BLANKS.
    """
    # Keep line endings so that summed line lengths are real offsets in `text`.
    text_lines = text.splitlines(keepends=True)

    # line_starts[i] is the offset of line i; the final entry equals len(text).
    line_starts = [0]
    for line in text_lines:
        line_starts.append(line_starts[-1] + len(line))

    sub_questions: List[SubQuestion] = []
    start_index = None  # Offset where the open sub-question began, else None
    found_underscore = False  # Whether the open sub-question has a blank yet

    for i, line in enumerate(text_lines):
        compact_line = line.replace(" ", "")
        contains_underscore = "_____" in compact_line
        starts_fill_in = any(marker in compact_line for marker in (":", "=", ";"))

        if starts_fill_in:
            # A new prompt line ends whatever was being collected.
            if start_index is not None and found_underscore:
                sub_questions.append(
                    _build_fill_in_sub_question(text, start_index, line_starts[i])
                )
            start_index = line_starts[i]
            found_underscore = contains_underscore
        elif start_index is not None:
            if contains_underscore:
                found_underscore = True
            elif line.strip() != "":
                # Ordinary text: the open sub-question stops before this line.
                if found_underscore:
                    sub_questions.append(
                        _build_fill_in_sub_question(text, start_index, line_starts[i])
                    )
                start_index = None
                found_underscore = False
            # Empty lines fall through and keep the sub-question open.

    # Flush a sub-question that runs to the end of the text.
    if start_index is not None and found_underscore:
        sub_questions.append(
            _build_fill_in_sub_question(text, start_index, line_starts[-1])
        )

    return sub_questions

In [None]:
# Case: prompts separated by empty lines, with inline blanks and a blank that
# spans several lines.
text = """
Partition Element Index: ______

Partition Element Value: ______

Reason it was the Partition Element:
__________________________________________________________________________________

__________________________________________________________________________________
"""

sub_questions = extract_fill_in_the_blank_sub_questions(text)
for sq in sub_questions:
    print("Extracted SubQuestion:", sq.original_text.text, "-" * 50, sep="\n")

In [None]:
# Case: a prompt followed by several empty lines before the blank appears.
text = """
Explain your reasoning:




_____
"""

sub_questions = extract_fill_in_the_blank_sub_questions(text)
for sq in sub_questions:
    print("Extracted SubQuestion:", sq.original_text.text, "-" * 50, sep="\n")

In [None]:
# Case: bulleted prompts on consecutive lines, the last one followed by a
# multi-line blank.
text = """
- Partition Element Index: ______
- Partition Element Value: ______
- Reason it was the Partition Element:
________________________________________________________________________________

________________________________________________________________________________
"""

sub_questions = extract_fill_in_the_blank_sub_questions(text)
for sq in sub_questions:
    print("Extracted SubQuestion:", sq.original_text.text, "-" * 50, sep="\n")

In [None]:
import re


def extract_linkedlist_subquestion(text: str) -> List[SubQuestion]:
    """
    Extracts linked list sub-questions from the given text.

    Looks for answer rows of the form ``head → ____ → ____ ...`` (each slot is
    exactly four underscores) and produces one sub-question per slot.

    Args:
        text (str): The text from which to extract the linked list sub-questions.

    Returns:
        List[SubQuestion]: One SubQuestion per slot, in order of appearance.
        Identifiers are ``linkedlist_1``, ``linkedlist_2``, ... and restart at
        1 for every matched row. Each sub-question's text is the slot itself,
        positioned at the slot's own offset in ``text``, and is tagged with
        ExtractionType.LINKED_LIST_MODIFICATION.
    """
    # Local names chosen so they do not shadow the notebook-level `pattern`.
    chain_pattern = re.compile(r"head\s*→\s*(____(?:\s*→\s*____)*)")
    slot_pattern = re.compile(r"____")

    sub_questions: List[SubQuestion] = []

    for match in chain_pattern.finditer(text):
        # Group 1 is the "____ → ____ ..." part; slot offsets inside it are
        # shifted by the group's start to become offsets into `text`.
        chain_start = match.start(1)

        for idx, slot in enumerate(slot_pattern.finditer(match.group(1)), start=1):
            underscore = slot.group(0)
            # Every slot used to be anchored at match.start(), i.e. at "head",
            # so all slots of a row shared one (wrong) position.
            slot_start = chain_start + slot.start()

            sub_questions.append(
                SubQuestion(
                    identifier=f"linkedlist_{idx}",
                    points=None,
                    filtered_text=Text.from_string(underscore, text, slot_start),
                    original_text=Text.from_string(underscore, text, slot_start),
                    sub_questions=[],
                    metadata=Metadata(
                        extraction_type=ExtractionType.LINKED_LIST_MODIFICATION
                    ),
                )
            )

    return sub_questions


# Sample exam question: C code for a linked-list routine followed by a
# "head → ____ → ..." answer row with five slots.
question = """
Suppose we have a singly linked list implemented with the structure below and a function that takes in\nthe head of the list.\n\ntypedef struct node_s {\n    int data;\n    struct node_s * nextptr;\n} node_t;\n\nvoid whatDoYouDo(node_t * head){\n     node_t * temp = head;\n     node_t * temp2 = head->nextptr;\n     int a;\n\n     while(temp->nextptr != NULL){\n          a = temp->data;\n          temp->data = temp2->data;\n          temp2->data = a;\n\n          temp = temp->nextptr;\n\n          if(temp->nextptr != NULL){\n               temp = temp->nextptr;\n               temp2 = temp->nextptr;\n          }\n     }\n}\nIf we call whatDoYouDo (head) on the following list, show the list after the function has finished.\n\nhead -> 5 -> 2 -> 1 -> 8 -> 7? Please fill in the designated slots below.\n\n\n\n\n\n                      head → ____ → ____ → ____ → ____ → ____
"""

# Run the linked-list extractor on the sample question and show each slot.
sub_questions = extract_linkedlist_subquestion(question)
for sq in sub_questions:
    print("Extracted SubQuestion:", sq.filtered_text.text, "-" * 50, sep="\n")