In [16]:
import os
import json
from pypdf import PdfReader, PdfWriter
import re
import random

In [17]:
# Section names per subject. Only `english_sections` is consulted when building
# paths below; any section not listed there is filed under "math".
math_sections = ['algebra', 'advanced', 'geometry', 'problems']
english_sections = ['conventions', 'craft', 'expressions', 'ideas']

# Modes
# 0 -> generate a student question set: merge the blank PDFs per difficulty
#      (only if the merged files are missing) and sample pages from them
# 1 -> grade ./student_answers.txt against answers.json and write a PDF
#      containing the incorrectly answered questions

# Settings to modify before running the notebook
student_name = 'gianmarco'
mode = 0
section = 'algebra'
# Interpolated into file names with an f-string, so a float such as 1.10
# would appear as "1.1".
set_number = 1.1
# Mode 1 only: when True, grade the previously generated "wrong questions" PDF
# and write remaining misses under "definitely_wrong/".
wrong = False
# Mode 0 only: when True, the saved selection history is loaded so pages that
# were already handed out are excluded; otherwise the history is ignored.
new = True

In [18]:
# Subject folder: "english" for the listed English sections, "math" for everything else.
subject = 'english' if section in english_sections else 'math'
# Shared blanks/answers live under template_files; per-student output under student_files.
template_dir = f"./template_files/{subject}/{section}"
student_dir = f"./student_files/{student_name}/{subject}/{section}"
    

In [19]:
verbose = True  # when True, echo "<question id>, <student answer>" for every graded line
if mode == 1:
    # Grade either the original set or, when `wrong` is set, the PDF of
    # questions missed in an earlier grading pass.
    if wrong:
        pdf_path = os.path.join(student_dir, f"wrong/{section}_{set_number}_{student_name}_wrong_questions.pdf")
    else:
        pdf_path = os.path.join(student_dir, f"{section}_{set_number}_{student_name}.pdf")
    answers_json_path = os.path.join(template_dir, 'answers/answers.json')
    user_answers_path = './student_answers.txt'

    reader = PdfReader(pdf_path)
    number_of_pages = len(reader.pages)

    # Collect the question ID printed on the first text line of each page.
    # `ids[i]` and `page_map[i]` describe the same question; pages without any
    # extractable text are skipped, which is why the page index is stored separately.
    ids = []
    page_map = []
    for page_number, page in enumerate(reader.pages):
        text = page.extract_text()
        if not text:
            continue
        first_line = text.split('\n')[0]
        if first_line.startswith("ID: "):
            id_value = first_line.split("ID: ")[1]
        elif first_line.startswith("Question ID "):
            id_value = first_line.split("ID ")[1]
        else:
            # No recognisable ID line: keep a sentinel that can never match a
            # JSON key, so the question is later reported as "ID Not found".
            id_value = -1
        if isinstance(id_value, str):
            # Text extraction may insert spaces inside the ID; remove all whitespace.
            # (Guarded so the integer sentinel is not passed to re.sub, which
            # only accepts strings and would raise TypeError.)
            id_value = re.sub(r'\s+', '', id_value)
        ids.append(id_value)
        page_map.append(page_number)

    # Answer key: maps question ID -> answer string.
    with open(answers_json_path, 'r') as json_file:
        answers = json.load(json_file)

    # Student answers, one per line in question order; non-ASCII characters are dropped.
    with open(user_answers_path, 'r', encoding='utf-8') as file:
        user_answers_list = [line.encode('ascii', 'ignore').decode('ascii').strip() for line in file]

    # Per-status result records plus running totals, filled in while grading.
    results = {
        "Correct": [],
        "Incorrect": [],
        "Answer Not found": [],
        "ID Not found": []
    }
    correct_count = 0
    incorrect_count = 0
    not_found_count = 0

    # Collects the pages of incorrectly answered questions.
    wrong_questions_pdf = PdfWriter()

    # function to process each user input
    def process_input(user_input, question_number):
        """Grade one student answer and record the outcome.

        Args:
            user_input: The student's answer for this question.
            question_number: 1-based position of the question in `ids`.

        The outcome is appended to the matching list in `results` and the
        module-level counters are incremented. For an incorrect answer the
        question's page is also added to `wrong_questions_pdf`.
        """
        global correct_count, incorrect_count, not_found_count
        position = question_number - 1
        id_value = ids[position]

        # Optional trace of what is being graded.
        if verbose:
            print(f"{id_value}, {user_input}")

        # The question's ID has no entry in the answer key.
        if id_value not in answers:
            results["ID Not found"].append(
                {"Question": question_number, "ID": id_value, "Status": "ID Not found"}
            )
            not_found_count += 1
            return

        correct_answer = answers[id_value]
        # The key may list several accepted answers separated by commas.
        correct_answers = [ans.strip() for ans in correct_answer.split(',')]

        # The key has an entry, but it is the 'Not found' placeholder.
        if correct_answer == 'Not found':
            results["Answer Not found"].append(
                {"Question": question_number, "ID": id_value, "Status": "Answer Not found"}
            )
            not_found_count += 1
            return

        if str(user_input).strip() in correct_answers:
            results["Correct"].append(
                {"Question": question_number, "ID": id_value, "Status": "Correct"}
            )
            correct_count += 1
            return

        results["Incorrect"].append(
            {"Question": question_number, "ID": id_value, "Status": f"Incorrect ({correct_answer})"}
        )
        incorrect_count += 1
        # Keep the question's page so it can be handed back for another attempt.
        wrong_questions_pdf.add_page(reader.pages[page_map[position]])
    
    # Grade the answers in file order: line N answers question N.
    for question_number, user_answer in enumerate(user_answers_list, start=1):
        if question_number <= len(ids):
            process_input(user_answer, question_number)
            continue
        print("More answers provided than questions.")
        break

    # Report every graded question, grouped by outcome.
    for status in results:
        print(f"\n{status}:")
        for entry in results[status]:
            print(f"Question #{entry['Question']} (ID: {entry['ID']}): {entry['Status']}")

    # Summary totals.
    print(f"\nTotal Correct: {correct_count}")
    print(f"Total Incorrect: {incorrect_count}")
    print(f"Total Not Found: {not_found_count}")

    # Write the incorrectly answered questions to their own PDF, if there are any.
    if incorrect_count <= 0:
        print("No incorrect answers, no PDF generated for wrong questions.")
    else:
        # A first grading pass writes to "wrong/"; grading a wrong-questions
        # set writes what is still missed to "definitely_wrong/".
        if wrong:
            wrong_questions_pdf_path = os.path.join(student_dir, f"definitely_wrong/{section}_{set_number}_{student_name}_definitely_wrong_questions.pdf")
        else:
            wrong_questions_pdf_path = os.path.join(student_dir, f"wrong/{section}_{set_number}_{student_name}_wrong_questions.pdf")
        wrong_dir = os.path.dirname(wrong_questions_pdf_path)
        if not os.path.exists(wrong_dir):
            os.makedirs(wrong_dir)
            print(f"Created directory: {wrong_dir}")
        with open(wrong_questions_pdf_path, 'wb') as output_pdf:
            wrong_questions_pdf.write(output_pdf)
        print(f"Wrong questions PDF generated: {wrong_questions_pdf_path}")

In [None]:
if mode == 0:

    def get_pdf_files(directory, difficulty):
        """Return the PDF file names in `directory` that contain `difficulty`.

        Args:
            directory: Folder to scan (not recursive).
            difficulty: Substring that must appear in the file name, e.g. "(Easy)".

        Returns:
            A list of matching file names (not full paths), sorted by name.
            `os.listdir` yields entries in arbitrary order, so sorting keeps
            the page order of a merged PDF reproducible across machines.
        """
        return sorted(
            name
            for name in os.listdir(directory)
            if name.endswith('.pdf') and difficulty in name
        )

    def merge_pdfs(directory, pdf_files, output_path):
        """Concatenate the pages of several PDFs into a single file.

        Args:
            directory: Folder that contains the source PDFs.
            pdf_files: File names inside `directory`, merged in the given order.
            output_path: Destination path of the combined PDF.
        """
        combined = PdfWriter()
        for name in pdf_files:
            source = PdfReader(os.path.join(directory, name))
            # Copy every page of this source, preserving page order.
            for source_page in source.pages:
                combined.add_page(source_page)
        with open(output_path, 'wb') as destination:
            combined.write(destination)

    def extract_pages_from_pdf(pdf_path):
        """Return the zero-based page indices of the PDF at `pdf_path` as a list."""
        page_count = len(PdfReader(pdf_path).pages)
        return [index for index in range(page_count)]

    def select_pages(pages_list, num_pages, selected_pages_set):
        """Randomly pick `num_pages` pages that have not been selected before.

        Args:
            pages_list: All candidate page indices.
            num_pages: How many pages to pick.
            selected_pages_set: Pages already used; updated in place with the
                newly picked pages.

        Returns:
            The list of newly picked pages.

        Raises:
            ValueError: If fewer than `num_pages` unused pages remain.
        """
        remaining = list(set(pages_list) - selected_pages_set)
        if len(remaining) < num_pages:
            raise ValueError("Not enough pages to select from.")
        picked = random.sample(remaining, num_pages)
        # Record the picks so the caller's set reflects what is now used.
        selected_pages_set.update(picked)
        return picked

    def save_selected_pages(filename, selected_pages):
        """Merge `selected_pages` into the JSON history at `filename`.

        Args:
            filename: Path of the history file; created if it does not exist.
            selected_pages: Dict with "easy", "medium" and "hard" keys, each
                holding an iterable of page indices.

        Pages already stored in the file are kept; each difficulty ends up
        with the duplicate-free union of the old and new pages.
        """
        levels = ["easy", "medium", "hard"]

        # Start from the stored history when there is one, else from empty lists.
        if not os.path.exists(filename):
            history = {level: [] for level in levels}
        else:
            with open(filename, 'r') as file:
                history = json.load(file)

        # Union old and new pages per difficulty; a set removes duplicates.
        for level in levels:
            previous = set(history.get(level, []))
            incoming = set(selected_pages[level])
            history[level] = list(previous.union(incoming))

        with open(filename, 'w') as file:
            json.dump(history, file, indent=4)

    def load_selected_pages(filename):
        """Return the stored selection history, or an empty one.

        The JSON file at `filename` is read only when it exists and the
        notebook-level `new` flag is exactly True; in every other case a dict
        with empty "easy", "medium" and "hard" lists is returned.
        """
        empty_history = {"easy": [], "medium": [], "hard": []}
        if not os.path.exists(filename):
            return empty_history
        if new is not True:
            return empty_history
        with open(filename, 'r') as file:
            return json.load(file)

    def main(directory, output_path, history_file):
        """Build a student question set by sampling unused pages per difficulty.

        Args:
            directory: Folder holding the blank exercise PDFs; the merged
                per-difficulty PDFs are written to and read from it.
            output_path: Where the student's PDF is written.
            history_file: JSON file recording which pages were already used.

        The user is prompted for how many easy, medium and hard pages to
        include. If an entry is not an integer, is negative, or exceeds the
        number of pages still unused, an error is printed and nothing is
        written.
        """
        difficulties = ("easy", "medium", "hard")

        # Make sure the folder for the student's PDF exists. The emptiness
        # check avoids calling makedirs('') for a bare file name.
        student_dir = os.path.dirname(output_path)
        if student_dir and not os.path.exists(student_dir):
            os.makedirs(student_dir)
            print(f"Created directory: {student_dir}")

        # Build each merged_<difficulty>.pdf only when it is missing; reusing
        # the existing file keeps the page indices stored in the history valid.
        merged_paths = {}
        for difficulty in difficulties:
            label = difficulty.capitalize()
            merged_path = os.path.join(directory, f"merged_{difficulty}.pdf")
            merged_paths[difficulty] = merged_path
            if not os.path.exists(merged_path):
                merge_pdfs(directory, get_pdf_files(directory, f"({label})"), merged_path)
                print(f"Merged {label} PDFs into {merged_path}")

        # Pages handed out previously (empty unless the history is loaded).
        selected_pages = load_selected_pages(history_file)
        selected_pages_set = {
            difficulty: set(selected_pages[difficulty]) for difficulty in difficulties
        }

        pages = {
            difficulty: extract_pages_from_pdf(merged_paths[difficulty])
            for difficulty in difficulties
        }
        # Pages that can still be picked. Computed as a set difference so that
        # stale history entries cannot distort the count.
        remaining = {
            difficulty: len(set(pages[difficulty]) - selected_pages_set[difficulty])
            for difficulty in difficulties
        }

        # Show how many pages exist and how many were already used.
        print(f"Total Available = {sum(remaining.values())}")
        for difficulty in difficulties:
            print(
                f"{difficulty.capitalize()} Pages: Available = {len(pages[difficulty])}, "
                f"Selected = {len(selected_pages_set[difficulty])}"
            )

        # Ask how many pages of each difficulty to include.
        requested = {}
        try:
            for difficulty in difficulties:
                requested[difficulty] = int(
                    input(f"How many {difficulty.capitalize()} pages to select (max {remaining[difficulty]}): ")
                )
        except ValueError:
            print("Please enter a valid number.")
            return

        # Validate against the pages that are actually still unused, so that
        # select_pages cannot fail halfway through with an uncaught ValueError.
        if any(requested[difficulty] < 0 for difficulty in difficulties):
            print("Error: Number of pages requested cannot be negative.")
            return
        if any(requested[difficulty] > remaining[difficulty] for difficulty in difficulties):
            print("Error: Number of pages requested exceeds available pages.")
            return

        # Sample first (this also records the picks in selected_pages_set) ...
        chosen = {
            difficulty: select_pages(
                pages[difficulty], requested[difficulty], selected_pages_set[difficulty]
            )
            for difficulty in difficulties
        }

        # ... then assemble the student PDF: easy, then medium, then hard.
        pdf_writer = PdfWriter()
        for difficulty in difficulties:
            pdf_reader = PdfReader(merged_paths[difficulty])
            for page_num in chosen[difficulty]:
                pdf_writer.add_page(pdf_reader.pages[page_num])

        with open(output_path, 'wb') as out_pdf:
            pdf_writer.write(out_pdf)

        # Persist the used pages so they can be excluded next time.
        selected_pages = {
            difficulty: list(selected_pages_set[difficulty]) for difficulty in difficulties
        }
        save_selected_pages(history_file, selected_pages)
        print(f"Student PDF created at: {output_path}")

    # Folder with the blank exercise PDFs; the merged per-difficulty PDFs are
    # also created in and read from this folder.
    directory = os.path.join(template_dir, "blanks")
    # Destination of the generated question set for this student
    output_path = os.path.join(student_dir, f"{section}_{set_number}_{student_name}.pdf")
    # JSON file that keeps track of the pages already selected for this student and section
    history_file = os.path.join(student_dir, "selected_pages_history.json")
    # Generate the set; this prompts for the number of pages per difficulty.
    main(directory, output_path, history_file)


Total Available = 235
Easy Pages: Available = 95, Selected = 0
Medium Pages: Available = 82, Selected = 0
Hard Pages: Available = 58, Selected = 0


Student PDF created at: ./student_files/renato/math/problems/problems_1.1_renato.pdf
