<a href="https://colab.research.google.com/github/ParthM200/AP-CSA-Salesperson-Project/blob/main/Parth_and_Jugal_AP_FRQ_Grader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch transformers PyPDF2 numpy scikit-learn

import torch
from transformers import BertTokenizer, BertModel
import numpy as np
from PyPDF2 import PdfReader
from sklearn.metrics.pairwise import cosine_similarity
import os


def read_pdf(file_path):
    """Return the extracted text of every page in the PDF at ``file_path``.

    Pages are processed in document order and each page's text is followed
    by a newline, so the result ends with a newline when the PDF has at
    least one page and is the empty string when it has none.
    """
    pdf = PdfReader(file_path)
    # One "<page text>\n" chunk per page, concatenated in page order.
    return ''.join(page.extract_text() + "\n" for page in pdf.pages)

def parse_responses(pdf_text):
    """Return a mapping of question number to the student's response text.

    Placeholder implementation: ``pdf_text`` is currently ignored and a
    fixed two-question mapping is returned.
    """
    # TODO: replace with real parsing that matches the PDF's structure,
    # keyed on how each question is labelled (e.g. 4a, 4b, 4c).
    return {1: "(a)", 2: "(c)"}

# Loaded (tokenizer, model) pairs keyed by checkpoint name, so the pretrained
# weights are read once per process instead of on every embedding request.
_BERT_CACHE = {}


def _load_bert(model_name='bert-base-uncased'):
    """Return the cached ``(tokenizer, model)`` pair for ``model_name``."""
    if model_name not in _BERT_CACHE:
        _BERT_CACHE[model_name] = (
            BertTokenizer.from_pretrained(model_name),
            BertModel.from_pretrained(model_name),
        )
    return _BERT_CACHE[model_name]


def get_bert_embeddings(text):
    """Return a mean-pooled ``bert-base-uncased`` embedding of ``text``.

    The text is tokenized (truncated to the tokenizer's limit), run through
    the model without gradient tracking, and the last hidden state of the
    first sequence in the batch is averaged over its first axis.

    Args:
        text: The text to embed. Only the first sequence of the tokenized
            batch contributes to the result (``last_hidden_state[0]``).

    Returns:
        A NumPy array holding the averaged hidden state.
    """
    # Grading embeds one student response plus every sample response, so
    # reloading the tokenizer and model per call would dominate the runtime.
    tokenizer, model = _load_bert()
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state[0].mean(dim=0).numpy()

def grade_response(question_response, sample_responses, question_number):
    """Return the score of the sample response most similar to the student's.

    The student response and every sample response are embedded with
    ``get_bert_embeddings``; the score attached to the sample with the
    highest cosine similarity is returned (the first one on ties).

    Args:
        question_response: The student's response text.
        sample_responses: Either a dict mapping question numbers to lists of
            ``(response_text, score)`` pairs, or such a list already selected
            for this question.
        question_number: The question being graded. Used as the lookup key
            when ``sample_responses`` is a dict, and in error messages.

    Returns:
        The score stored with the best-matching sample response.

    Raises:
        KeyError: If ``sample_responses`` is a dict without ``question_number``.
        ValueError: If there are no sample responses to compare against.
    """
    # Callers pass either the whole mapping or the per-question list; indexing
    # a list by question number would silently pick the wrong element or fail.
    if isinstance(sample_responses, dict):
        candidates = sample_responses[question_number]
    else:
        candidates = sample_responses
    if not candidates:
        raise ValueError(
            f"No sample responses available for question {question_number}."
        )

    # Shape everything as 2-D rows so one cosine_similarity call scores all samples.
    question_row = np.asarray(get_bert_embeddings(question_response)).reshape(1, -1)
    sample_rows = np.vstack([
        np.asarray(get_bert_embeddings(sample_text)).reshape(1, -1)
        for sample_text, _ in candidates
    ])

    similarities = cosine_similarity(question_row, sample_rows)[0]
    best_index = int(np.argmax(similarities))
    return candidates[best_index][1]


# Scoring rubric for each question: rubrics[question_number][score] is the
# description printed next to a response that receives that score.
# Add an entry for every question, with one description per point value.
rubrics = {
    1: {
        1: "The response indicates that the inner membranes permit different reactions to occur in different regions of the organelles.",
        0: "The response does not indicate that the inner membranes permit different reactions to occur in different regions of the organelles.",
    },
    2: {
        1: "The response indicates that the folding increases surface area so that the membrane can hold more enzymes, and more reactions (e.g., production) can take place than if less surface area was present.",
        0: "The response does not indicate that the folding increases surface area so that the membrane can hold more enzymes, and more reactions (e.g., production) can take place than if less surface area was present.",
    },
}

# Previously graded student answers, grouped by question number.
# Each entry is a (response_text, score_received) pair.
sample_responses = {
    1: [
        (
            "The membranes that separate the different regions of the mitochondria and chloroplast are important because they help to separate the enzymes of these organelles so that they don't interfere with the other reactions inside the cell.",
            1,
        ),
        (
            "The inner membranes of energy-producing organelles such as the mitochondria in Animal cells and the chloroplasts in plant cells are important for cellular processes because they produce ATP which powers the cell, essentially powering the body.",
            0,
        ),
    ],
    2: [
        (
            "The folding of the mitochondrial inner membrane and thylakoids are beneficial to the cell carrying out chemical reactions because these folds help to increase the size of the cell and increase the SA: VA Ratio and thus help improve the rate at which the chemical reactions take place.",
            1,
        ),
        (
            "This is beneficial to cells carrying out chemical reactions because the inner membranes of power-producing organelles help power those chemical reactions. Without the inner membranes functioning all processes would cease. ",
            0,
        ),
    ],
}

# PDF documents to grade.
pdf_file_paths = ["/content/AP Bio FRQ Assignment"]

for pdf_file_path in pdf_file_paths:
    # Fail fast and name the offending path so a bad entry is easy to spot.
    if not os.path.exists(pdf_file_path):
        raise FileNotFoundError(
            f"The specified file path does not exist: {pdf_file_path}"
        )

    pdf_text = read_pdf(pdf_file_path)
    parsed_responses = parse_responses(pdf_text)

    graded_responses = {}
    for question_number, response_text in parsed_responses.items():
        # A question can only be graded when it has both graded samples to
        # compare against and a rubric to describe the resulting score.
        if not sample_responses.get(question_number) or question_number not in rubrics:
            print(f"Question {question_number}: skipped (no sample responses or rubric defined).")
            continue

        # grade_response looks up the samples for question_number itself, so it
        # must receive the whole mapping rather than the per-question list.
        grade = grade_response(response_text, sample_responses, question_number)
        graded_responses[question_number] = (grade, rubrics[question_number][grade])

    print(f"Graded Responses for {pdf_file_path}:")
    for question_number, (grade, description) in graded_responses.items():
        print(f"Question {question_number}: Grade {grade}, {description}")

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54

FileNotFoundError: The specified file path does not exist.

In [None]:
# Graded sample answers keyed by question number; each entry is a
# (response_text, score_received) pair.
sample_responses = {
    1: [
        (
            "The membranes that separate the different regions of the mitochondria and chloroplast are important because they help to separate the enzymes of these organelles so that they don't interfere with the other reactions inside the cell.",
            1,
        ),
        (
            "The inner membranes of energy-producing organelles such as the mitochondria in Animal cells and the chloroplasts in plant cells are important for cellular processes because they produce ATP which powers the cell, essentially powering the body.",
            0,
        ),
    ],
    2: [
        (
            "The folding of the mitochondrial inner membrane and thylakoids are beneficial to the cell carrying out chemical reactions because these folds help to increase the size of the cell and increase the SA: VA Ratio and thus help improve the rate at which the chemical reactions take place.",
            1,
        ),
        (
            "This is beneficial to cells carrying out chemical reactions because the inner membranes of power-producing organelles help power those chemical reactions. Without the inner membranes functioning all processes would cease. ",
            0,
        ),
    ],
}

In [None]:
#