In [None]:
!pip install pytesseract easyocr opencv-python numpy pdf2image transformers sentence-transformers pandas scikit-learn
!apt-get install poppler-utils  # Required for PDF processing


In [None]:
import pytesseract
import easyocr
import cv2
import os
import pandas as pd
import numpy as np
from pdf2image import convert_from_path
from sentence_transformers import SentenceTransformer, util
from sklearn.ensemble import RandomForestRegressor
from google.colab import files
from IPython.display import display, JSON

# Load the pretrained Sentence-BERT model used to embed answers for
# similarity scoring.
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')

# Train the grading model on a small example dataset. Each row maps a
# (similarity score, grammar score) pair to a final grade.
df = pd.DataFrame({
    "Similarity Score": [0.9, 0.8, 0.7, 0.6, 0.5],
    "Grammar Score": [90, 85, 80, 75, 70],
    "Final Grade": [9, 8, 7, 6, 5]
})
X = df[['Similarity Score', 'Grammar Score']]
y = df['Final Grade']

# A fixed random_state makes the forest (and therefore every predicted grade)
# reproducible across runs; without it the same answer can be graded
# differently each time the notebook is executed.
grading_model = RandomForestRegressor(n_estimators=100, random_state=42)
grading_model.fit(X, y)

def extract_text_from_document(pdf_path):
    """
    Extracts text from every page of a PDF document via OCR.

    Each page is rendered to an image with ``convert_from_path``, written to a
    temporary JPEG, and passed to ``extract_text_from_image``.

    Args:
        pdf_path: Path to the PDF file to process.

    Returns:
        The OCR text of all pages joined with single spaces; pages that
        produced no text are skipped. An empty string if nothing was found.
    """
    # Local import so this function does not depend on a file-level import.
    import tempfile

    page_texts = []
    # A private temporary directory avoids clobbering files in the working
    # directory and is removed even when OCR raises part-way through.
    with tempfile.TemporaryDirectory() as tmp_dir:
        for i, img in enumerate(convert_from_path(pdf_path)):
            img_path = os.path.join(tmp_dir, f"temp_page_{i}.jpg")
            img.save(img_path, 'JPEG')
            page_texts.append(extract_text_from_image(img_path))

    # Join with a space so the last word of one page does not fuse with the
    # first word of the next.
    return " ".join(text for text in page_texts if text)

# Shared EasyOCR reader, created lazily on first use. Constructing a Reader
# is expensive, so it must not be rebuilt for every page.
_OCR_READER = None


def _get_ocr_reader():
    """Return the shared English EasyOCR reader, creating it on first use."""
    global _OCR_READER
    if _OCR_READER is None:
        _OCR_READER = easyocr.Reader(['en'])
    return _OCR_READER


def extract_text_from_image(image_path):
    """
    Extracts text from an image file using EasyOCR.

    Args:
        image_path: Path to the image file to read.

    Returns:
        The recognized text fragments joined with single spaces.

    Raises:
        ValueError: If the image cannot be read from ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising.
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")

    # NOTE(review): the previous version also computed a grayscale and an
    # inverted-threshold copy of the image but never used them; OCR has always
    # run on the original image, so that dead preprocessing was removed.
    result = _get_ocr_reader().readtext(image, detail=0)

    return " ".join(result)

def evaluate_answer(student_answer, expected_answer):
    """
    Compares student's answer with the expected answer using SBERT.

    Args:
        student_answer: Text of the student's answer (e.g. OCR output).
        expected_answer: Reference answer text to compare against.

    Returns:
        The cosine similarity between the two sentence embeddings as a float.
        Returns 0.0 when the student answer is None, empty, or only
        whitespace, since there is nothing meaningful to compare.
    """
    # A blank answer (for instance when OCR found no text) must not receive
    # whatever similarity the embedding of an empty string happens to yield.
    if not student_answer or not student_answer.strip():
        return 0.0

    embedding_1 = sbert_model.encode(expected_answer, convert_to_tensor=True)
    embedding_2 = sbert_model.encode(student_answer, convert_to_tensor=True)

    similarity_score = util.pytorch_cos_sim(embedding_1, embedding_2).item()
    return similarity_score

def predict_grade(similarity_score, grammar_score=85):
    """
    Predicts final grade using a trained ML model.

    Args:
        similarity_score: Similarity between the student and expected answer.
        grammar_score: Grammar score for the answer; defaults to 85.

    Returns:
        The model's predicted grade as a plain float rounded to two decimals.
    """
    features = [[similarity_score, grammar_score]]

    # When the model was fitted on a DataFrame, scikit-learn records the
    # column names and warns if prediction input lacks them. Reuse those names
    # so the prediction input matches the training input.
    feature_names = getattr(grading_model, "feature_names_in_", None)
    if feature_names is not None:
        features = pd.DataFrame(features, columns=list(feature_names))

    predicted_score = grading_model.predict(features)[0]
    # Convert the NumPy scalar to a built-in float before rounding.
    return round(float(predicted_score), 2)

def generate_feedback(score):
    """
    Maps a numeric score to a feedback message.

    Scores strictly above 8 get the top-tier message, scores strictly above 6
    get the middle-tier message, and everything else gets the fallback.
    """
    # Tiers are checked from highest to lowest; the first one exceeded wins.
    tiers = (
        (8, "Excellent answer with strong explanations! Keep it up!"),
        (6, "Good attempt! Try to improve clarity and add more details."),
    )
    for threshold, message in tiers:
        if score > threshold:
            return message
    return "Needs improvement. Focus on key concepts and sentence structure."

# Upload PDF File
print("Please upload the PDF containing handwritten answers:")
uploaded = files.upload()
# The upload result is empty when the dialog is cancelled; fail with a clear
# message instead of an IndexError.
if not uploaded:
    raise ValueError("No file was uploaded; please upload a PDF and re-run this cell.")
# Only the first uploaded file is graded.
pdf_path = next(iter(uploaded))

# Extract Text from Uploaded PDF
extracted_text = extract_text_from_document(pdf_path)
print("\nExtracted Answer:\n", extracted_text)

# Expected Answer (Modify as needed)
expected_answer = "Newton's Second Law states that Force equals mass times acceleration (F = ma)."

# Evaluate Answer Similarity
similarity_score = evaluate_answer(extracted_text, expected_answer)
print(f"\nSimilarity Score: {similarity_score:.2f}")

# Predict Grade (uses predict_grade's default grammar score)
predicted_grade = predict_grade(similarity_score)
print(f"\nPredicted Grade: {predicted_grade} / 10")

# Generate Feedback
feedback = generate_feedback(predicted_grade)
print("\nFeedback:", feedback)

# Display Final Output as JSON. display() is called explicitly so the result
# is rendered even if this is no longer the last expression in the cell.
display(JSON({
    "Extracted Answer": extracted_text,
    "Similarity Score": round(similarity_score, 2),
    "Predicted Grade": predicted_grade,
    "Feedback": feedback
}))
