# Ollama-OCR Implementation using llama 3.2

##### Github for referrence: https://github.com/imanoop7/Ollama-OCR/blob/main/src/ollama_ocr/ocr_processor.py

### installation of Ollama

In [None]:
!pip install ollama-ocr

### installation of Sentence-Transformers

In [None]:
!pip install sentence_transformers

### Install the llama 3.2 llb model

In [None]:
!ollama pull llama3.2-vision:11b

In [3]:
import tensorflow as tf

# Class Code of OCRProcessors

In [19]:
import json
from typing import Dict, Any, List, Union
import os
import base64
import requests
from tqdm import tqdm
import concurrent.futures
from pathlib import Path
import cv2
from pdf2image import convert_from_path

class OCRProcessors:
    def __init__(self, model_name: str = "llama3.2-vision:11b", 
                 base_url: str = "http://localhost:11434/api/generate",
                 max_workers: int = 1):
        
        self.model_name = model_name
        self.base_url = base_url
        self.max_workers = max_workers

    def _encode_image(self, image_path: str) -> str:
        """Convert image to base64 string"""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def _preprocess_image(self, image_path: str) -> str:
        """
        Preprocess image before OCR:
        - Convert PDF to image if needed
        - Auto-rotate
        - Enhance contrast
        - Reduce noise
        """

        # Read image
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Could not read image at {image_path}")

        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Enhance contrast using CLAHE
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
        enhanced = clahe.apply(gray)

        # Denoise
        denoised = cv2.fastNlMeansDenoising(enhanced)

        # Auto-rotate if needed
        # TODO: Implement rotation detection and correction

        # Save preprocessed image
        preprocessed_path = f"{image_path}_preprocessed.jpg"
        cv2.imwrite(preprocessed_path, denoised)

        return preprocessed_path

    def process_image(self, image_path: str, format_type: str = "darshdown", preprocess: bool = True) -> str:
        """
        Process an image and extract text in the specified format
        
        Args:
            image_path: Path to the image file
            format_type: One of ["markdown", "text", "json", "structured", "key_value"]
            preprocess: Whether to apply image preprocessing
        """
        try:
            if preprocess:
                image_path = self._preprocess_image(image_path)
            
            image_base64 = self._encode_image(image_path)
            
            # Clean up temporary files
            if image_path.endswith(('_preprocessed.jpg', '_temp.jpg')):
                os.remove(image_path)

            # Generic prompt templates for different formats
            prompts = {
                  "darshdown": """Extract the text only."""
            }

            # Get the appropriate prompt
            prompt = prompts.get(format_type, prompts["darshdown"])

            # Prepare the request payload
            payload = {
                "model": self.model_name,
                "prompt": prompt,
                "stream": False,
                "images": [image_base64]
            }

            # Make the API call to Ollama
            response = requests.post(self.base_url, json=payload)
            response.raise_for_status()  # Raise an exception for bad status codes
            
            result = response.json().get("response", "")
            
            # Clean up the result if needed
            if format_type == "json":
                try:
                    # Try to parse and re-format JSON if it's valid
                    json_data = json.loads(result)
                    return json.dumps(json_data, indent=2)
                except json.JSONDecodeError:
                    # If JSON parsing fails, return the raw result
                    return result
            
            return result
        except Exception as e:
            return f"Error processing image: {str(e)}"

    def process_batch(
        self,
        input_path: Union[str, List[str]],
        format_type: str = "darshdown",
        recursive: bool = False,
        preprocess: bool = True
    ) -> Dict[str, Any]:
        """
        Process multiple images in batch
        
        Args:
            input_path: Path to directory or list of image paths
            format_type: Output format type
            recursive: Whether to search directories recursively
            preprocess: Whether to apply image preprocessing
            
        Returns:
            Dictionary with results and statistics
        """
        # Collect all image paths
        image_paths = []
        if isinstance(input_path, str):
            base_path = Path(input_path)
            if base_path.is_dir():
                pattern = '**/*' if recursive else '*'
                for ext in ['.png', '.jpg', '.jpeg', '.pdf', '.tiff']:
                    image_paths.extend(base_path.glob(f'{pattern}{ext}'))
            else:
                image_paths = [base_path]
        else:
            image_paths = [Path(p) for p in input_path]

        results = {}
        errors = {}
        
        # Process images in parallel with progress bar
        with tqdm(total=len(image_paths), desc="Processing images") as pbar:
            with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
                future_to_path = {
                    executor.submit(self.process_image, str(path), format_type, preprocess): path
                    for path in image_paths
                }
                
                for future in concurrent.futures.as_completed(future_to_path):
                    path = future_to_path[future]
                    try:
                        results[str(path)] = future.result()
                    except Exception as e:
                        errors[str(path)] = str(e)
                    pbar.update(1)

        return {
            "results": results,
            "errors": errors,
            "statistics": {
                "total": len(image_paths),
                "successful": len(results),
                "failed": len(errors)
            }
        }

# Implementation of the OCR model 

In [9]:
# from ollama_ocr import OCRProcessor
# Create an instance
with tf.device('/GPU:0'):
    ocr = OCRProcessorss(model_name='llama3.2-vision:11b')
    
    # Test with an image (replace with your image path)
    result = ocr.process_image(
        image_path="C:\\Users\\darsh\\Desktop\\6th Sem\\SGP-IV\\Input Images\\Sample Image2.jpg",
        format_type="darshdown"
    )
    print(result)

TO, HOD

Subject: Request for subject change from RM to CP

Respected sir,

I am Shauti Panchal, currently in semester 6th/3rd year in CSE-1, awaiting to request a subject change from Research Methodology (RM) to Competitive Programming (CP) for the current semester.

I had already taken RM last semester and now I wish to explore CP to enhance my skills and growth in this field.

As I am not opting for placements, I believe this subject will help me focus on courses that align with my personal aspirations.

I kindly request your approval for this change.

Thank you for your understanding and support.


# Batch Wise Implementation for low Computation

#### batch wise results will be store into the variable 

In [8]:
# Test batch processing
with tf.device('/GPU:0'):
    ocr = OCRProcessors(model_name='llama3.2-vision:11b')
    batch_results = ocr.process_batch("C:\\Users\\darsh\\Desktop\\6th Sem\\SGP-IV\\Input Images\\Sample Image2.jpg")
    print(batch_results['statistics'])

Processing images: 100%|████████████████████████████████████████████████████████████████| 1/1 [03:07<00:00, 187.20s/it]

{'total': 1, 'successful': 1, 'failed': 0}





In [9]:
# Print statistics
print("\nProcessing Statistics:")
print(f"Total images: {batch_results['statistics']['total']}")
print(f"Successfully processed: {batch_results['statistics']['successful']}")
print(f"Failed: {batch_results['statistics']['failed']}")


Processing Statistics:
Total images: 1
Successfully processed: 1
Failed: 0


In [10]:
# Get text from all successfully processed images
for file_path, text in batch_results['results'].items():
    print(f"\nFile: {file_path}")
    print("-" * 50)
    print(f"Extracted Text: {text}")


File: C:\Users\darsh\Desktop\6th Sem\SGP-IV\Input Images\Sample Image2.jpg
--------------------------------------------------
Extracted Text: To, HOD,

Subject: Request for subject change from RM to CP.

Respected sir,

I am Shauti Panchal, currently in semester 6th/3rd year in CSE-1. I am writing to request a subject change from Research Methodology (RM) to Competitive Programming (CP) for the current semester.

I had already taken RM last semester and now wish to explore CP to enhance my skills and growth in this field.

As I am not opting for placements, I believe this subject will help me focus on courses that align with my personal aspirations.

I kindly request your approval for this change.

Thank you for your understanding and support.


# Removal of Empty Space

In [14]:
# Remove \n characters
cleaned_text = text.replace('\n', '')

# Print the result
print(cleaned_text)

To, HOD,Subject: Request for subject change from RM to CP.Respected sir,I am Shauti Panchal, currently in semester 6th/3rd year in CSE-1. I am writing to request a subject change from Research Methodology (RM) to Competitive Programming (CP) for the current semester.I had already taken RM last semester and now wish to explore CP to enhance my skills and growth in this field.As I am not opting for placements, I believe this subject will help me focus on courses that align with my personal aspirations.I kindly request your approval for this change.Thank you for your understanding and support.


# Evaluation Strategy

In [20]:
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity
import re

# Load the pre-trained model and tokenizer
model_name = "sentence-transformers/all-MiniLM-L6-v2"  # Replace if needed
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Function to generate embeddings
def get_embeddings(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Use mean pooling for embeddings
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings.numpy()

# Function to compute similarity
def compute_similarity(original, student):
    original_emb = get_embeddings(original)
    student_emb = get_embeddings(student)
    return cosine_similarity(original_emb, student_emb)[0][0]

# Function to extract key points from reference answer
def extract_key_points(reference_answer):
    key_points = re.split(r'[.!?]', reference_answer)
    return [kp.strip() for kp in key_points if kp.strip()]

# Function to check key point coverage
def check_key_points_coverage(key_points, student_answer):
    coverage = {}
    for key_point in key_points:
        coverage[key_point] = compute_similarity(key_point, student_answer)
    return coverage

# Function to assign marks with weighting
def assign_marks_with_weighting(coverage, total_marks):
    total_weight = sum(coverage.values())
    weighted_marks = 0
    for key_point, score in coverage.items():
        weight = score / total_weight if total_weight > 0 else 1 / len(coverage)
        weighted_marks += total_marks * weight * (score if score > 0.5 else 0)
    return round(weighted_marks, 2)

# Function to calculate penalties
def calculate_penalties(student_answer, key_points):
    penalties = 0
    # Penalize irrelevance: based on length not matching key points
    if len(student_answer.split()) > len(" ".join(key_points).split()) * 1.5:
        penalties += 1  # Example penalty for verbosity
    # Add more penalty logic as needed
    return penalties

# Function to generate feedback
def generate_feedback(coverage):
    feedback = []
    for key_point, score in coverage.items():
        if score > 0.85:
            feedback.append(f"Good match for: '{key_point}'")
        elif score > 0.5:
            feedback.append(f"Partial match for: '{key_point}'")
        else:
            feedback.append(f"Missing or poor match for: '{key_point}'")
    return feedback

# Main evaluation function
def evaluate_answer(reference_answer, student_answer, total_marks):
    # Step 1: Extract key points from reference answer
    key_points = extract_key_points(reference_answer)

    # Step 2: Check coverage of key points
    coverage = check_key_points_coverage(key_points, student_answer)

    # Step 3: Assign marks with weighting
    marks = assign_marks_with_weighting(coverage, total_marks)

    # Step 4: Apply penalties
    penalties = calculate_penalties(student_answer, key_points)
    marks -= penalties
    marks = max(0, marks)  # Ensure non-negative marks

    # Step 5: Generate feedback
    feedback = generate_feedback(coverage)

    return round(marks), feedback

# Testing the implementation
if __name__ == "__main__":
    reference_answer = (
        '''Subject: Request for Subject Change from RM to CP

            Dear HOD,

            I am Shauti Panchal, a 6th-semester student in CSE-1. I request approval to change my subject from Research Methodology (RM) to Competitive Programming (CP) this semester. Having already taken RM last semester, I wish to explore CP to enhance my skills and align with my aspirations, as I am not opting for placements.

            Thank you for your consideration.'''
    )

    r = (
        '''Subject: Request for Subject Change from RM to CP

            Dear HOD,

            I am Shauti Panchal, a 6th-semester student in CSE-1. I request approval to change my subject from Research Methodology (RM) to Competitive Programming (CP) this semester. Having already taken RM last semester, I wish to explore CP to enhance my skills and align with my aspirations, as I am not opting for placements.

            Thank you for your consideration.'''
    )
    
    total_marks = 10

    # Evaluate the answer
    marks, feedback = evaluate_answer(reference_answer, r, total_marks)

    # Print the results
    print(f"Assigned Marks: {marks}")
    print("Feedback:")
    for line in feedback:
        print(f"- {line}")



Assigned Marks: 8
Feedback:
- Good match for: 'Subject: Request for Subject Change from RM to CP

            Dear HOD,

            I am Shauti Panchal, a 6th-semester student in CSE-1'
- Good match for: 'I request approval to change my subject from Research Methodology (RM) to Competitive Programming (CP) this semester'
- Partial match for: 'Having already taken RM last semester, I wish to explore CP to enhance my skills and align with my aspirations, as I am not opting for placements'
- Missing or poor match for: 'Thank you for your consideration'


# Evaluation Strategy Version-1

In [4]:
from sentence_transformers import SentenceTransformer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import re
import numpy as np

# Load models
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')

# Preprocessing
def preprocess_text(text):
    return re.sub(r'\s+', ' ', re.sub(r'[^\w\s]', '', text.lower())).strip()

# Semantic similarity using SBERT
def calculate_sbert_similarity(ref, cand):
    ref_embedding = sbert_model.encode([ref])[0]
    cand_embedding = sbert_model.encode([cand])[0]
    similarity = np.dot(ref_embedding, cand_embedding) / (np.linalg.norm(ref_embedding) * np.linalg.norm(cand_embedding))
    return similarity

# BLEU score calculation
def calculate_bleu_similarity(ref, cand):
    return sentence_bleu([ref.split()], cand.split(), smoothing_function=SmoothingFunction().method1)

# Redundancy penalty
def calculate_redundancy_penalty(text):
    words = text.split()
    unique_words = set(words)
    redundancy_ratio = 1 - len(unique_words) / len(words) if words else 0
    return max(0, redundancy_ratio - 0.2)  # Allow 20% redundancy

# Evaluation
def evaluate_response(student_answer, evaluator_answer, weights):
    student_answer = preprocess_text(student_answer)
    evaluator_answer = preprocess_text(evaluator_answer)
    
    # BLEU Score
    bleu = calculate_bleu_similarity(evaluator_answer, student_answer)
    
    # SBERT Similarity
    sbert_similarity = calculate_sbert_similarity(evaluator_answer, student_answer)

    # Redundancy Penalty
    redundancy_penalty = calculate_redundancy_penalty(student_answer)
    
    # Final Scores based on user-defined weights
    scores = {
        "BLEU Score": bleu * 100,
        "SBERT Similarity": sbert_similarity * 100,
        "Redundancy Penalty": redundancy_penalty * 100
    }
    
    return scores

# Dynamic Mark Calculation
def calculate_marks(scores, total_marks=10, weights=None):
    if weights is None:
        weights = {
            "BLEU Score": 0.5,  # Default weightage
            "SBERT Similarity": 0.5,
            "Redundancy Penalty": -0.1  # Negative weight to reduce marks for redundancy
        }
    
    final_score = sum(scores[aspect] * weight for aspect, weight in weights.items())
    marks_obtained = max(0, (final_score / 100) * total_marks)
    return round(marks_obtained, 2)

# Feedback Generation
def generate_feedback(scores, marks_obtained, total_marks, thresholds=None):
    if thresholds is None:
        thresholds = {
            "SBERT Similarity": 70,
            "Redundancy Penalty": 10
        }
    
    feedback = []
    feedback.append(f"Content Similarity (BLEU): {scores['BLEU Score']:.2f}%")
    feedback.append(f"Semantic Similarity (SBERT): {scores['SBERT Similarity']:.2f}%")
    feedback.append(f"Redundancy Penalty: -{scores['Redundancy Penalty']:.2f}%")
    feedback.append(f"Marks: {round(marks_obtained)}/{total_marks}")
    
    if scores["SBERT Similarity"] < thresholds["SBERT Similarity"]:
        feedback.append("Suggestion: Improve semantic alignment with the evaluator's answer.")
    if scores["Redundancy Penalty"] > thresholds["Redundancy Penalty"]:
        feedback.append("Suggestion: Avoid repeating phrases unnecessarily.")
    
    return "\n".join(feedback)

# Main Function
if __name__ == "__main__":
    evaluator_answer = '''Extracted Text: To, HOD,

    Subject: Request for subject change from RM to CP.
    
    Respected sir,
    
    I am Shauti Panchal, currently in semester 6th/3rd year in CSE-1. I am writing to request a subject change from Research Methodology (RM) to Competitive Programming (CP) for the current semester.
    
    I had already taken RM last semester and now wish to explore CP to enhance my skills and growth in this field.
    
    As I am not opting for placements, I believe this subject will help me focus on courses that align with my personal aspirations.
    
    I kindly request your approval for this change.Thank you for your understanding and support.'''
    student_answer = '''Extracted Text: To, HOD, Subject: Request for subject change from RM to CP.Respected sir,I am Shauti Panchal, currently in semester 6th/3rd year in CSE-1. I am writing to request a subject change from Research Methodology (RM) to Competitive Programming (CP) for the current semester.I had already taken RM last semester and now wish to explore CP to enhance my skills and growth in this field.As I am not opting for placements, I believe this subject will help me focus on courses that align with my personal aspirations.I kindly request your approval for this change.Thank you for your understanding and support.'''
    total_marks = 10

    # User-defined evaluation scheme (weights for each aspect)
    weights = {
        "BLEU Score": 0.05,  # Example of adjusting the weight
        "SBERT Similarity": 0.95,
        "Redundancy Penalty": -0.2  # More penalty for redundancy
    }
    
    # Custom feedback thresholds (score below this value triggers specific feedback)
    thresholds = {
        "SBERT Similarity": 75,  # Custom threshold for SBERT similarity
        "Redundancy Penalty": 15  # Custom threshold for redundancy
    }

    # Evaluate and generate feedback based on the custom evaluation scheme
    scores = evaluate_response(student_answer, evaluator_answer, weights)
    marks_obtained = calculate_marks(scores, total_marks, weights)
    feedback = generate_feedback(scores, marks_obtained, total_marks, thresholds)

    print("Scores:")
    for aspect, score in scores.items():
        print(f"{aspect}: {score:.2f}%")
    print("\nFeedback:")
    print(feedback)

Scores:
BLEU Score: 81.02%
SBERT Similarity: 96.12%
Redundancy Penalty: 12.00%

Feedback:
Content Similarity (BLEU): 81.02%
Semantic Similarity (SBERT): 96.12%
Redundancy Penalty: -12.00%
Marks: 9/10
