<a href="https://colab.research.google.com/github/Nada-Abidii/AI-for-FinTech/blob/main/ocr_pie_chart.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### 1. Installations

In [None]:
!pip install roboflow
!pip install pytesseract
!apt-get install tesseract-ocr


#### 2. Imports

In [None]:
from roboflow import Roboflow
import pytesseract
from PIL import Image, ImageDraw, ImageOps, ImageEnhance

#### 3. Image Preparation and Preprocessing


In [None]:
# Roboflow credentials and the identifiers of the model version to load
api_key = "**********"
workspace_id = "*************"
project_id = "************"
version_number = 3

# Walk workspace -> project -> version one step at a time, then take its model
rf = Roboflow(api_key=api_key)
workspace = rf.workspace(workspace_id)
project = workspace.project(project_id)
model = project.version(version_number).model

# Outline every detection on the image and tag it with its class name
def draw_boxes(image, detections):
    """Draw a red rectangle and class label for each prediction.

    Args:
        image: Image to annotate; it is drawn on in place.
        detections: Mapping with a "predictions" list. Each prediction
            provides "x", "y", "width", "height" and "class"; "x"/"y" are
            used as the centre of the box.

    Returns:
        The same ``image`` object that was passed in, now annotated.
    """
    canvas = ImageDraw.Draw(image)
    for pred in detections["predictions"]:
        # Convert centre + size into corner coordinates.
        cx, half_w = pred["x"], pred["width"] / 2
        cy, half_h = pred["y"], pred["height"] / 2
        left, top = cx - half_w, cy - half_h
        right, bottom = cx + half_w, cy + half_h
        name = pred["class"]
        canvas.rectangle([left, top, right, bottom], outline="red", width=2)
        # The label is anchored at the box's top-left corner.
        canvas.text((left, top), name, fill="red")
    return image

#### 4. Text Extraction with OCR




In [None]:
# Run OCR on the whole image, or on one rectangular region of it
def extract_text_from_image(image, region=None):
    """Return the text Tesseract reads from ``image``.

    Args:
        image: Image to read; must support ``crop`` when ``region`` is given.
        region: Optional box handed to ``image.crop``. When falsy, the
            full image is used.

    Returns:
        The raw string produced by ``pytesseract.image_to_string``
        (not stripped).
    """
    target = image.crop(region) if region else image
    # "--psm 6" selects Tesseract's page segmentation mode 6.
    return pytesseract.image_to_string(target, config='--psm 6')

# Read the chart title from the strip at the top of the image
def extract_title(image):
    """Return the OCR text found in the top 20% of ``image``, stripped.

    Args:
        image: Image exposing ``size`` as ``(width, height)``.

    Returns:
        The recognised text with surrounding whitespace removed.
    """
    img_width, img_height = image.size
    # Full width, from the top edge down to one fifth of the height.
    top_band = (0, 0, img_width, img_height * 0.2)
    return extract_text_from_image(image, top_band).strip()

# Read the class names from the right-hand side of the chart image
def extract_classes(image):
    """Return the OCR text found in the right-hand part of ``image``.

    The region starts at 68% of the width and spans the full height
    (presumably where the legend sits -- confirm against the input charts).

    Args:
        image: Image exposing ``size`` as ``(width, height)``.

    Returns:
        The recognised text with surrounding whitespace removed.
    """
    img_width, img_height = image.size
    right_band = (0.68 * img_width, 0, img_width, img_height)
    raw_text = extract_text_from_image(image, right_band)
    return raw_text.strip()

# Process the pie chart: run detection, then save an annotated preview
image_path = 'Pie-Chart-3.png'
detection = model.predict(image_path, confidence=40, overlap=30).json()
image3 = Image.open(image_path).convert("RGB")
# draw_boxes paints directly onto the image it is given, so annotate a copy.
# image3 stays free of the red boxes and class labels, which would otherwise
# end up in the text recognised by the OCR steps that follow.
annotated3 = draw_boxes(image3.copy(), detection)
annotated3.save('annotated3.png')

# Stand-in for the real crop_and_save, defined here so the cell runs.
# Swap in the actual cropping/saving logic when it is available.
def crop_and_save(image, detection):
    """Placeholder: return the input image as the only "crop".

    Args:
        image: Image that would be cropped.
        detection: Detection result; currently unused.

    Returns:
        A one-element list containing ``image`` itself.
    """
    crops = [image]  # placeholder result: no cropping or saving happens yet
    return crops

# OCR the title and the class names of every image crop_and_save returns
cropped_images3 = crop_and_save(image3, detection)
for j, cropped_image3 in enumerate(cropped_images3):
    title = extract_title(cropped_image3)
    classes = extract_classes(cropped_image3)
    # One call, two lines of output: title first, then classes.
    print(
        f'Title for chart box {j}: {title}',
        f'Classes for chart box {j}: {classes}',
        sep='\n',
    )