#### 1. Installations

In [None]:
!pip install roboflow
!pip install pytesseract
!apt-get install tesseract-ocr

#### 2. Imports

In [None]:
from roboflow import Roboflow
import pytesseract
from PIL import Image, ImageDraw, ImageOps, ImageEnhance

#### 3. Image Preparation and Preprocessing


In [None]:
# Roboflow API key and model info
# NOTE(review): the values below are masked placeholders; real credentials
# must be filled in before this cell can run. Consider loading the key from
# the environment instead of committing it to the notebook.
api_key = "**********"
workspace_id = "*************"
project_id = "************"
version_number = 3

# Load the model
# Resolve workspace -> project -> version and keep the resulting model object
# in `model`; it is the object later used for `model.predict(...)`.
rf = Roboflow(api_key=api_key)
project = rf.workspace(workspace_id).project(project_id)
model = project.version(version_number).model

# Function to draw boxes around detections
def draw_boxes(image, detections):
    """Outline and label every prediction on ``image``.

    Each entry of ``detections["predictions"]`` is read for its ``x``, ``y``,
    ``width``, ``height`` and ``class`` keys. ``x``/``y`` are used as the box
    centre, so the corners are centre -/+ half the width and height.

    The drawing happens in place: the ``image`` object passed in is modified
    and then returned.
    """
    canvas = ImageDraw.Draw(image)
    for pred in detections["predictions"]:
        cx, half_w = pred["x"], pred["width"] / 2
        cy, half_h = pred["y"], pred["height"] / 2
        left, top = cx - half_w, cy - half_h
        right, bottom = cx + half_w, cy + half_h
        text = pred["class"]
        canvas.rectangle([left, top, right, bottom], outline="red", width=2)
        # The class name is written at the top-left corner of the box.
        canvas.text((left, top), text, fill="red")
    return image



def preprocess_image(image):
    """Prepare an image for OCR: grayscale, contrast boost, 2x upscale.

    Args:
        image: A PIL image.

    Returns:
        A new PIL image converted to grayscale, contrast-enhanced with a
        factor of 2, and resized to twice the input width and height.
    """
    image = ImageOps.grayscale(image)
    image = ImageEnhance.Contrast(image).enhance(2)
    # Image.ANTIALIAS was removed in Pillow 10 (AttributeError on current
    # releases). It was an alias of LANCZOS, which exists in old and new
    # Pillow versions alike, so the resampling result is unchanged.
    doubled_size = (image.width * 2, image.height * 2)
    return image.resize(doubled_size, Image.LANCZOS)

#### 4. Text Extraction with OCR


In [None]:
# Extract text from specific regions of the image using OCR
def extract_text_from_image(image, region=None):
    """Run Tesseract OCR over ``image`` or over a cropped part of it.

    Args:
        image: A PIL image.
        region: Optional crop box passed straight to ``image.crop``. When it
            is falsy (the default ``None``), the whole image is used.

    Returns:
        The raw string returned by ``pytesseract.image_to_string`` called
        with ``--psm 6``.
    """
    target = image.crop(region) if region else image
    return pytesseract.image_to_string(target, config='--psm 6')

# Extract title from the top of the chart
def extract_title(image):
    """Return the OCR text found in the top 20% of ``image``, stripped.

    The title band spans the full width and runs from the top edge down to
    ``0.2 * height``.
    """
    img_w, img_h = image.size
    top_band = (0, 0, img_w, img_h * 0.2)
    return extract_text_from_image(image, top_band).strip()

# Extract axis labels from the edges of the chart
def extract_axis_labels(image):
    """OCR the bottom and left edge bands of ``image`` as axis labels.

    The x-axis band is the bottom 10% of the image (full width); the y-axis
    band is the left 10% (full height). The y-axis crop is rotated by 270
    degrees with ``expand=True`` before OCR -- presumably so that a label
    written bottom-to-top becomes horizontal for Tesseract.

    Returns:
        A ``(x_axis_text, y_axis_text)`` tuple of stripped strings.
    """
    img_w, img_h = image.size
    bottom_band = (0, img_h * 0.9, img_w, img_h)
    left_band = (0, 0, img_w * 0.1, img_h)

    x_label = extract_text_from_image(image, bottom_band).strip()

    upright_left = image.crop(left_band).rotate(270, expand=True)
    y_label = extract_text_from_image(upright_left).strip()
    return x_label, y_label

# Predict and annotate the chart
detection = model.predict('Line-Graph-05-min.png', confidence=50, overlap=30).json()
image = Image.open('Line-Graph-05-min.png')
# draw_boxes() paints directly onto the image it receives. Annotate a copy so
# that `image` stays free of the red outlines and class labels; the later
# cropping/OCR steps read `image`, and drawn labels would otherwise be picked
# up by Tesseract as chart text.
annotated_image = draw_boxes(image.copy(), detection)
annotated_image.save('annotated_line_graph.png')

def crop_and_save(image, detections):
    """Crop one sub-image out of ``image`` per prediction.

    Each entry of ``detections["predictions"]`` supplies a centre (``x``,
    ``y``) and a size (``width``, ``height``); the crop box is the centre
    -/+ half the size, passed to ``image.crop`` as a 4-tuple.

    Despite the name, nothing is written to disk: the crops are only
    collected and returned.

    Returns:
        A list with the result of ``image.crop`` for every prediction, in
        prediction order.
    """
    def _bounds(pred):
        # (left, upper, right, lower) derived from centre and size.
        return (
            pred["x"] - pred["width"] / 2,
            pred["y"] - pred["height"] / 2,
            pred["x"] + pred["width"] / 2,
            pred["y"] + pred["height"] / 2,
        )

    return [image.crop(_bounds(pred)) for pred in detections["predictions"]]

# Cut one sub-image per detected box out of `image`.
cropped_images = crop_and_save(image, detection)

# For every cropped chart, OCR the title band and both axis bands and print
# the results, indexed by the position of the box in the prediction list.
for j, cropped_image in enumerate(cropped_images):
    title = extract_title(cropped_image)
    x_axis, y_axis = extract_axis_labels(cropped_image)
    print(f'Title for chart box {j}: {title}')
    print(f'X-axis label for chart box {j}: {x_axis}')
    print(f'Y-axis label for chart box {j}: {y_axis}')