# In [42]:
import cv2
import pytesseract
from pytesseract import Output

# In [44]:
import re
from collections import defaultdict

import cv2
import numpy as np
import pytesseract

# Day names recognised in OCR text, in the order they are tested.
_DAYS = ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday')

# One "H:MM-H:MM" range. Tolerates spaces around the separator and the
# en/em dashes that OCR frequently substitutes for a plain hyphen.
_TIME_RANGE_RE = re.compile(
    r'(?<!\d)(\d{1,2}):(\d{2})\s*[-\u2013\u2014]\s*(\d{1,2}):(\d{2})(?!\d)'
)

# Gaps thinner than this (in pixels) separate fragments of the same ruling
# line rather than two table rows, so they are not worth sending to OCR.
_MIN_ROW_HEIGHT = 5


def _find_row_bands(thresh):
    """Return (top, bottom) pixel spans lying between horizontal ruling lines."""
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 1))
    detect_horizontal = cv2.morphologyEx(
        thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2
    )

    cnts = cv2.findContours(
        detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    # findContours returns 2 values in OpenCV 4 and 3 values in OpenCV 3.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    # Vertical extent of every detected line, ordered top to bottom.
    lines = sorted((y, y + h) for _, y, _, h in map(cv2.boundingRect, cnts))

    bands = []
    prev_bottom = None
    for top, bottom in lines:
        if prev_bottom is not None and top - prev_bottom >= _MIN_ROW_HEIGHT:
            bands.append((prev_bottom, top))
        prev_bottom = bottom if prev_bottom is None else max(prev_bottom, bottom)
    return bands


def _match_day(text):
    """Return the first entry of _DAYS found in *text* (case-insensitive), else None."""
    lowered = text.lower()
    return next((day for day in _DAYS if day.lower() in lowered), None)


def _range_minutes(text):
    """Yield start and end, as minutes since midnight, of each valid range in *text*."""
    for match in _TIME_RANGE_RE.finditer(text):
        start_h, start_m, end_h, end_m = map(int, match.groups())
        # Discard OCR garbage such as "99:75" instead of letting it skew min/max.
        if start_h < 24 and end_h < 24 and start_m < 60 and end_m < 60:
            yield start_h * 60 + start_m
            yield end_h * 60 + end_m


def extract_time_ranges(image_path):
    """Extract the overall time span per weekday from a timetable image.

    The image is split into row bands at its horizontal ruling lines and each
    band is OCR'd. A band (or OCR text line) that names a weekday makes that
    day current; every "H:MM-H:MM" range found from then on is attributed to
    it until another weekday is seen. If fewer than two ruling lines are
    detected, the whole image is OCR'd as a single band.

    Times are compared as written (24-hour arithmetic); AM/PM markers are not
    interpreted.

    Args:
        image_path: Path to the timetable image.

    Returns:
        A dict mapping weekday name (e.g. ``'Monday'``) to a string
        ``'HH:MM-HH:MM'`` spanning the earliest and latest time found for
        that day. Days with no recognised time range are omitted.

    Raises:
        OSError: If the image cannot be read or decoded.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Inverted Otsu threshold: dark ruling lines and text become foreground.
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # The detected contours are the ruling lines themselves (a few pixels
    # tall), so OCR must run on the bands *between* them to see any text.
    bands = _find_row_bands(thresh) or [(0, img.shape[0])]

    day_minutes = defaultdict(list)
    current_day = None

    for top, bottom in bands:
        text = pytesseract.image_to_string(img[top:bottom], config='--psm 6')
        for line in text.splitlines():
            current_day = _match_day(line) or current_day
            if current_day is None:
                continue
            minutes = list(_range_minutes(line))
            if minutes:
                day_minutes[current_day].extend(minutes)

    result = {}
    for day, minutes in day_minutes.items():
        earliest, latest = min(minutes), max(minutes)
        result[day] = (
            f"{earliest // 60:02d}:{earliest % 60:02d}"
            f"-{latest // 60:02d}:{latest % 60:02d}"
        )
    return result

# Demo: run the extractor on the sample routine image and show the
# resulting per-day time spans.
time_ranges = extract_time_ranges(
    "media/class-routine.png",
)
print(time_ranges)

# Output captured in the original notebook run: {}
