In [9]:
import numpy as np
from openvino.runtime import Core
import cv2
import os
import json

# Directories for captured and preprocessed frames
captured_frames_dir = "captured_frames"
preprocessed_frames_dir = "preprocessed_frames"
os.makedirs(captured_frames_dir, exist_ok=True)
os.makedirs(preprocessed_frames_dir, exist_ok=True)

def preprocess_image(image_path):
    print(f"Loading image from path: {image_path}")
    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to load image from {image_path}")
        return None
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    resized_image = cv2.resize(image_rgb, (1280, 768))
    normalized_image = resized_image.astype(np.float32) / 255.0
    batched_image = np.expand_dims(normalized_image, axis=0)
    preprocessed_image_path = os.path.join(preprocessed_frames_dir, "preprocessed_frame.png")
    cv2.imwrite(preprocessed_image_path, (resized_image * 255).astype(np.uint8))
    return batched_image

def perform_ocr(image):
    ie = Core()
    model_path = r"C:\Users\Arjun\Dhanvantri\intel\text-detection-0004\FP32\text-detection-0004.xml"
    weights_path = r"C:\Users\Arjun\Dhanvantri\intel\text-detection-0004\FP32\text-detection-0004.bin"
    
    # Load and compile the model
    net = ie.read_model(model=model_path, weights=weights_path)
    compiled_model = ie.compile_model(model=net, device_name="CPU")
    
    input_layer = compiled_model.input(0)
    output_layer = compiled_model.output(0)
    
    # Get model's expected input shape
    input_shape = input_layer.shape  # Should be [1, 768, 1280, 3]
    h, w, c = input_shape[1:4]  # Extract height, width, and channel info

    # Resize the image to match model's expected input size (768, 1280)
    if image.shape[1] != h or image.shape[2] != w:
        print(f"Resizing image to expected dimensions: ({h}, {w})")
        resized_image = cv2.resize(image[0], (w, h))  # Resize only the first element of batched_image
    else:
        resized_image = image[0]  # If already the correct size, use the image as is

    # No need to transpose, keep the shape as (H, W, C)
    resized_image = resized_image.astype(np.float32)  # Ensure it's float32 type
    batched_image = np.expand_dims(resized_image, axis=0)  # Add batch dimension (1, H, W, C)

    # Perform inference
    result = compiled_model([batched_image])[output_layer]

    # Extract the text result from the model output
    text = result[0]
    return text



def live_scan_document():
    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        cv2.imshow("Live Document Scan", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
    if ret:
        captured_frame_path = os.path.join(captured_frames_dir, "captured_frame.png")
        cv2.imwrite(captured_frame_path, frame)
        return captured_frame_path
    else:
        print("Failed to capture the document")
        return None

def extract_and_compare_medicines(ocr_text, json_file_path):
    with open(json_file_path, 'r') as file:
        medicine_data = json.load(file)
    print("Loaded Medicine Data:", medicine_data)
    if isinstance(medicine_data, list):
        medicine_names = [medicine['name'] for medicine in medicine_data]
    elif isinstance(medicine_data, dict) and 'medicines' in medicine_data:
        medicine_names = [medicine['name'] for medicine in medicine_data['medicines']]
    else:
        print("Unexpected JSON structure")
        return []
    recognized_medicines = []
    for name in medicine_names:
        if name.lower() in ocr_text.lower():
            recognized_medicines.append(name)
    return recognized_medicines

# Example usage
captured_image_path = live_scan_document()
if captured_image_path:
    preprocessed_frame = preprocess_image(captured_image_path)
    if preprocessed_frame is not None:
        ocr_result = perform_ocr(preprocessed_frame)
        print("OCR Result:", ocr_result)
        json_file_path = "Medicines.json"  # Update this with the actual path to your JSON file
        medicines = extract_and_compare_medicines(ocr_result, json_file_path)
        print("Recognized Medicines:", medicines)
    else:
        print("Preprocessing failed.")
else:
    print("Image capture failed.")


Loading image from path: captured_frames\captured_frame.png
OCR Result: [[[ 3.4877636 -3.1968932]
  [ 3.5155587 -3.2959235]
  [ 3.4266248 -3.4069686]
  ...
  [ 3.6873827 -3.4542458]
  [ 3.4111779 -3.900973 ]
  [ 3.8870025 -3.2579443]]

 [[ 4.502776  -2.2526045]
  [ 3.4831135 -3.2970302]
  [ 3.495217  -3.3196104]
  ...
  [ 3.5555472 -3.4787579]
  [ 3.3591306 -3.7364821]
  [ 3.9895773 -3.229161 ]]

 [[ 4.8983655 -1.946895 ]
  [ 3.79108   -3.052875 ]
  [ 3.7515838 -3.24178  ]
  ...
  [ 3.6101964 -3.5050066]
  [ 3.4384053 -3.6755028]
  [ 4.03691   -3.236055 ]]

 ...

 [[ 3.9914985 -3.1603901]
  [ 2.9506505 -4.081049 ]
  [ 2.9842846 -4.0578337]
  ...
  [ 3.287058  -4.3988156]
  [ 3.1431003 -4.3846283]
  [ 3.733561  -3.97016  ]]

 [[ 3.4679098 -3.4335785]
  [ 2.517486  -4.4029865]
  [ 2.6565177 -4.1537814]
  ...
  [ 3.0603795 -4.485783 ]
  [ 2.89885   -4.5038543]
  [ 3.506178  -3.9436402]]

 [[ 3.0950177 -3.971089 ]
  [ 3.1020882 -3.8152745]
  [ 3.1262665 -3.8207896]
  ...
  [ 3.5081127 -4.1

FileNotFoundError: [Errno 2] No such file or directory: 'path_to_your_medicines.json'