# Image Encoder


In [1]:
import requests
import base64

In [20]:
API_KEY_REF = "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"

# Encodes Image to Base64 so that the ai 
def encode_image_to_base64(image_path: str):
    """
    Encodes Image to base64 encoding. 
    Input: image path
    Output: Base64 encoded image
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


# AI Response Function

In [32]:
def ai(prompt: str, img_files: str):
    """
    Gets ai response for prompt
    Input: Prompt and Path to Image
    Output: AI response
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY_REF}",
        "Content-Type": "application/json"
    }

    # Read and encode the image
    image_path = img_files
    base64_image = encode_image_to_base64(image_path)
    data_url = f"data:image/jpeg;base64,{base64_image}"

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": data_url
                    }
                }
            ]
        }
    ]

    payload = {
        "model": "meta-llama/llama-4-maverick:free",
        "messages": messages
    }

    response = requests.post(url, headers=headers, json=payload)
    return response.json()["choices"][0]["message"]["content"]

# Image Capture

In [4]:
import cv2


def take_picture(path_to_save: str ,port: int):
    """
    Takes picture via camera
    Input: Path to save the image and 
    """
    cap = cv2.VideoCapture(port)

    if not cap.isOpened():
        print("Cannot open camera")
        exit()

    ret, frame = cap.read()

    if ret:
        
        cv2.imwrite(path_to_save, frame)
        print("Photo saved as " + path_to_save)
    else:
        print("Failed to capture image")

    
    cap.release()
    cv2.destroyAllWindows()

# Prompt for Plant Name and water consuption in ml

In [33]:
var1  = print(ai("""What is in this image? eg if an apple is being shown then dont't describe it. just try to identify the apple and output the latin name only. 
         no other description or text of it. 
         also if you are only 100 percent sure, then only""","./images/uknown_plant.jpeg"))

Begonia x hiemalis


In [34]:
print(ai(f"""Please give me the water consumption in ml. If for example a plant needs 400ml of waters the output will be: 400. Don't give me text or any type of description or say anything except the number. If you fail to obey this you will be replaced by an better model. Only one number. Here is the name of the plant: {var1}""","./images/uknown_plant.jpeg"))

200


# Number of pixels

In [7]:
# import required module
import cv2
import numpy as np

# read the Image by giving path
image = cv2.imread('./images/uknown_plant.jpeg')

# display that image
cv2.imshow("GG", image) # type: ignore
# counting the number of pixels
number_of_white_pix = np.sum(image == 255)
number_of_black_pix = np.sum(image == 0)

print('Number of white pixels:', number_of_white_pix)
print('Number of black pixels:', number_of_black_pix)


cv2.waitKey(0)
cv2.destroyAllWindows()

Number of white pixels: 40
Number of black pixels: 2655


In [8]:
import cv2


image = cv2.imread("./images/uknown_plant.jpeg")


gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # type: ignore

_, thresh = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)


contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)


c = max(contours, key=cv2.contourArea)


x, y, w, h = cv2.boundingRect(c)

print(f"Width in pixels: {w}, Height in pixels: {h}")

# Optional: draw rectangle
cv2.rectangle(image, (x,y), (x+w, y+h), (0,255,0), 2) # type: ignore
cv2.imwrite("flower_measured.png", image) # type: ignore



Width in pixels: 1153, Height in pixels: 1531


True

# Bildtest

In [1]:
import cv2

# Open the default camera
cam = cv2.VideoCapture(0)

# Get the default frame width and height
frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

while True:
    ret, frame = cam.read()

    # Display the captured frame
    cv2.imshow('Camera', frame)
    

    # Press 'q' to exit the loop
    if cv2.waitKey(1) == ord('q'):
        cv2.imwrite("myimage.jpg", frame)
        break

# Release the capture and writer objects
cam.release()
cv2.destroyAllWindows()


moin = cv2.imread("myimage.jpg")
print(type(moin))

<class 'numpy.ndarray'>


# Calculate distance equivalence in pixels

In [79]:
import cv2
import numpy as np
import os

# --- SETTINGS: adjust these ---
img_path = "./myimage.jpg"  # your image path
Z_cm = 50.0               # distance camera -> ruler in cm (you said ~50cm)
real_ruler_length_mm = 300.0  # the real physical length of the ruler segment you will use (mm)
# ----------------------------------------------------------------

Z_mm = Z_cm * 10.0  # cm -> mm

img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError(f"Could not load image: {os.path.abspath(img_path)}")

h, w = img.shape[:2]
print("Image size (px):", w, "x", h)

# Convert to HSV to mask red ruler
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# two red ranges (wrap-around)
lower1 = np.array([0, 70, 50])
upper1 = np.array([10, 255, 255])
lower2 = np.array([170, 70, 50])
upper2 = np.array([180, 255, 255])

mask1 = cv2.inRange(hsv, lower1, upper1)
mask2 = cv2.inRange(hsv, lower2, upper2)
mask = cv2.bitwise_or(mask1, mask2)

# Morphological clean-up
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=2)
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)

# Find contours
cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if not cnts:
    raise RuntimeError("No red contour (ruler) found. Adjust color thresholds or check image.")

# choose largest red contour
c = max(cnts, key=cv2.contourArea)
area = cv2.contourArea(c)
print("Detected red contour area (px^2):", int(area))

# get minimum area rectangle -> gives width/height in px
rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rect).astype(int)
box_w = int(rect[1][0])
box_h = int(rect[1][1])
ruler_pixel_length = max(box_w, box_h)  # long side is the ruler length in px
print("Ruler pixel length (px):", ruler_pixel_length, " (box_w, box_h)=", box_w, box_h)

# Compute focal length in pixels:
f_px = (ruler_pixel_length * Z_mm) / real_ruler_length_mm
print(f"Estimated focal length (pixels): {f_px:.2f}")

# Example: convert an object measured in pixels (use your measured w,h)
# Replace these two with measurements you already computed or compute here.
object_pixel_width = 1153
object_pixel_height = 1531

real_width_mm = (object_pixel_width * Z_mm) / f_px
real_height_mm = (object_pixel_height * Z_mm) / f_px

print(f"Object size (estimated) -> width: {real_width_mm:.1f} mm, height: {real_height_mm:.1f} mm")
print(f"In cm -> width: {real_width_mm/10:.1f} cm, height: {real_height_mm/10:.1f} cm")

# Visualize detected ruler and bounding box (optional)
vis = img.copy()
cv2.drawContours(vis, [box], 0, (0,255,0), 2)
cv2.putText(vis, f"ruler_px={ruler_pixel_length}", (10,30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,255,0), 2)
out_path = "/mnt/data/ruler_detected.png"
cv2.imwrite(out_path, vis)
print("Wrote visualization to", out_path)


Image size (px): 640 x 480
Detected red contour area (px^2): 25893
Ruler pixel length (px): 223  (box_w, box_h)= 210 223
Estimated focal length (pixels): 371.67
Object size (estimated) -> width: 1551.1 mm, height: 2059.6 mm
In cm -> width: 155.1 cm, height: 206.0 cm
Wrote visualization to /mnt/data/ruler_detected.png


In [83]:
import math
from ultralytics import YOLO
import cv2

# Load a model
model = YOLO("yolo11n.pt")  # load a pretrained model (recommended for training)

image = cv2.imread("./images/ok.jpg")
height, width, channels = image.shape # type: ignore


results = model("./images/ok.jpg")

results[0].show()
# Length
print(results[0].boxes.xyxy)
plantLength = abs(results[0].boxes.xyxy[0][0] - results[0].boxes.xyxy[0][2]) 
print(int(plantLength))
print(int(width))

realwidth = (float(plantLength) / float(width)) * 46
print(realwidth)


image 1/1 c:\Users\Prath\Programming\esfz\images\ok.jpg: 384x640 1 potted plant, 246.9ms
Speed: 5.8ms preprocess, 246.9ms inference, 3.4ms postprocess per image at shape (1, 3, 384, 640)
tensor([[168.3134,  33.3618, 909.3212, 707.6249]])
741
1280
26.629968261718748


tensor([[168.3134,  33.3618, 909.3212, 707.6249]])
674
23.17779483795166


In [91]:
"""import cv2

# Open the default camera
cam = cv2.VideoCapture(2)

# Get the default frame width and height
frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

while True:
    ret, frame = cam.read()

    # Display the captured frame
    cv2.imshow('Camera', frame)
    

    # Press 'q' to exit the loop
    if cv2.waitKey(1) == ord('q'):
        cv2.imwrite("myimage.jpg", frame)
        break

# Release the capture and writer objects
cam.release()
cv2.destroyAllWindows()


moin = cv2.imread("myimage.jpg")
print(type(moin))

"""



cam = cv2.VideoCapture(2)

# Get the default frame width and height
frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))


askIfReady = input("Have you turned the plant? (Yes/No)")
if askIfReady == "yes":
    ret, frame = cam.read()
    cv2.imwrite("images/ok2.jpg", frame)

    cam.release()
    cv2.destroyAllWindows()

    image = cv2.imread("./images/ok2.jpg")
    height, width, channels = image.shape # type: ignore
    results = model("./images/ok2.jpg")

results[0].show()
# Length
print(results[0].boxes.xyxy)
plantLength = abs(results[0].boxes.xyxy[0][0] - results[0].boxes.xyxy[0][2]) 
print(int(plantLength))
print(int(width))

realwidth = (float(plantLength) / float(width)) * 46
print(realwidth)

# Height
print(results[0].boxes.xyxy)
plantLength = abs(results[0].boxes.xyxy[0][1] - results[0].boxes.xyxy[0][3]) 
print(int(plantLength))

realwidth = (float(plantLength) / float(width)) * 44
print(realwidth)



image 1/1 c:\Users\Prath\Programming\esfz\images\ok2.jpg: 480x640 1 potted plant, 1 dining table, 1 cell phone, 1 book, 368.8ms
Speed: 9.4ms preprocess, 368.8ms inference, 15.5ms postprocess per image at shape (1, 3, 480, 640)
tensor([[4.0116e+01, 4.8799e-01, 3.9066e+02, 2.5533e+02],
        [5.7285e-01, 3.8852e+02, 3.0220e+02, 4.7974e+02],
        [1.6982e+01, 1.7496e+02, 6.3863e+02, 4.8000e+02],
        [1.9199e+02, 2.2303e+02, 5.1507e+02, 4.2739e+02]])
350
640
25.195003890991213
tensor([[4.0116e+01, 4.8799e-01, 3.9066e+02, 2.5533e+02],
        [5.7285e-01, 3.8852e+02, 3.0220e+02, 4.7974e+02],
        [1.6982e+01, 1.7496e+02, 6.3863e+02, 4.8000e+02],
        [1.9199e+02, 2.2303e+02, 5.1507e+02, 4.2739e+02]])
254
17.520274925231934
