### Setup libraries and Gemini API Key 

In [None]:
import constants  # to use the constants defined in the constants.py file
import os
from google import genai  # to use the GenAI API
from pydantic import BaseModel  # to enforce a .JSON output schema to the model
from ultralytics import YOLO
import supervision as sv
import cv2

### Gemini Models
|Model|Model ID|Inputs|Outputs|Optimized for|
|-------|-------|-------|-------|-------|
|Gemini 2.5 Pro Experimental|**gemini-2.5-pro-exp-03-25**|Audio, images, videos, and text|Text|Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more|
|Gemini 2.0 Flash|**gemini-2.0-flash**|Audio, images, videos, and text|Text, images (experimental), and audio (coming soon)|Next generation features, speed, thinking, realtime streaming, and multimodal generation|
|Gemini 2.0 Flash-Lite|**gemini-2.0-flash-lite**|Audio, images, videos, and text|Text|Cost efficiency and low latency|
|Gemini 1.5 Flash|**gemini-1.5-flash**|Audio, images, videos, and text|Text|Fast and versatile performance across a diverse variety of tasks|
|Gemini 1.5 Flash-8B|**gemini-1.5-flash-8b**|Audio, images, videos, and text|Text|High volume and lower intelligence tasks|
|Gemini 1.5 Pro|**gemini-1.5-pro**|Audio, images, videos, and text|Text|Complex reasoning tasks requiring more intelligence|
|Gemini Embedding|**gemini-embedding-exp**|Text|Text embeddings|Measuring the relatedness of text strings|

### Find and retrieve the images

In [None]:
# Collect the path of every image in the image folder whose file name starts
# with "fire" and ends with ".jpg" or ".png" (the match is case-sensitive).
image_dir_path = 'Testes-main/Images'

image_paths = [
    os.path.join(image_dir_path, entry)
    for entry in os.listdir(image_dir_path)
    if entry.startswith("fire") and entry.endswith((".jpg", ".png"))
]

print(f"Found {len(image_paths)} images in {image_dir_path}.")

In [None]:
# Show every collected image with its file name as the title.
# Sort first: os.listdir() returns entries in arbitrary order.
image_paths.sort()

titles = [os.path.basename(image_path) for image_path in image_paths]

# NOTE: cv2.imread does not raise on an unreadable file; it returns None.
images = [cv2.imread(str(image_path)) for image_path in image_paths]

# sv.plot_images_grid raises ValueError when there are more images than grid
# cells. Keep the original 3x2 layout, but add rows when more than six images
# were found so the cell keeps working on larger folders.
grid_columns = 2
grid_rows = max(3, -(-len(images) // grid_columns))  # ceiling division
sv.plot_images_grid(images=images, titles=titles, grid_size=(grid_rows, grid_columns))

### YOLO model selection & object detection

In [None]:
# Load the two YOLO checkpoints that are compared in the cells below.
model11, model12 = (YOLO(weights) for weights in ('yolo11x.pt', 'yolo12x.pt'))

In [None]:
# Run both detectors on every image, once on the frame as loaded and once on a
# copy resized to 640x640. Each image contributes four consecutive entries to
# `results` and `titles`, in this order:
# yolo11, yolo11 resized, yolo12, yolo12 resized.
results = []
titles = []
for image_path in image_paths:
    frame = cv2.imread(str(image_path))
    frame_resized = cv2.resize(frame, (640, 640))
    file_name = os.path.basename(image_path)

    runs = (
        (model11, frame, " - yolo11"),
        (model11, frame_resized, " - yolo11 resized"),
        (model12, frame, " - yolo12"),
        (model12, frame_resized, " - yolo12 resized"),
    )
    for detector, picture, suffix in runs:
        results.append(detector(picture))
        titles.append(file_name + suffix)

In [None]:
# Render the annotated frame of every detection run, in the same order as
# `titles`. Each entry of `results` is what one model call returned for a
# single image, so element [0] is that image's result object.
# Iterating directly (instead of indexing in strides of four) keeps this cell
# independent of how many runs were stored per image.
comparison_images = [run[0].plot() for run in results]

In [None]:
# One row per source image, one column per run (4 runs per image).
# sv.plot_images_grid raises ValueError when the images do not fit the grid,
# so keep the original 5 rows as a minimum and add rows for larger image sets.
comparison_columns = 4
comparison_rows = max(5, -(-len(comparison_images) // comparison_columns))  # ceiling division
sv.plot_images_grid(
    images=comparison_images,
    titles=titles,
    grid_size=(comparison_rows, comparison_columns),
    size=(50, 50),
)

### Select the image to analyze and extract the features

In [None]:
# Pick the first image for the detailed analysis.
# results[0] is the first entry stored by the detection loop, i.e. model11 run
# on the full-size version of image_paths[0], so `model` is set to model11 to
# resolve the class names of that result.
selected_image_path = image_paths[0]
model = model11
test_result = results[0]

# Fresh, un-annotated copy of the image; later cells draw on it.
original_image = cv2.imread(selected_image_path)

# Show the detections for the selected image.
test_image = test_result[0].plot()
sv.plot_image(test_image)

In [None]:
# Overlay a Cartesian reference frame on the selected image, with the (0, 0)
# point placed at the centre of the image. Drawing happens in place.
img_height, img_width = original_image.shape[:2]
origin = (img_width // 2, img_height // 2)

axis_color = (0, 255, 5)
label_font = cv2.FONT_HERSHEY_SIMPLEX

# X-axis: horizontal line through the origin across the full width
cv2.line(original_image, (0, origin[1]), (img_width, origin[1]), axis_color, 2)

# Y-axis: vertical line through the origin across the full height
cv2.line(original_image, (origin[0], 0), (origin[0], img_height), axis_color, 2)

# Axis names and the origin marker, each with its text anchor position
axis_captions = (
    ('X', (img_width - 50, origin[1] - 10)),
    ('Y', (origin[0] + 10, 30)),
    ('(0,0)', (origin[0] + 10, origin[1] + 30)),
)
for caption, anchor in axis_captions:
    cv2.putText(original_image, caption, anchor, label_font, 0.8, axis_color, 2)

# Display the image with the referential
sv.plot_image(original_image, size=(10, 10))

In [None]:
# Extract, for the selected image, the label and the centre of every detection
# whose confidence reaches the threshold, expressed in the centred Cartesian
# frame defined by `origin` (x grows to the right, y grows upwards).

# Minimum confidence a detection needs in order to be kept.
CONFIDENCE_THRESHOLD = 0.4

detections = test_result[0].boxes
all_labels = [model.names.get(cls.item()) for cls in detections.cls.int()]

# Reported over every detection, including the ones filtered out below.
unique_labels = set(all_labels)
print("Unique labels:", unique_labels)

probabilities = detections.conf
print("Probabilities:", probabilities)

# Filter out detections with a confidence lower than CONFIDENCE_THRESHOLD.
# Labels and boxes are filtered together so that `labels[i]` always belongs to
# `filtered_boxes[i]` (and therefore to `coordinates[i]`), regardless of the
# order in which the detector returned the boxes.
kept_detections = [
    (label, box)
    for label, box, prob in zip(all_labels, detections.xyxy, probabilities)
    if prob >= CONFIDENCE_THRESHOLD
]
labels = [label for label, _ in kept_detections]
filtered_boxes = [box for _, box in kept_detections]

print("Filtered boxes:", filtered_boxes)

coordinates = []

for box in filtered_boxes:
    # Box corners in pixel coordinates (top-left / bottom-right)
    x1, y1, x2, y2 = map(int, box)
    center_x = (x1 + x2) // 2
    center_y = (y1 + y2) // 2

    # Express the centre relative to the origin
    normalized_x = center_x - origin[0]
    normalized_y = origin[1] - center_y  # Invert y-axis for Cartesian coordinates
    coordinates.append((normalized_x, normalized_y))

    # Mark the centre on the image and write its coordinates next to it
    text = f"({normalized_x}, {normalized_y})"
    cv2.circle(original_image, (center_x, center_y), 10, (255, 0, 0), -1)
    cv2.putText(original_image, text, (center_x + 10, center_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

print("Coordinates of the centers of the bounding boxes:", coordinates)

# Display the image with the centers drawn
sv.plot_image(original_image, size=(10, 10))

### Export the detections to JSON format

Note: the exported file cannot have a `.json` extension; it must be saved as `.txt` (or its contents passed as plain text).

In [None]:
# Flatten each (label, (x, y)) pair into a single (label, x, y) tuple.
# zip() stops at the shorter of the two lists.
object_coordinates = [
    (label, x, y)
    for label, (x, y) in zip(labels, coordinates)
]
print("Object coordinates:", object_coordinates)

In [None]:
import jsonschema
import json

# File the detections are written to: a JSON document stored with a .txt
# extension. Defined once so the message printed at the end cannot drift from
# the path that is actually written.
OUTPUT_PATH = "object_coordinates.txt"

# Expected structure: {"objects": [{"label": str, "x": number, "y": number}, ...]}
objectschema = {
    "type": "object",
    "properties": {
        "objects": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "label": {"type": "string"},
                    "x": {"type": "number"},
                    "y": {"type": "number"}
                },
                "required": ["label", "x", "y"]
            }
        }
    },
    "required": ["objects"]
}

# Prepare data in the required format
data = {
    "objects": [
        {"label": label, "x": x, "y": y}
        for label, x, y in object_coordinates
    ]
}

# Validate the data against the schema (raises jsonschema.ValidationError on mismatch)
jsonschema.validate(instance=data, schema=objectschema)

# Save the data as JSON text
with open(OUTPUT_PATH, "w", encoding="utf-8") as json_file:
    json.dump(data, json_file, indent=4)

# Print the JSON data
print(json.dumps(data, indent=4))

print(f"Object coordinates saved to {OUTPUT_PATH}")

In [None]:
# Ask Gemini to turn the exported detections into a list of individual users
# plus a total user count and a per-user bandwidth estimate.
client = genai.Client(api_key=constants.API_KEY)
model_name="gemini-2.5-pro-exp-03-25"

prompt = """You are a network engineer. You are given a .txt file, count the total number of users and estimate the bandwidth needed for each user.
            Additionaly replace the entries of the all the object to individual users, for example a bus counts as 3 users, so for every bus in the file you have to create three entries with the same coordinates in the response, but with the label user.
            The file contains the coordinates of the objects in the image, and you need to provide the coordinates of each user in the response.
        """
# "thee users" was a typo for "three users"; the prompt above already uses 3 for a bus.
system_instructions = "Note that a car, a bus or a truck count as three users a airplane or chopper count as 2 users and a person as a single user."

# Upload the detections file so it can be referenced in the request
file = client.files.upload(file="object_coordinates.txt")
print(f"File uploaded: {file}")


# NOTE: `#` comments are used instead of class docstrings on purpose: pydantic
# copies a class docstring into the generated schema as its description.
class user(BaseModel):
    # One entry of the expanded user list: a label and its x/y coordinates.
    label: str
    x: float
    y: float


class Model_Output(BaseModel):
    # Structured answer requested from the model.
    total_users: int
    bandwidth_per_user: float
    users: list[user]


response = client.models.generate_content(
    model=model_name,
    contents=[file, prompt],
    config=genai.types.GenerateContentConfig(
        system_instruction=system_instructions,
        # temperature=0.5,
        # A response schema has to be paired with the JSON MIME type to be
        # enforced; the pydantic model is passed directly, as in the SDK docs.
        response_mime_type='application/json',
        response_schema=Model_Output,
    ),
)


print(response.text)

In [None]:
import getpass
import os
from google import genai # to use the GenAI API
from pydantic import BaseModel # to enforce a .JSON output schema to the model
from ultralytics import YOLO
import supervision as sv
import cv2

# SECURITY: an API key used to be hard-coded here through `%env`. A key that
# has been committed to a file must be considered leaked and should be revoked.
# The key is now taken from the API_KEY environment variable; when it is not
# set, it is requested interactively (getpass does not echo the input).
if not os.getenv("API_KEY"):
    os.environ["API_KEY"] = getpass.getpass("Gemini API key: ")

### Gemini Models
|Model|Model ID|Inputs|Outputs|Optimized for|
|-------|-------|-------|-------|-------|
|Gemini 2.5 Pro Experimental|**gemini-2.5-pro-exp-03-25**|Audio, images, videos, and text|Text|Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more|
|Gemini 2.0 Flash|**gemini-2.0-flash**|Audio, images, videos, and text|Text, images (experimental), and audio (coming soon)|Next generation features, speed, thinking, realtime streaming, and multimodal generation|
|Gemini 2.0 Flash-Lite|**gemini-2.0-flash-lite**|Audio, images, videos, and text|Text|Cost efficiency and low latency|
|Gemini 1.5 Flash|**gemini-1.5-flash**|Audio, images, videos, and text|Text|Fast and versatile performance across a diverse variety of tasks|
|Gemini 1.5 Flash-8B|**gemini-1.5-flash-8b**|Audio, images, videos, and text|Text|High volume and lower intelligence tasks|
|Gemini 1.5 Pro|**gemini-1.5-pro**|Audio, images, videos, and text|Text|Complex reasoning tasks requiring more intelligence|
|Gemini Embedding|**gemini-embedding-exp**|Text|Text embeddings|Measuring the relatedness of text strings|

### Find and retrieve the images

In [None]:
# Collect the path of every image in the image folder whose file name starts
# with "fire" and ends with ".jpg" or ".png" (the match is case-sensitive).
image_dir_path = 'Testes-main/Images'

image_paths = [
    os.path.join(image_dir_path, entry)
    for entry in os.listdir(image_dir_path)
    if entry.startswith("fire") and entry.endswith((".jpg", ".png"))
]

print(f"Found {len(image_paths)} images in {image_dir_path}.")

In [None]:
# Show every collected image with its file name as the title.
# Sort first: os.listdir() returns entries in arbitrary order.
image_paths.sort()

titles = [os.path.basename(image_path) for image_path in image_paths]

# NOTE: cv2.imread does not raise on an unreadable file; it returns None.
images = [cv2.imread(str(image_path)) for image_path in image_paths]

# sv.plot_images_grid raises ValueError when there are more images than grid
# cells. Keep the original 3x2 layout, but add rows when more than six images
# were found so the cell keeps working on larger folders.
grid_columns = 2
grid_rows = max(3, -(-len(images) // grid_columns))  # ceiling division
sv.plot_images_grid(images=images, titles=titles, grid_size=(grid_rows, grid_columns))

### YOLO model selection & object detection

In [None]:
# Load the two YOLO checkpoints that are compared in the cells below.
model11, model12 = (YOLO(weights) for weights in ('yolo11x.pt', 'yolo12x.pt'))

In [None]:
# Run both detectors on every image, once on the frame as loaded and once on a
# copy resized to 640x640. Each image contributes four consecutive entries to
# `results` and `titles`, in this order:
# yolo11, yolo11 resized, yolo12, yolo12 resized.
results = []
titles = []
for image_path in image_paths:
    frame = cv2.imread(str(image_path))
    frame_resized = cv2.resize(frame, (640, 640))
    file_name = os.path.basename(image_path)

    runs = (
        (model11, frame, " - yolo11"),
        (model11, frame_resized, " - yolo11 resized"),
        (model12, frame, " - yolo12"),
        (model12, frame_resized, " - yolo12 resized"),
    )
    for detector, picture, suffix in runs:
        results.append(detector(picture))
        titles.append(file_name + suffix)

In [None]:
# Render the annotated frame of every detection run, in the same order as
# `titles`. Each entry of `results` is what one model call returned for a
# single image, so element [0] is that image's result object.
# Iterating directly (instead of indexing in strides of four) keeps this cell
# independent of how many runs were stored per image.
comparison_images = [run[0].plot() for run in results]

In [None]:
# One row per source image, one column per run (4 runs per image).
# sv.plot_images_grid raises ValueError when the images do not fit the grid,
# so keep the original 5 rows as a minimum and add rows for larger image sets.
comparison_columns = 4
comparison_rows = max(5, -(-len(comparison_images) // comparison_columns))  # ceiling division
sv.plot_images_grid(
    images=comparison_images,
    titles=titles,
    grid_size=(comparison_rows, comparison_columns),
    size=(50, 50),
)

### Select the image to analyze and extract the features

In [None]:
# Pick the first image for the detailed analysis.
# results[0] is the first entry stored by the detection loop, i.e. model11 run
# on the full-size version of image_paths[0], so `model` is set to model11 to
# resolve the class names of that result.
selected_image_path = image_paths[0]
model = model11
test_result = results[0]

# Fresh, un-annotated copy of the image; later cells draw on it.
original_image = cv2.imread(selected_image_path)

# Show the detections for the selected image.
test_image = test_result[0].plot()
sv.plot_image(test_image)

In [None]:
# Overlay a Cartesian reference frame on the selected image, with the (0, 0)
# point placed at the centre of the image. Drawing happens in place.
img_height, img_width = original_image.shape[:2]
origin = (img_width // 2, img_height // 2)

axis_color = (0, 255, 5)
label_font = cv2.FONT_HERSHEY_SIMPLEX

# X-axis: horizontal line through the origin across the full width
cv2.line(original_image, (0, origin[1]), (img_width, origin[1]), axis_color, 2)

# Y-axis: vertical line through the origin across the full height
cv2.line(original_image, (origin[0], 0), (origin[0], img_height), axis_color, 2)

# Axis names and the origin marker, each with its text anchor position
axis_captions = (
    ('X', (img_width - 50, origin[1] - 10)),
    ('Y', (origin[0] + 10, 30)),
    ('(0,0)', (origin[0] + 10, origin[1] + 30)),
)
for caption, anchor in axis_captions:
    cv2.putText(original_image, caption, anchor, label_font, 0.8, axis_color, 2)

# Display the image with the referential
sv.plot_image(original_image, size=(10, 10))

In [None]:
# Extract, for the selected image, the label and the centre of every detection
# whose confidence reaches the threshold, expressed in the centred Cartesian
# frame defined by `origin` (x grows to the right, y grows upwards).

# Minimum confidence a detection needs in order to be kept.
CONFIDENCE_THRESHOLD = 0.4

detections = test_result[0].boxes
all_labels = [model.names.get(cls.item()) for cls in detections.cls.int()]

# Reported over every detection, including the ones filtered out below.
unique_labels = set(all_labels)
print("Unique labels:", unique_labels)

probabilities = detections.conf
print("Probabilities:", probabilities)

# Filter out detections with a confidence lower than CONFIDENCE_THRESHOLD.
# Labels and boxes are filtered together so that `labels[i]` always belongs to
# `filtered_boxes[i]` (and therefore to `coordinates[i]`), regardless of the
# order in which the detector returned the boxes.
kept_detections = [
    (label, box)
    for label, box, prob in zip(all_labels, detections.xyxy, probabilities)
    if prob >= CONFIDENCE_THRESHOLD
]
labels = [label for label, _ in kept_detections]
filtered_boxes = [box for _, box in kept_detections]

print("Filtered boxes:", filtered_boxes)

coordinates = []

for box in filtered_boxes:
    # Box corners in pixel coordinates (top-left / bottom-right)
    x1, y1, x2, y2 = map(int, box)
    center_x = (x1 + x2) // 2
    center_y = (y1 + y2) // 2

    # Express the centre relative to the origin
    normalized_x = center_x - origin[0]
    normalized_y = origin[1] - center_y  # Invert y-axis for Cartesian coordinates
    coordinates.append((normalized_x, normalized_y))

    # Mark the centre on the image and write its coordinates next to it
    text = f"({normalized_x}, {normalized_y})"
    cv2.circle(original_image, (center_x, center_y), 10, (255, 0, 0), -1)
    cv2.putText(original_image, text, (center_x + 10, center_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

print("Coordinates of the centers of the bounding boxes:", coordinates)

# Display the image with the centers drawn
sv.plot_image(original_image, size=(10, 10))

### Export the detections to JSON format

Note: the exported file cannot have a `.json` extension; it must be saved as `.txt` (or its contents passed as plain text).

In [None]:
# Flatten each (label, (x, y)) pair into a single (label, x, y) tuple.
# zip() stops at the shorter of the two lists.
object_coordinates = [
    (label, x, y)
    for label, (x, y) in zip(labels, coordinates)
]
print("Object coordinates:", object_coordinates)

In [None]:
import jsonschema
import json

# File the detections are written to: a JSON document stored with a .txt
# extension. Defined once so the message printed at the end cannot drift from
# the path that is actually written.
OUTPUT_PATH = "object_coordinates.txt"

# Expected structure: {"objects": [{"label": str, "x": number, "y": number}, ...]}
objectschema = {
    "type": "object",
    "properties": {
        "objects": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "label": {"type": "string"},
                    "x": {"type": "number"},
                    "y": {"type": "number"}
                },
                "required": ["label", "x", "y"]
            }
        }
    },
    "required": ["objects"]
}

# Prepare data in the required format
data = {
    "objects": [
        {"label": label, "x": x, "y": y}
        for label, x, y in object_coordinates
    ]
}

# Validate the data against the schema (raises jsonschema.ValidationError on mismatch)
jsonschema.validate(instance=data, schema=objectschema)

# Save the data as JSON text
with open(OUTPUT_PATH, "w", encoding="utf-8") as json_file:
    json.dump(data, json_file, indent=4)

# Print the JSON data
print(json.dumps(data, indent=4))

print(f"Object coordinates saved to {OUTPUT_PATH}")

In [None]:
# Ask Gemini to turn the exported detections into a list of individual users
# plus a total user count and a per-user bandwidth estimate.
# The API key is read from the API_KEY environment variable.
client = genai.Client(api_key=os.getenv("API_KEY"))
model_name="gemini-2.5-pro-exp-03-25"

prompt = """You are a network engineer. You are given a .txt file, count the total number of users and estimate the bandwidth needed for each user.
            Additionaly replace the entries of the all the object to individual users, for example a bus counts as 3 users, so for every bus in the file you have to create three entries with the same coordinates in the response, but with the label user.
            The file contains the coordinates of the objects in the image, and you need to provide the coordinates of each user in the response.
        """
# "thee users" was a typo for "three users"; the prompt above already uses 3 for a bus.
system_instructions = "Note that a car, a bus or a truck count as three users a airplane or chopper count as 2 users and a person as a single user."

# Upload the detections file so it can be referenced in the request
file = client.files.upload(file="object_coordinates.txt")
print(f"File uploaded: {file}")


# NOTE: `#` comments are used instead of class docstrings on purpose: pydantic
# copies a class docstring into the generated schema as its description.
class user(BaseModel):
    # One entry of the expanded user list: a label and its x/y coordinates.
    label: str
    x: float
    y: float


class Model_Output(BaseModel):
    # Structured answer requested from the model.
    total_users: int
    bandwidth_per_user: float
    users: list[user]


response = client.models.generate_content(
    model=model_name,
    contents=[file, prompt],
    config=genai.types.GenerateContentConfig(
        system_instruction=system_instructions,
        # temperature=0.5,
        # A response schema has to be paired with the JSON MIME type to be
        # enforced; the pydantic model is passed directly, as in the SDK docs.
        response_mime_type='application/json',
        response_schema=Model_Output,
    ),
)


print(response.text)