In [4]:
!pip install python-dotenv

Collecting python-dotenv
  Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Using cached python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [25]:
import os
from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
import cv2

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Path of the image whose regions are analysed below.
IMAGE_PATH = "images.jpg"


def _env_list(name):
    """Return the comma-separated environment variable `name` split on commas.

    Raises:
        RuntimeError: if the variable is not set. Without this check the
            failure would be an AttributeError from calling .split on None.
    """
    raw = os.getenv(name)
    if raw is None:
        raise RuntimeError(
            f"Environment variable {name!r} is not set; define it in the environment or in .env"
        )
    return raw.split(",")


# Sentence-embedding model used to compare the request with each option.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
options = _env_list("OPTIONS")
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)
features = _env_list("FEATURES")
sentence = "Count me how many truck gold and silver there are."

# cv2.imread reports an unreadable or missing file by returning None instead of
# raising, so check the result before touching .shape.
image = cv2.imread(IMAGE_PATH)
if image is None:
    raise FileNotFoundError(f"Could not read image file {IMAGE_PATH!r}")
image_height, image_width = image.shape[:2]
image_area = image_width * image_height

In [10]:
# Embed the request sentence and every candidate option with the same model.
sentence_embedding = model.encode([sentence])[0]
options_embeddings = model.encode(options)

# Cosine similarity between the request and each option, keyed by the option text.
similarities = {}
for option, option_embedding in zip(options, options_embeddings):
    dot_product = np.dot(sentence_embedding, option_embedding)
    norm_product = np.linalg.norm(sentence_embedding) * np.linalg.norm(option_embedding)
    similarities[option] = dot_product / norm_product

# The option with the highest cosine similarity wins.
best_option = max(similarities, key=similarities.get)

print(f"Most Probability world is '{best_option}' with similarity {similarities[best_option]:.4f}")

Most Probability world is 'cars' with similarity 0.3768


In [11]:
def get_filter_code(
    prompt: "str | list[dict]",
    model_name: str = "gpt-4o",
    max_tokens: int = 1000,
    temperature: float = 0,
) -> str:
    """Send a chat-completion request and return the text of the first choice.

    Args:
        prompt: Either a list of chat message dicts (each with "role" and
            "content"), forwarded unchanged as `messages`, or a plain string,
            which is wrapped as a single user message.
        model_name: Chat model to query.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to the API.

    Returns:
        The content of the first returned choice.

    Raises:
        ValueError: if the first choice carries no text content, so callers
            never receive None where a string is promised.
    """
    # The API expects a list of messages; accept a bare string for convenience.
    if isinstance(prompt, str):
        messages = [{"role": "user", "content": prompt}]
    else:
        messages = prompt

    # `client` is the module-level OpenAI client created in the setup cell.
    response = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    content = response.choices[0].message.content
    if content is None:
        raise ValueError("The model returned no text content.")
    return content

In [None]:
# Three sample regions, each described by four (x, y) vertices.
points_array = np.array(
    [
        [(137, 91), (140, 88), (140, 88), (137, 91)],
        [(30, 30), (130, 30), (130, 80), (30, 80)],
        [(100, 100), (200, 100), (200, 200), (100, 200)],
    ]
)

def calculate_features(points, image):
    """Compute geometric and colour features for one region of `image`.

    The region is the axis-aligned bounding box of `points`. All relative
    measures are taken against the dimensions of the `image` argument itself,
    so the function does not depend on notebook-level globals.

    Args:
        points: Sequence of (x, y) integer vertices, e.g. an array of shape (k, 2).
        image: Image array indexed as [row, column, ...]; the first two entries
            of `image.shape` are used as height and width.
            NOTE(review): channel order is assumed to be BGR, as in the
            original comment; confirm for images not loaded via cv2.imread.

    Returns:
        dict with the keys mean_x, mean_y, mean_color_R, mean_color_G,
        mean_color_B, area, relative_area, relative_height, relative_width.
    """
    pts = np.asarray(points)
    xs, ys = pts[:, 0], pts[:, 1]
    x_min, x_max = xs.min(), xs.max()
    y_min, y_max = ys.min(), ys.max()

    # Derive the reference dimensions from the image that was passed in.
    image_height, image_width = image.shape[:2]
    image_area = image_width * image_height

    box_width = x_max - x_min
    box_height = y_max - y_min
    area = box_width * box_height

    # Slicing is end-exclusive, so the row at y_max and the column at x_max are
    # not part of the crop; the crop therefore has exactly `area` pixels.
    cropped_area = image[y_min:y_max, x_min:x_max]
    mean_color = cv2.mean(cropped_area)[:3]  # BGR

    return {
        "mean_x": xs.mean(),
        "mean_y": ys.mean(),
        "mean_color_R": mean_color[2],
        "mean_color_G": mean_color[1],
        "mean_color_B": mean_color[0],
        "area": area,
        "relative_area": area / image_area,
        "relative_height": box_height / image_height,
        "relative_width": box_width / image_width,
    }

# One feature record per region in points_array, gathered in a single pass.
dataset = [calculate_features(points, image) for points in points_array]

# Build the frame once from the full list of records.
df = pd.DataFrame(dataset)

df.head()

Unnamed: 0,mean_x,mean_y,mean_color_R,mean_color_G,mean_color_B,area,relative_area,relative_height,relative_width
0,138.5,89.5,56.222222,76.222222,110.555556,9,0.000179,0.016393,0.010909
1,80.0,55.0,151.015,169.1236,190.649,5000,0.099354,0.273224,0.363636
2,150.0,150.0,78.15759,113.233253,143.865783,10000,0.198708,0.546448,0.363636


In [21]:
query = "Give me the percentage of the area of rows near the center of the image."

# The prompt names the DataFrame variable (df) and the exact result variable
# (filtered_data) so the generated code matches what this notebook reads back.
messages = [
    {"role": "system", "content": "You are an assistant that helps write Python code."},
    {"role": "user", "content": 
    f"""
        Consider a pandas DataFrame named df with the columns {list(df.columns)}. Image dimensions are {image_width}x{image_height} and the area is {image_area}.
        The user's query is: "{query}".

        Write Python code that answers the query and assigns the result to a variable called filtered_data. Note that the query may require aggregating rows.
        Do not include comments, import statements, or Markdown code fences; return only the Python code.
        """}
]

# Reset first so a failed run cannot leave a result from an earlier run behind.
filtered_data = None

try:
    # Strip Markdown code fences in case the model adds them anyway.
    filter_code = get_filter_code(prompt=messages).replace("```python", "").replace("```", "").strip()
    print(filter_code)
    # SECURITY: this executes model-generated code. It runs in its own namespace
    # on a copy of df so it cannot rebind notebook variables or add columns to
    # df, but it is NOT sandboxed -- review the printed code before trusting it.
    exec_scope = {"df": df.copy(), "pd": pd, "np": np}
    exec(filter_code, exec_scope)
    if "filtered_data" not in exec_scope:
        raise NameError("the generated code did not define filtered_data")
    filtered_data = exec_scope["filtered_data"]
except Exception as e:
    print(f"An error occurred: {e}")

import pandas as pd

# Assuming df is the DataFrame with the given columns
center_x, center_y = 275 / 2, 183 / 2
threshold_distance = 20

df['distance_to_center'] = ((df['mean_x'] - center_x)**2 + (df['mean_y'] - center_y)**2)**0.5
near_center = df[df['distance_to_center'] <= threshold_distance]

total_area = df['area'].sum()
center_area = near_center['area'].sum()

percentage_area_near_center = (center_area / total_area) * 100
filtered_data = percentage_area_near_center


In [22]:
# Display the value that the generated code from the previous cell assigned to filtered_data.
filtered_data

0.059964021587047764