# Object detection

Object detection + segmentation using ultralytics [YOLO](https://github.com/ultralytics/ultralytics)

In [1]:
import os
import cv2
import pickle
import pandas as pd

from tqdm import tqdm
from ultralytics import YOLO

## Performing inference and storing predictions

Predicting using weights of YOLO 11x for segmentation trained on [COCO's](https://cocodataset.org/#home) dataset.

In [2]:
model = YOLO('models/segmentation/yolo11x-seg.pt')
model.to('cuda')

predict = False

if predict:
    results = {}
    type = 'nat'
    image_files = [os.path.join(f'images/images_{type}', f) for f in os.listdir(f'images/images_{type}') if os.path.isfile(os.path.join(f'images/images_{type}', f))]
    for image_file in tqdm(image_files, total=len(image_files)):
        results[image_file[image_file.rfind('\\')+1:]] = model.predict(image_file, verbose=False)[0]

    results_art = {}
    type = 'art'
    image_files = [os.path.join(f'images/images_{type}', f) for f in os.listdir(f'images/images_{type}') if os.path.isfile(os.path.join(f'images/images_{type}', f))]
    for image_file in tqdm(image_files, total=len(image_files)):
        results_art[image_file[image_file.rfind('\\')+1:]] = model.predict(image_file, verbose=False)[0]

    results.update(results_art)

    with open('predictions/saved_predictions.pkl', 'wb') as f:
        pickle.dump(results, f)

## Visualizing Results

In [22]:
with open('predictions/saved_predictions.pkl', 'rb') as f:
    results = pickle.load(f)

results[list(results.keys())[16]].show()

# Saving Object detection stats

## Stats per Image

In [5]:
image_files = []
num_predictions = []
detected_objects = empty_lists = [[] for _ in range(80)]

for image_file, predictions in tqdm(results.items()):
    image_files.append(image_file)
    num_predictions.append(len(predictions.boxes.cls))
    for i in range(80):
        detected_objects[i].append(int(sum(predictions.boxes.cls == i)))

dataframe_dict = {}
dataframe_dict['image_file'] = image_files
dataframe_dict['num_predictions'] = num_predictions
class_names = results[list(results.keys())[0]].names
for i in range(80):
    dataframe_dict[class_names[i]] = detected_objects[i]

df = pd.DataFrame(dataframe_dict)
df.to_csv('data_out/object_detection/objects_per_image.csv', index=False)

100%|██████████| 400/400 [00:02<00:00, 136.45it/s]


In [49]:
#AADB - farm1_268_19534013654_50c879f919_b.jpg


results['VIU - image_r_772.jpg'].masks.data[0].cpu().numpy()


array([[          0,           0,           0, ...,           0,           0,           0],
       [          0,           0,           0, ...,           0,           0,           0],
       [          0,           0,           0, ...,           0,           0,           0],
       ...,
       [          0,           0,           0, ...,           0,           0,           0],
       [          0,           0,           0, ...,           0,           0,           0],
       [          0,           0,           0, ...,           0,           0,           0]], dtype=float32)

## Stats per Object

In [58]:
images = []
object_ids = []
object_classes = []
confidence_scores = []
object_areas = []

import matplotlib.pyplot as plt

for image_file, predictions in tqdm(results.items()):
    # Images with no predictions
    if predictions.masks is None:
        continue

    for i in range(len(predictions.boxes.cls)):
        images.append(image_file)
        object_ids.append(i)
        object_classes.append(predictions.names[int(predictions.boxes.cls[i])])
        confidence_scores.append(float(predictions.boxes.conf[i]))

        # Getting mask area

        mask = predictions.masks[i].data.cpu().numpy()[0]
        mask = cv2.resize(mask, (predictions.orig_shape[1], predictions.orig_shape[0]))
        object_areas.append(cv2.countNonZero(mask))


dataframe_dict = {
    'image_file': images,
    'object_id': object_ids,
    'object_class': object_classes,
    'confidence_score': confidence_scores,
    'object_area': object_areas
}

df = pd.DataFrame(dataframe_dict)
df.to_csv('data_out/object_detection/detected_object_data.csv', index=False)
df

100%|██████████| 400/400 [00:00<00:00, 450.40it/s]


Unnamed: 0,image_file,object_id,object_class,confidence_score,object_area
0,AADB - farm1_260_20156759346_a5ab2f2a78_b.jpg,0,chair,0.874608,2130
1,AADB - farm1_260_20156759346_a5ab2f2a78_b.jpg,1,chair,0.843225,2004
2,AADB - farm1_260_20156759346_a5ab2f2a78_b.jpg,2,dining table,0.626132,4796
3,AADB - farm1_260_20156759346_a5ab2f2a78_b.jpg,3,chair,0.585403,1010
4,AADB - farm1_260_20156759346_a5ab2f2a78_b.jpg,4,chair,0.501245,478
...,...,...,...,...,...
1031,nicolae-darascu_still-life-with-pumpkin.jpg,4,orange,0.367205,4158
1032,pablo-picasso_geometrical-composition-the-guit...,0,tie,0.346885,12381
1033,patrick-caulfield_the-letter-1967.jpg,0,kite,0.461603,24147
1034,yves-gaucher_danse-carr-e-once-upon-a-square-1...,0,clock,0.551528,125706
