In [None]:
import os
from pathlib import Path
from ultralytics import YOLO
from PIL import Image
import shutil
import pandas as pd
from source import image_id_converter as img_idc
from source import sort_img_files as sif

In [None]:
os.getcwd()

## Set paths:

In [None]:
# Project root directory. The default is the author's local checkout; set the
# DIAS_PROJECT_ROOT environment variable to run the notebook on another machine
# without editing this cell.
root_path = Path(os.environ.get(
    'DIAS_PROJECT_ROOT',
    '/Users/stephanehess/Documents/CAS_AML/dias_digit_project/project',
))

In [None]:

# Input and output folders, all located next to the project root (one level up from root_path)
image_dir = root_path.joinpath("..", "test_data")  # directory containing the images
output_dir_with_person = root_path.joinpath("..", "test_with_person")  # output directory for images with persons
output_dir_without_person = root_path.joinpath("..", "test_without_person")  # output directory for images without persons


## Create directories for sorting the images:

In [None]:
# Make sure both target folders exist; already existing folders are left as they are
for target_dir in (output_dir_with_person, output_dir_without_person):
    os.makedirs(target_dir, exist_ok=True)

## Define the pretrained model:

In [None]:
# Load the pretrained YOLOv8 nano weights through the ultralytics YOLO class
model = YOLO("yolov8n.pt")  # Use yolov8n (nano) for faster inference


## Loop through images, sort them into the respective output folders according to person detection result and store results in list:

In [None]:
# Run the person detector on the images in image_dir. Per the section heading, sort_img_files
# sorts each file into output_dir_with_person or output_dir_without_person; it returns two
# sequences that are combined column-wise below: the image ids and the per-image result.
# NOTE(review): a later section speaks of images being "moved" — if the files really leave
# image_dir, this cell cannot simply be re-run; confirm against source/sort_img_files.
img_ids, with_person = sif.sort_img_files(image_dir, model, output_dir_with_person, output_dir_without_person)

In [None]:
img_ids

In [None]:
with_person

## Load person predictions into a dataframe: 

In [None]:
# Put the image ids and the detection results side by side, one row per entry
person_columns = {'image_id': img_ids, 'with_person': with_person}
results_person = pd.DataFrame(person_columns)
results_person.head()


## Add binary (0/1) encoded person predictions:

In [None]:
# Encode the detection result as an integer flag: 1 for truthy entries, 0 otherwise
results_person['with_person_pred'] = [int(bool(flag)) for flag in results_person['with_person']]
results_person.head()

## Load person label data:

The file with_without_person.csv contains labels added by (human) visual inspection. The labels thus represent the ground truth as to whether or not an image contains a person. The column with_person indicates whether one or several persons are in the image; the column recognisable indicates whether such a person would be recognisable to a human familiar with the person in question based on their appearance (according to the judgement of the author).

In [None]:
# Read the label table that is stored in the same directory as the test images
labels_csv = image_dir / 'with_without_person_mod_test.csv'
with_without_person = pd.read_csv(labels_csv)
with_without_person


In [None]:
# Use a dedicated name for the ids from the label file: reusing `img_ids` would overwrite the
# ids returned by sort_img_files, so re-running the predictions DataFrame cell afterwards would
# pair the predictions with the label file's ids (or fail on a length mismatch).
label_img_ids = list(with_without_person.image_id)
# reconvert_image_ids presumably restores the id format used in results_person so that both
# tables can be merged on image_id — TODO confirm in source/image_id_converter.
with_without_person['image_id'] = img_idc.reconvert_image_ids(label_img_ids)

In [None]:
with_without_person.head()

## Rename the labels:

In [None]:
# Give the ground-truth columns explicit *_label names
label_column_names = {'with_person': 'person_label', 'recognisable': 'recognisable_label'}
with_without_person = with_without_person.rename(columns=label_column_names)
with_without_person.head()


## Merge label data with the predictions:

In [None]:
# Inner join on image_id: keeps only the images that have both a label and a prediction
labels_results = pd.merge(with_without_person, results_person, on='image_id', how='inner')
labels_results.head()

In [None]:
labels_results.shape

## Calculate sensitivity and specificity for person predictions and get lists of images with positive person predictions:

In [None]:
# Boolean masks over labels_results: ground truth (person_label) and prediction (with_person_pred)
positive_bools = labels_results.person_label == 1
negative_bools = labels_results.person_label == 0
positive_pred_bools = labels_results.with_person_pred == 1
negative_pred_bools = labels_results.with_person_pred == 0

# Rows per ground-truth class and per correctly classified group
positives = labels_results[positive_bools]
negatives = labels_results[negative_bools]
true_positives = labels_results[positive_bools & positive_pred_bools]
true_negatives = labels_results[negative_bools & negative_pred_bools]

# Misclassified rows, inspected in the following sections
false_negatives = labels_results[positive_bools & negative_pred_bools]
false_positives = labels_results[negative_bools & positive_pred_bools]

# Sensitivity = true positives / labelled positives, specificity = true negatives / labelled
# negatives. Either rate is undefined when its class is absent from the labels, so report NaN
# in that case instead of raising ZeroDivisionError.
n_positives = positives.shape[0]
n_negatives = negatives.shape[0]

sensitivity = true_positives.shape[0] / n_positives if n_positives else float('nan')
print('sensitivity:')
print(sensitivity)

specificity = true_negatives.shape[0] / n_negatives if n_negatives else float('nan')
print('specificity:')
print(specificity)


## Inspect false negatives:

In [None]:
false_negatives

## Inspect false positives:

In [None]:
false_positives

## Visually inspect the images in the two folders!

All classified images were verified visually; the false negatives are all images with non-recognisable persons (according to my judgement).

## Check how many images have been moved to folder output_dir_with_person:

In [None]:
# List the folder contents, leaving out the '.DS_Store' entry if one is present
files_pred_with_person = [
    entry for entry in os.listdir(output_dir_with_person) if entry != '.DS_Store'
]

In [None]:
len(files_pred_with_person)

In [None]:
files_pred_with_person

## Check how many images have been moved to folder output_dir_without_person:

In [None]:
# List the folder contents, leaving out the '.DS_Store' entry if one is present
files_pred_without_person = [
    entry for entry in os.listdir(output_dir_without_person) if entry != '.DS_Store'
]

In [None]:
len(files_pred_without_person)

In [None]:
files_pred_without_person

## Compare files moved to folders with results in labels_results:

In [None]:
# Rows for which the model predicted a person
pred_positives = labels_results.loc[positive_pred_bools]

In [None]:
len(pred_positives)

In [None]:
# Rows for which the model predicted no person
pred_negatives = labels_results.loc[negative_pred_bools]

In [None]:
len(pred_negatives)

#### Get image ids of the files in the two folders (with or without_persons):

In [None]:
# Derive the image id from each file name: the last three characters in front of '.tif'.
# Names that do not contain '.tif' (stray non-image files) are skipped; the previous
# unconditional `parts[-2]` lookup raised IndexError for them.
files_pred_with_person_folder = []
for file in files_pred_with_person:
    parts = file.split('.tif')
    if len(parts) < 2:
        continue
    img_id = parts[-2][-3:]
    files_pred_with_person_folder.append(img_id)

In [None]:
# Derive the image id from each file name: the last three characters in front of '.tif'.
# Names that do not contain '.tif' (stray non-image files) are skipped; the previous
# unconditional `parts[-2]` lookup raised IndexError for them.
files_pred_without_person_folder = []
for file in files_pred_without_person:
    parts = file.split('.tif')
    if len(parts) < 2:
        continue
    img_id = parts[-2][-3:]
    files_pred_without_person_folder.append(img_id)

#### Compare image ids from the files with the image ids in the labels_results dataframe:

In [None]:
# Unique image ids of the rows predicted as "with person"
files_pred_positives_doc = set(pred_positives['image_id'])

In [None]:
print(len(pred_positives))
print(len(files_pred_positives_doc))

In [None]:
print(len(files_pred_with_person_folder))
print(len(set(files_pred_with_person_folder)))


In [None]:
files_pred_with_person_folder = set(files_pred_with_person_folder)

In [None]:
files_pred_with_person_folder

#### Get the intersection of the image id sets, check if it is as big as each set. If so, the two sets are identical:

In [None]:
# Sizes of the folder id set, the dataframe id set and their overlap
length_folder = len(files_pred_with_person_folder)
length_doc = len(files_pred_positives_doc)
shared_ids = files_pred_positives_doc.intersection(files_pred_with_person_folder)
length_intersection = len(shared_ids)

In [None]:
print(length_intersection == length_folder)
print(length_folder == length_doc)

In [None]:
print(len(set(files_pred_with_person_folder)))
print(len(set(files_pred_positives_doc)))
print(length_intersection)

In [None]:
files_pred_with_person_folder.difference(files_pred_positives_doc)

In [None]:
# Rows predicted as "without person" and the set of their image ids
pred_negatives = labels_results.loc[negative_pred_bools]
files_pred_negatives_doc = set(pred_negatives['image_id'])

In [None]:
# Same size comparison for the without-person folder.
# NOTE(review): unlike the with-person ids, files_pred_without_person_folder is never converted
# to a set, so length_folder counts duplicate ids if there are any.
length_doc = len(files_pred_negatives_doc)
length_folder = len(files_pred_without_person_folder)
shared_ids = files_pred_negatives_doc.intersection(files_pred_without_person_folder)
length_intersection = len(shared_ids)

In [None]:
print(length_intersection == length_folder)
print(length_folder == length_doc)

In [None]:
length_intersection

In [None]:
length_folder

## Save labels and results:

In [None]:
labels_results

In [None]:
# Add image ids that will remain string type even when saved to csv and reloaded:
labels = list(labels_results.image_id)
new_labels = img_idc.complete_image_ids(labels)
labels_results['image_id_str'] = new_labels
labels_results

In [None]:
os.getcwd()

In [None]:
cols_to_select = ['image_id', 'person_label', 'recognisable_label', 'with_person_pred', 'image_id_str']

In [None]:
# Independent copy restricted to the columns that are written to disk
labels_results_to_store = labels_results.loc[:, cols_to_select].copy()
labels_results_to_store

In [None]:
# Store the prediction under a more descriptive column name
labels_results_to_store = labels_results_to_store.rename(
    columns={'with_person_pred': 'prediction_with_person'}
)

In [None]:
labels_results_to_store

In [None]:
# Write the table into the test image directory. Because to_csv is called without
# index=False, the DataFrame index is written as an additional unnamed first column.
labels_results_to_store.to_csv(image_dir/'results_people_detection_test.csv')