In [None]:
import os
from pathlib import Path
from ultralytics import YOLO
from PIL import Image
import shutil
import pandas as pd
from source import image_id_converter as img_idc
#from source import sort_img_files as sif
from source import llm_input as llm_i
from source import llm_output as llm_o
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import numpy as np

In [None]:
import ollama
import json
import re
import pickle

In [None]:
# All locations are derived from the directory the notebook is started from.
# Alternative dataset roots used for other experiments:
#   (project_path / 'test_data_folders/test_LLM_prompt_experiments').resolve()
#   (project_path / '..' / 'test_data_folders/test_yolo_object_train').resolve()
project_path = Path.cwd()
root_path = project_path.joinpath(
    '..', 'test_data_folders/test_filter_out_people_multi_approach'
).resolve()

# Data folders below the dataset root.
data_path = root_path.joinpath('data')
tif_data_path = root_path.joinpath('data_1')
jpg_data_path = root_path.joinpath('data_jpg')

# Category folders below the dataset root (only the paths are built here;
# nothing is created on disk by this cell).
output_dir_not_photo = root_path.joinpath('not_photo')
output_dir_with_person = root_path.joinpath('with_person')
output_dir_without_person = root_path.joinpath('without_person')

In [None]:
# List the entries of the data folder to check that the expected result files are present.
os.listdir(data_path)

### Load labels and yolo results from csv file:

In [None]:
# Read back the previously saved table of labels and YOLO results
# (the run timestamp is part of the file name).
filename = 'people_detect_multi_approach_labels_results_yolo_20260129_101555.csv'
filepath = data_path.joinpath(filename)
yolo_labels_results = pd.read_csv(filepath)
# Preview the first five rows.
yolo_labels_results.head(5)

### Load labels and llm results from csv file: 

In [None]:
# Read back the previously saved table of labels and LLM results
# (the run timestamp is part of the file name).
filename = 'people_detect_multi_approach_labels_results_llm_20260129_101555.csv'
filepath = data_path.joinpath(filename)
llm_labels_results = pd.read_csv(filepath)
# Preview the first five rows.
llm_labels_results.head(5)

In [None]:
# (rows, columns) of the YOLO labels/results table.
yolo_labels_results.shape

In [None]:
# (rows, columns) of the LLM labels/results table.
llm_labels_results.shape

### Reload llm results dictionary: 

In [None]:
# Unpickle the stored LLM results object.
# NOTE: pickle can execute arbitrary code on load; only open files produced by this project.
filename = 'results_people_detect_multi_approach_20260129_101555.pkl'
filepath = data_path.joinpath(filename)
with filepath.open('rb') as f:
    llm_results_data = pickle.load(f)

In [None]:
# Display the unpickled LLM results object.
llm_results_data

### Load ml metrics data: 

In [None]:
# Read back the previously saved YOLO ML metrics table.
filename = 'people_detect_multi_approach_ml_metrics_yolo_20260129_101555.csv'
filepath = data_path.joinpath(filename)
ml_metrics_yolo = pd.read_csv(filepath)
# Preview the first five rows.
ml_metrics_yolo.head(5)

In [None]:
# Reload the saved LLM ML metrics table.
# Stored under its own name so that the YOLO metrics loaded in the previous
# cell (ml_metrics_yolo) are not overwritten by the LLM metrics.
filename = 'people_detect_multi_approach_ml_metrics_llm_20260129_101555.csv'
filepath = data_path / filename
ml_metrics_llm = pd.read_csv(filepath)
ml_metrics_llm.head()

In [None]:
# Unpickle the stored timing data of the YOLO run.
# NOTE: pickle can execute arbitrary code on load; only open files produced by this project.
filename = 'times_people_detect_multi_approach__yolo_20260129_101555.pkl'
filepath = data_path.joinpath(filename)
with filepath.open('rb') as f:
    times_data_yolo = pickle.load(f)

In [None]:
# Display the unpickled YOLO timing data.
times_data_yolo

In [None]:
# Unpickle the stored timing data of the LLM run.
# NOTE: pickle can execute arbitrary code on load; only open files produced by this project.
filename = 'times_people_detect_multi_approach__llm_20260129_101555.pkl'
filepath = data_path.joinpath(filename)
with filepath.open('rb') as f:
    times_data_llm = pickle.load(f)

In [None]:
# Display the unpickled LLM timing data.
times_data_llm

In [None]:
# Tabular view of the timing data.
# The name `times_data` is never defined in this notebook (the pickles are
# loaded into times_data_yolo and times_data_llm), so the original cell raised
# a NameError on a fresh kernel.
# NOTE(review): the YOLO timings are shown here; swap in times_data_llm if the
# LLM timings were intended — TODO confirm.
pd.DataFrame(times_data_yolo)

In [None]:
# `labels_results` is never defined in this notebook, so the original
# assignment raised a NameError on a fresh kernel.
# NOTE(review): assumed to mean the YOLO labels/results table, because the
# commented-out plot calls pair `labels_results` with 'with_person_yolo_pred'
# — TODO confirm.
label_data_m = yolo_labels_results

### Get number of NaNs:

In [None]:
# Total count of missing cells in the YOLO table:
# per-column NaN counts first, then their grand total.
num_nans = (
    yolo_labels_results
    .isna()
    .sum()
    .sum()
)
num_nans


In [None]:
# Total count of missing cells in the LLM table
# (isna is the same check as isnull).
num_nulls = (
    llm_labels_results
    .isna()
    .sum()
    .sum()
)
num_nulls


### Replace NaNs with 0 for the confusion matrix (a NaN can be interpreted as 'not recognised as in category x'):

In [None]:
# Show every LLM row that has a missing value in at least one column.
llm_labels_results.loc[llm_labels_results.isna().any(axis=1)]

In [None]:
# Copy of the LLM table in which every missing value is replaced by 0.
llm_labels_results_filled = llm_labels_results.fillna(value=0)
llm_labels_results_filled


In [None]:
# Copy of the YOLO table without any row that contains a missing value.
yolo_labels_results_clean = yolo_labels_results.dropna(axis=0, how='any')
yolo_labels_results_clean

In [None]:
# Copy of the LLM table without any row that contains a missing value.
llm_labels_results_clean = llm_labels_results.dropna(axis=0, how='any')
llm_labels_results_clean


### Confusion matrix 'with_person' (YOLO):

#### Recalculate ml metrics: 

In [None]:
# Ground-truth column and prediction column compared in this section.
var_name, pred_var_name = 'with_person', 'with_person_pred'

# NOTE(review): this section passes the full YOLO table, while the other
# sections pass the NaN-free *_clean tables — confirm this is intended.
subsets_and_metrics = llm_o.get_classification_subsets_metrics(
    yolo_labels_results,
    var_name,
    pred_var_name,
)


In [None]:
# Give each of the eight returned elements its own name.
(
    positives,
    negatives,
    true_positives,
    true_negatives,
    false_negatives,
    false_positives,
    sensitivity,
    specificity,
) = subsets_and_metrics

# Label and value go on separate lines.
print('sensitivity:', sensitivity, sep='\n')
print('specificity:', specificity, sep='\n')


#### Plot confusion matrix:

In [None]:

# Bundle the subsets in the order handed to save_conf_matrix.
cases = (
    true_positives,
    false_positives,
    true_negatives,
    false_negatives,
    positives,
    negatives,
)

# Keep the returned object so it can be written to disk in the next cell.
fig = llm_o.save_conf_matrix(
    yolo_labels_results,
    var_name,
    pred_var_name,
    cases,
)

#### Save confusion matrix as pdf:

In [None]:
# `fig` is the object returned by llm_o.save_conf_matrix in the plotting cell;
# write it to a PDF inside the data folder and then close it.
file_name = f'filter_out_people_conf_matrix_yolo_{var_name}.pdf'
conf_matrix_path = data_path.joinpath(file_name)
fig.savefig(conf_matrix_path)
plt.close(fig)

In [None]:
# Inspect the false_negatives element most recently unpacked from subsets_and_metrics.
false_negatives

In [None]:
# Inspect the false_positives element most recently unpacked from subsets_and_metrics.
false_positives

### Confusion matrix 'person_recognisable' (YOLO):

In [None]:
# Ground-truth column and prediction column compared in this section:
# the 'person_recognisable' label is checked against the 'with_person_pred' prediction.
var_name, pred_var_name = 'person_recognisable', 'with_person_pred'

# Computed on the NaN-free YOLO table.
subsets_and_metrics = llm_o.get_classification_subsets_metrics(
    yolo_labels_results_clean,
    var_name,
    pred_var_name,
)


In [None]:
# Give each of the eight returned elements its own name.
(
    positives,
    negatives,
    true_positives,
    true_negatives,
    false_negatives,
    false_positives,
    sensitivity,
    specificity,
) = subsets_and_metrics

# Label and value go on separate lines.
print('sensitivity:', sensitivity, sep='\n')
print('specificity:', specificity, sep='\n')


#### Plot confusion matrix:

In [None]:

# Bundle the subsets in the order handed to save_conf_matrix.
cases = (
    true_positives,
    false_positives,
    true_negatives,
    false_negatives,
    positives,
    negatives,
)

# Keep the returned object so it can be written to disk in the next cell.
fig = llm_o.save_conf_matrix(
    yolo_labels_results_clean,
    var_name,
    pred_var_name,
    cases,
)


#### Save confusion matrix as pdf:

In [None]:
# `fig` is the object returned by llm_o.save_conf_matrix in the plotting cell;
# write it to a PDF inside the data folder and then close it.
file_name = f'filter_out_people_recognisable_conf_matrix_yolo_{var_name}.pdf'
conf_matrix_path = data_path.joinpath(file_name)
fig.savefig(conf_matrix_path)
plt.close(fig)

In [None]:
# Inspect the false_negatives element most recently unpacked from subsets_and_metrics.
false_negatives

In [None]:
# Inspect the false_positives element most recently unpacked from subsets_and_metrics.
false_positives

### Confusion matrix 'with_person' minicpm:

In [None]:
# Ground-truth column and prediction column compared in this section.
var_name, pred_var_name = 'with_person', 'with_person_pred'

# Computed on the NaN-free LLM table.
subsets_and_metrics = llm_o.get_classification_subsets_metrics(
    llm_labels_results_clean,
    var_name,
    pred_var_name,
)


In [None]:
# Give each of the eight returned elements its own name.
(
    positives,
    negatives,
    true_positives,
    true_negatives,
    false_negatives,
    false_positives,
    sensitivity,
    specificity,
) = subsets_and_metrics

# Label and value go on separate lines.
print('sensitivity:', sensitivity, sep='\n')
print('specificity:', specificity, sep='\n')


#### Plot confusion matrix:

In [None]:

# Bundle the subsets in the order handed to save_conf_matrix.
cases = (
    true_positives,
    false_positives,
    true_negatives,
    false_negatives,
    positives,
    negatives,
)

# Keep the returned object so it can be written to disk in the next cell.
fig = llm_o.save_conf_matrix(
    llm_labels_results_clean,
    var_name,
    pred_var_name,
    cases,
)

#### Save confusion matrix as pdf:

In [None]:
# `fig` is the object returned by llm_o.save_conf_matrix in the plotting cell;
# write it to a PDF inside the data folder and then close it.
file_name = f'filter_out_people_conf_matrix_minicpm_{var_name}.pdf'
conf_matrix_path = data_path.joinpath(file_name)
fig.savefig(conf_matrix_path)
plt.close(fig)

In [None]:
# Inspect the false_negatives element most recently unpacked from subsets_and_metrics.
false_negatives

In [None]:
# Inspect the false_positives element most recently unpacked from subsets_and_metrics.
false_positives

### Confusion matrix 'person_recognisable' minicpm:

In [None]:
# Ground-truth column and prediction column compared in this section:
# the 'person_recognisable' label is checked against the 'with_person_pred' prediction.
var_name, pred_var_name = 'person_recognisable', 'with_person_pred'

# Computed on the NaN-free LLM table.
subsets_and_metrics = llm_o.get_classification_subsets_metrics(
    llm_labels_results_clean,
    var_name,
    pred_var_name,
)


In [None]:
# Give each of the eight returned elements its own name.
(
    positives,
    negatives,
    true_positives,
    true_negatives,
    false_negatives,
    false_positives,
    sensitivity,
    specificity,
) = subsets_and_metrics

# Label and value go on separate lines.
print('sensitivity:', sensitivity, sep='\n')
print('specificity:', specificity, sep='\n')


#### Plot confusion matrix:

In [None]:

# Bundle the subsets in the order handed to save_conf_matrix.
cases = (
    true_positives,
    false_positives,
    true_negatives,
    false_negatives,
    positives,
    negatives,
)

# Keep the returned object so it can be written to disk in the next cell.
fig = llm_o.save_conf_matrix(
    llm_labels_results_clean,
    var_name,
    pred_var_name,
    cases,
)

#### Save confusion matrix as pdf:

In [None]:
# `fig` is the object returned by llm_o.save_conf_matrix in the plotting cell;
# write it to a PDF inside the data folder and then close it.
file_name = f'filter_out_people_recognisable_conf_matrix_minicpm_{var_name}.pdf'
conf_matrix_path = data_path.joinpath(file_name)
fig.savefig(conf_matrix_path)
plt.close(fig)

In [None]:
# Inspect the false_negatives element most recently unpacked from subsets_and_metrics.
false_negatives

In [None]:
# Inspect the false_positives element most recently unpacked from subsets_and_metrics.
false_positives