In [None]:
import os
from pathlib import Path
from ultralytics import YOLO
from PIL import Image
import shutil
import pandas as pd
from source import image_id_converter as img_idc
from source import sort_img_files as sif
import matplotlib.pyplot as plt
from source import llm_input as llm_i
from source import llm_output as llm_o
import os
from PIL import Image

In [None]:
import ollama
import json
import re
import pickle

In [None]:
# Show the current working directory; the project paths further down are built from Path.cwd().
os.getcwd()

# Using LLM (mini-CPM) for image analysis

## Define Functions:

In [None]:
def create_analysis_prompt():
    """Return the instruction text that asks the model for a fixed-key dictionary.

    The requested dictionary lists the image type(s), a 0/1 flag for each of
    nine object classes (person, mountain, river, lake, building, church,
    city, village, glacier) plus two free-form fields ('other_objects' and
    'additional_comments').

    Note: later cells of this notebook define create_analysis_prompt again;
    whichever definition was executed last is the one that gets used.

    Returns:
        str: the prompt text.
    """
    prompt_text = """
    Analyze this image and return ONLY a Python dictionary in exactly this format:
    
    {
        'image_type': [],  # List all that apply: photography, drawing, painting, statistics figure, map, scheme, other
        'person': X,              # 1 if present, 0 if not
        'mountain': X,            # 1 if present, 0 if not
        'river': X,               # 1 if present, 0 if not
        'lake': X,                # 1 if present, 0 if not
        'building': X,            # 1 if present, 0 if not
        'church': X,              # 1 if present, 0 if not
        'city': X,                # 1 if present, 0 if not
        'village': X,             # 1 if present, 0 if not
        'glacier': X,             # 1 if present, 0 if not
        'other_objects': [],      # List of other noteworthy/dominant objects
        'additional_comments': '' # Any additional observations or empty string if none
    }
    
    Replace X with 1 (present) or 0 (not present).
    Return ONLY the dictionary, no other text.
    Your answer MUST have the exact structue of the dictionary described above (all keys MUST be present). 
    If you cannot answer the question in the way implied by this structure, enter 'None' as value and offer 
    your answer and explanations under 'additional_comments'.
    """
    return prompt_text

In [None]:
def create_analysis_prompt():
    """Return the prompt asking for image type, Alpine setting and four object flags.

    The dictionary template shown to the model has to be well formed, because
    the model is told to reproduce it exactly: every key carries a value
    placeholder and every entry except the last ends with a comma.

    Note: later cells of this notebook define create_analysis_prompt again;
    whichever definition was executed last is the one that gets used.

    Returns:
        str: the prompt text.
    """
    return """
    Analyze this image and return ONLY a Python dictionary in exactly this format:
    
    {
        'image_type': [],  # List all that apply: photography, drawing, painting, statistics figure, map, scheme, other
        'Does this appear to be in an high Alpine environment?': X,  # 1 if yes, 0 if no
        'person': X,              # 1 if present, 0 if not
        'glacier': X,             # 1 if present, 0 if not
        'church': X,              # 1 if present, 0 if not
        'water body': X,          # 1 if present, 0 if not
        'other_objects': [],      # List of other noteworthy/dominant objects
        'additional_comments': '' # Any additional observations or empty string if none
    }
    
    Replace X with 1 (present) or 0 (not present).
    Return ONLY the dictionary, no other text.
    """

In [None]:
def create_analysis_prompt():
    """Return the prompt asking for image type, Alpine setting and four object flags.

    The keys of the template are 'image_type', 'high alpine environment',
    'person', 'glacier', 'church', 'water body', 'other_objects' and
    'additional_comments'. The template has to be well formed, because the
    model is told to reproduce it exactly: every key carries a value
    placeholder and every entry except the last ends with a comma.

    Returns:
        str: the prompt text.
    """
    return """
    Analyze this image and return ONLY a Python dictionary in exactly this format:
    
    {
        'image_type': [],  # List all that apply: photography, drawing, painting, statistics figure, map, scheme, other
        'high alpine environment': X,  # 1 if this appears to be in an high Alpine environment, 0 if not
        'person': X,               # 1 if present, 0 if not
        'glacier': X,              # 1 if present, 0 if not
        'church': X,               # 1 if present, 0 if not
        'water body': X,           # 1 if present, 0 if not
        'other_objects': [],       # List of other noteworthy/dominant objects
        'additional_comments': ''  # Any additional observations or empty string if none
    }
    
    Replace X with 1 (present) or 0 (not present).
    Return ONLY the dictionary, no other text.
    """

In [None]:
def add_pred_values(idx, labels_results, columns, values_to_add):
    """Write values into the given columns of the row(s) whose image_id equals idx.

    The data frame is modified in place; nothing is returned.

    Args:
        idx: image id to look up in the 'image_id' column.
        labels_results: data frame holding an 'image_id' column.
        columns: column label(s) to assign to.
        values_to_add: value(s) to store in those columns.
    """
    row_mask = labels_results['image_id'].eq(idx)
    labels_results.loc[row_mask, columns] = values_to_add

### Prepare empty dictionary for time analyses and get time stamp for overall workflow duration:

In [None]:
# Nested record of run durations, keyed by analysis name:
time_analyses = {}

# Column-oriented version of the same information (one list per column), so
# that it can be turned into a data frame later:
time_analyses_for_df = {
    column_name: []
    for column_name in (
        'analysis_name',
        'time_stamp_start',
        'duration_str',
        'duration_seconds',
        'duration_seconds_str',
        'duration_minutes',
        'duration_minutes_str',
    )
}

# Start time of the whole workflow:
timestamp_start_workflow = pd.Timestamp.now()
timestamp_start_workflow

### Define LLM model to be used:

In [None]:
# Model-calling function (MiniCPM) that is handed to llm_o.analyze_image_structured
# in the analysis loops below.
model_function = llm_i.call_minicpm_model

## Set paths:

In [None]:
# All locations are derived from the notebook's working directory, so no
# machine-specific absolute path is needed.
project_path = Path.cwd()
root_path = project_path.joinpath('test_llm_img_analysis')

# Input data:
data_path = root_path.joinpath('data')          # label table; saved results are written here too
tif_data_path = root_path.joinpath('data_1')    # source folder of the tif -> jpg conversion
jpg_data_path = root_path.joinpath('data_jpg')  # converted images that are analysed

# Output folders used for sorting the images:
output_dir_not_photo = root_path.joinpath('not_photo')
output_dir_with_person = root_path.joinpath('with_person')
output_dir_without_person = root_path.joinpath('without_person')



### Copy and convert image files from tif_data_path to jpg_data_path:

In [None]:

# Convert the images found in tif_data_path and write the results to jpg_data_path.
source_folder = tif_data_path
destination_folder = jpg_data_path

# quality=100 is passed through to the converter (presumably the JPEG quality setting — TODO confirm in llm_input).
llm_i.convert_tif_to_jpg(source_folder, destination_folder, quality=100)


## Create directories for sorting the images:

In [None]:
# Make sure the three sorting folders exist (existing folders are left untouched):
for sorting_dir in (output_dir_not_photo, output_dir_with_person, output_dir_without_person):
    os.makedirs(sorting_dir, exist_ok=True)

### Set analysis name: 

In [None]:
# Name under which the duration of the LLM run is filed by store_duration.
# Note: the evaluation loop further down assigns analysis_name again.
analysis_name = 'image_analysis_minicpm'

## Loop through images and analyze with miniCPM (LLM model):

In [None]:
# A hidden '.DS_Store' file inside jpg_data_path would show up in the directory
# listing next to the images, so delete it if it is there:
ds_file_path = jpg_data_path / '.DS_Store'
if not ds_file_path.exists():
    print(".DS_Store not found")
else:
    ds_file_path.unlink()
    print("Removed .DS_Store")

In [None]:
# Get list of image files to analyse. Hidden entries (e.g. '.DS_Store',
# '.ipynb_checkpoints') are skipped because they are not images, and the list
# is sorted because os.listdir returns entries in arbitrary order:
image_files = sorted(file_name for file_name in os.listdir(jpg_data_path)
                     if not file_name.startswith('.'))

# Make empty dictionary to store results (image id -> LLM response):
image_descr = {}

# Loop through images to get answers:
for image_file in image_files:
    image_path = jpg_data_path / image_file
    print('\n')
    print(str(image_path))

    # The image id is the last three characters of the file name without its
    # extension.
    # NOTE(review): this assumes ids of exactly three characters; other cells
    # derive the id from the text after 'Oberland' — confirm both agree.
    img_id = os.path.splitext(image_file)[0][-3:]

    # Analyse image with the prompt and model function configured above:
    image_description = llm_o.analyze_image_structured(image_path, create_analysis_prompt, model_function)

    print(image_description)
    image_descr[img_id] = image_description

In [None]:
# Number of image ids for which a response was stored.
len(image_descr)

In [None]:
# Sanity check: image_descr is the dictionary filled in the loop above.
type(image_descr)

### Load label data (ground truth) to compare to LLM responses:

The file with_without_person.csv contains labels added by (human) visual inspection that represent the ground truth. 
 * Column with_person: whether or not any person is in the image.
 * Column recognisable: whether any person that would be recognisable to a human familiar with said person is in the image.
 * Column photo: whether or not the image is a photograph (as opposed to some other kind of representation such as map, drawing, painting, scheme, figure)
 * Column church: whether or not any church is in the image.
 * Column high_alpine_environment: whether or not the scene shown in the image is situated in a high alpine environment (according to non-expert human judgement)

In [None]:
# Load the label table (ground truth); rows are matched to the LLM responses via the image_id column.
label_data = pd.read_csv(data_path/'labels_mod.csv')
label_data.head()

In [None]:
# Image ids in the form in which they are stored in the label file (converted in the next cell).
img_ids = list(label_data.image_id)

In [None]:
# The label file stores image ids with a textual prefix (e.g. 'id234') so that they are
# always read back as strings. Convert them back to the plain id strings (e.g. '234'):
label_data['image_id'] = img_idc.reconvert_image_ids(img_ids)

In [None]:
# Check the converted image ids.
label_data.head()

### Rename the labels:

### The following cell is only required for the test run on the test data: 

In [None]:
# Restrict label_data to the images that are present in jpg_data_path (test run).

# Step 1: clear clutter out of the image folder so that only image files are listed.
ds_file_path = jpg_data_path / '.DS_Store'
if not ds_file_path.exists():
    print(".DS_Store not found")
else:
    ds_file_path.unlink()
    print("Removed .DS_Store")

for checkpoint_dir in jpg_data_path.rglob('.ipynb_checkpoints'):
    if not checkpoint_dir.is_dir():
        continue
    print(f"Removing: {checkpoint_dir}")
    shutil.rmtree(checkpoint_dir)

# Step 2: list the image files and derive their ids, i.e. the text between
# 'Oberland' and the first following dot of each file name.
image_files = os.listdir(jpg_data_path)
img_ids = sorted(image_file.split('Oberland')[1].split('.')[0] for image_file in image_files)
print(img_ids)

# Step 3: keep only the label rows whose image id belongs to one of these files.
select_bools = label_data.image_id.isin(img_ids).tolist()
label_data = label_data[select_bools].copy()
label_data

### Loop through Responses from the LLM and incorporate them into a Data Frame:

In [None]:
# Result lists: one entry per image id, None where no usable response exists.
img_type = []
is_photo = []
with_person = []
with_church = []

# Image ids to process, taken from the label table:
img_ids = label_data.image_id

# Ids of images whose response could not be used (for closer visual inspection):
img_ids_closer_inspection = []

# Keys expected (in this order) in each dictionary provided as an answer by the LLM:
keys_list_expected = ['image_type', 'high alpine environment', 'person', 
                      'glacier', 'church', 'water body', 'other_objects', 
                      'additional_comments']

# Keys that are actually read from a response below:
keys_required = ('image_type', 'person', 'church')


def _store_prediction(img_pred):
    """Append the values read from one usable response dictionary to the result lists."""
    # Read everything first, so that a failure cannot leave the lists with
    # different lengths.
    image_type_values = img_pred['image_type']
    is_photo_value = 1 if 'photography' in image_type_values else 0
    person_value = img_pred['person']
    church_value = img_pred['church']

    img_type.append(image_type_values)
    is_photo.append(is_photo_value)
    with_person.append(person_value)
    with_church.append(church_value)


def _store_unusable(img_id):
    """Flag an image for closer inspection and keep the result lists aligned."""
    img_ids_closer_inspection.append(img_id)
    img_type.append(None)
    is_photo.append(None)
    with_person.append(None)
    with_church.append(None)


# Loop through image ids:
for img_id in img_ids:

    # Get response from LLM for image id in question:
    img_pred = image_descr[img_id]

    # Only dictionaries have keys. Checking the type first means a response of
    # any other type ends up in the final 'else' branch instead of raising an
    # AttributeError here:
    dict_struct_condition = isinstance(img_pred, dict)
    keys_list = list(img_pred.keys()) if dict_struct_condition else []

    keys_condition = (keys_list == keys_list_expected)
    raw_key_condition = (keys_list == ['raw_response'])

    # If the llm response corresponds to the expected
    # structure, get response values as planned:
    if keys_condition:
        _store_prediction(img_pred)

    # If the response only carries the unparsed text under 'raw_response',
    # try to find a dictionary inside that text and parse it:
    elif raw_key_condition:
        print('\n')
        print('raw_repsonse_dict:')
        print(img_id)

        response_text = img_pred['raw_response']

        # First '{' and the first '}' that follows it; -1 means "not found":
        start_idx = response_text.find('{')
        end_idx = response_text.find('}', start_idx + 1) if start_idx != -1 else -1

        if end_idx == -1:
            # No brace pair at all: nothing to parse.
            success_bool, parsed_pred = False, None
        else:
            # NOTE(review): parse_response_to_dict is neither defined nor imported
            # in the visible part of this notebook (perhaps it lives in
            # llm_output) — confirm it is in scope before relying on this branch.
            success_bool, parsed_pred = parse_response_to_dict(response_text[start_idx:end_idx + 1])
        print('success_bool:')
        print(success_bool)

        # A parsed result is only usable if it is a dictionary that holds
        # every key read by _store_prediction:
        parsed_usable = (success_bool and isinstance(parsed_pred, dict)
                         and all(key in parsed_pred for key in keys_required))

        if parsed_usable:
            print(type(parsed_pred))
            print(parsed_pred.keys())
            _store_prediction(parsed_pred)
        else:
            print('parse unsuccessful')
            print(img_id)
            _store_unusable(img_id)

    # Neither the expected structure nor a 'raw_response' key: add the image
    # to the list of images for closer (visual) inspection:
    else:
        print('\n')
        print('no structure at all:')
        print(img_id)
        _store_unusable(img_id)


In [None]:
# All result lists must be as long as the list of image ids
# (the id count is printed first and last for easy comparison):
for collected_values in (img_ids, img_type, is_photo, with_person, with_church, img_ids):
    print(len(collected_values))

In [None]:
# Ids of images that were flagged in the loop above because their response could not be used:
img_ids_closer_inspection

In [None]:
# Put response variables into data frame (one row per image id; the lists were
# filled in the same order in which img_ids was looped through above):
predictions = pd.DataFrame({'image_id': img_ids, 
                           'is_photo_pred': is_photo,
                           'with_person_pred': with_person,
                           'with_church_pred': with_church})

In [None]:
# First rows of the prediction table.
predictions.head()

In [None]:
# Cast the prediction columns to the nullable integer dtype 'Int8'
# (images without a usable response were given None above):
for pred_column in ('is_photo_pred', 'with_person_pred', 'with_church_pred'):
    predictions[pred_column] = predictions[pred_column].astype('Int8')

### Merge label data with the predictions:

In [None]:
# Inner join on image_id: only images present in both the label table and the predictions are kept.
labels_results = label_data.merge(predictions, how='inner', on='image_id')
labels_results.head()

In [None]:
# Rows and columns of the merged table (the row count shows how many images remained after the inner join).
labels_results.shape

In [None]:
# Inspect the raw response container again (same check as right after the analysis loop).
type(image_descr)

In [None]:
# Number of stored responses.
len(image_descr)

In [None]:
# Number of keys (image ids) in the response dictionary.
len(image_descr.keys())

In [None]:
# Look at the first three image ids used as keys.
# Note: this reuses the name keys_list, which the parsing loop above also assigns.
keys_list = list(image_descr.keys())
keys_list[0:3]

In [None]:
# Print the first five responses together with their type and keys:
iter_count = 0
for iter_count, (key, item) in enumerate(image_descr.items(), start=1):
    print('\n')
    print(key)
    print(item)
    print(type(item))
    print(item.keys())
    if iter_count > 4:
        break

In [None]:
# Ids flagged for closer inspection (shown again for reference).
img_ids_closer_inspection

In [None]:
# Full merged table of labels and predictions.
labels_results

### Convert data type of added prediction columns:

In [None]:
# astype returns a new data frame, so the result has to be assigned back for
# the conversion to take effect:
labels_results = labels_results.astype({'is_photo_pred': 'Int8', 'with_person_pred': 'Int8',
                                        'with_church_pred': 'Int8'})
labels_results

In [None]:
# Show two of the sorting folders defined in the path section.
print(output_dir_not_photo)
print(output_dir_with_person)

### Calculate sensitivity and specificity for person predictions and get lists of images with positive person predictions:

In [None]:
# Boolean masks for the ground truth (with_person) and the prediction (with_person_pred):
positive_bools = labels_results.with_person == 1
negative_bools = labels_results.with_person == 0
positive_pred_bools = labels_results.with_person_pred == 1
negative_pred_bools = labels_results.with_person_pred == 0

# Subsets of the merged table for each outcome:
positives = labels_results[positive_bools]
negatives = labels_results[negative_bools]
true_positives = labels_results[positive_bools & positive_pred_bools]
true_negatives = labels_results[negative_bools & negative_pred_bools]

false_negatives = labels_results[positive_bools & negative_pred_bools]
false_positives = labels_results[negative_bools & positive_pred_bools]

# Sensitivity = true positives / all positives. It is undefined (nan) when the
# data contain no positive image; dividing anyway would raise ZeroDivisionError.
n_positives = positives.shape[0]
sensitivity = true_positives.shape[0] / n_positives if n_positives else float('nan')
print('sensitivity:')
print(sensitivity)

# Specificity = true negatives / all negatives, undefined (nan) without negatives.
n_negatives = negatives.shape[0]
specificity = true_negatives.shape[0] / n_negatives if n_negatives else float('nan')
print('specificity:')
print(specificity)


### Inspect false negatives:

In [None]:
# Images labelled as showing a person for which the prediction was 0.
false_negatives

### Inspect false positives:

In [None]:
# Images labelled as showing no person for which the prediction was 1.
false_positives

## Repeat the same procedure but with nicer code (modularized):

In [None]:
def create_analysis_prompt():
    """Return the prompt asking for image type, Alpine setting and four object flags.

    The keys of the template are 'image_type', 'high alpine environment',
    'person', 'glacier', 'church', 'water body', 'other_objects' and
    'additional_comments'. The template has to be well formed, because the
    model is told to reproduce it exactly: every key carries a value
    placeholder and every entry except the last ends with a comma.

    Note: a function with this name is also defined in earlier cells of this
    notebook; the definition executed last is the one in effect.

    Returns:
        str: the prompt text.
    """
    return """
    Analyze this image and return ONLY a Python dictionary in exactly this format:
    
    {
        'image_type': [],  # List all that apply: photography, drawing, painting, statistics figure, map, scheme, other
        'high alpine environment': X,  # 1 if this appears to be in an high Alpine environment, 0 if not
        'person': X,               # 1 if present, 0 if not
        'glacier': X,              # 1 if present, 0 if not
        'church': X,               # 1 if present, 0 if not
        'water body': X,           # 1 if present, 0 if not
        'other_objects': [],       # List of other noteworthy/dominant objects
        'additional_comments': ''  # Any additional observations or empty string if none
    }
    
    Replace X with 1 (present) or 0 (not present).
    Return ONLY the dictionary, no other text.
    """

In [None]:
def create_prompt_img_type_multi_object():
    """Return the prompt that asks for True/False answers on image type and objects.

    The keys of the template are 'image_is_photograph',
    'high_alpine_environment', 'person', 'glacier', 'church', 'water_body',
    'other_objects' and 'additional_comments'. Every entry except the last
    ends with a comma, so that the template itself is a well-formed
    dictionary layout for the model to copy.

    Returns:
        str: the prompt text.
    """
    return """
    Analyze this image and return ONLY a Python dictionary in exactly this format:
    
    {
        'image_is_photograph': X,     # True if the image is a photograph, False otherwise (if the image is a drawing, painting, statistics figure, map, scheme, other)
        'high_alpine_environment': X, # True if this appears to be in an high Alpine environment, False if not
        'person': X,                  # True if present, False if not
        'glacier': X,                 # True if present, False if not
        'church': X,                  # True if present, False if not
        'water_body': X,              # True if present, False if not
        'other_objects': [],          # List of other noteworthy/dominant objects
        'additional_comments': ''     # Any additional observations or empty string if none
    }
    
    Replace X with True (present) or False (not present).
    Return ONLY the dictionary, no other text.
    """

In [None]:
def analyse_giub_img_dir_llm(jpg_data_path, create_analysis_prompt, model_function):
    """Run the LLM image analysis on every image file of a directory.

    Args:
        jpg_data_path: directory holding the image files (a path object that
            supports the '/' operator).
        create_analysis_prompt: prompt function passed on to
            llm_o.analyze_image_structured.
        model_function: model-calling function passed on to
            llm_o.analyze_image_structured.

    Returns:
        tuple: (time stamp taken before the first image, time stamp taken
        after the last image, dictionary mapping image id -> response).
    """
    # Get time stamp:
    timestamp_start_is_photo_analysis = pd.Timestamp.now()

    # Hidden entries (e.g. '.DS_Store', '.ipynb_checkpoints') are not images
    # and are skipped. The list is sorted because os.listdir returns entries
    # in arbitrary order.
    image_files = sorted(file_name for file_name in os.listdir(jpg_data_path)
                         if not file_name.startswith('.'))

    # Make empty dictionary to store results:
    image_descr = {}

    # Loop through images to get answers:
    for image_file in image_files:
        image_path = jpg_data_path / image_file

        # The image id is the last three characters of the file name without
        # its extension.
        # NOTE(review): this assumes ids of exactly three characters — confirm
        # against the ids in the label table.
        img_id = os.path.splitext(image_file)[0][-3:]

        image_descr[img_id] = llm_o.analyze_image_structured(image_path, create_analysis_prompt, model_function)

    timestamp_end_is_photo_analysis = pd.Timestamp.now()

    return timestamp_start_is_photo_analysis, timestamp_end_is_photo_analysis, image_descr
    

In [None]:
def store_duration(time_analysis_dict, time_analysis_for_df_dict, analysis_name, duration,
                  timestamp_start_is_photo_analysis,
                  timestamp_end_is_photo_analysis):
    """Record how long one analysis took in both bookkeeping dictionaries.

    Args:
        time_analysis_dict: nested dictionary; gets one sub-dictionary under
            the key analysis_name (an existing entry is replaced).
        time_analysis_for_df_dict: dictionary of lists; one value is appended
            to each of the lists 'analysis_name', 'time_stamp_start',
            'duration_str', 'duration_seconds', 'duration_seconds_str',
            'duration_minutes' and 'duration_minutes_str'.
        analysis_name: name of the analysis.
        duration: elapsed time as an object offering total_seconds()
            (e.g. the difference of two time stamps).
        timestamp_start_is_photo_analysis: start time of the analysis.
        timestamp_end_is_photo_analysis: end time of the analysis.

    Returns:
        tuple: the two (modified) dictionaries, in the order they were passed.
    """
    # Derive the numbers from the duration argument itself rather than from a
    # global variable that only exists if another cell has been run before.
    total_seconds = duration.total_seconds()
    total_minutes = total_seconds / 60

    duration_str = f"Analysis took: {duration}"
    seconds_str = f"Analysis took: {total_seconds:.2f} seconds"
    minutes_str = f"Analysis took: {total_minutes:.2f} minutes"

    time_analysis_dict[analysis_name] = {
        'duration_str': duration_str,
        'duration_seconds': total_seconds,
        'duration_seconds_str': seconds_str,
        'duration_minutes': total_minutes,
        'duration_minutes_str': minutes_str,
        'time_stamp_start': timestamp_start_is_photo_analysis,
        'time_stamp_end': timestamp_end_is_photo_analysis,
    }

    time_analysis_for_df_dict['analysis_name'].append(analysis_name)
    time_analysis_for_df_dict['time_stamp_start'].append(timestamp_start_is_photo_analysis)
    time_analysis_for_df_dict['duration_str'].append(duration_str)
    time_analysis_for_df_dict['duration_seconds'].append(total_seconds)
    time_analysis_for_df_dict['duration_seconds_str'].append(seconds_str)
    time_analysis_for_df_dict['duration_minutes'].append(total_minutes)
    time_analysis_for_df_dict['duration_minutes_str'].append(minutes_str)

    return time_analysis_dict, time_analysis_for_df_dict

In [None]:
# Columns available in the label table (the label names used in the evaluation below must be among them).
label_data.columns

### Set parameters:

In [None]:
# Set parameters:
# Prompt function to use, its name, an id for this prompt version, and the prompt text itself.
prompt_func = create_prompt_img_type_multi_object
prompt_template = prompt_func.__name__
prompt_id = prompt_template + '_v1'
prompt_text = prompt_func()


# Keys (in this order) that the prompt above asks the model to return:
keys_list_expected = ['image_is_photograph', 'high_alpine_environment', 'person', 'glacier',
                      'church', 'water_body', 'other_objects', 'additional_comments']

# Note: the evaluation loop further down assigns response_variable again for each label.
response_variable = 'image_is_photograph'

# label_name = 'is_photo'
# prediction_name = 'is_photo_pred'

### Prepare data objects: 

In [None]:
# Containers for the results of the modular run.

# Raw LLM responses; filled below as {prompt_id: {timestamp_id: responses}}:
response_dictionaries = {prompt_id: {}}

# Per analysis: table of images flagged for closer inspection.
images_closer_inspection = {}

# Per run (timestamp id): prompt information and prediction tables.
results_tabular = {}

# Table of classification metrics:
ml_metrics = pd.DataFrame({})

# One list per metrics column; the evaluation loop appends one value per analysis:
ml_metrics_analysis_name, ml_metrics_prompt_id, ml_metrics_label_name = [], [], []
ml_metrics_time_stamp = []
ml_metrics_positives, ml_metrics_negatives = [], []
ml_metrics_true_positives, ml_metrics_false_positives = [], []
ml_metrics_true_negatives, ml_metrics_false_negatives = [], []
ml_metrics_sensitivity, ml_metrics_specificity = [], []

### Carry out LLM analysis of the images:

In [None]:
# Carry out the LLM analysis on all images in jpg_data_path; returns the start and end
# time stamps of the run and the responses keyed by image id:
timestamp_start_is_photo_analysis, timestamp_end_is_photo_analysis, image_descr = analyse_giub_img_dir_llm(jpg_data_path, prompt_func, model_function)


In [None]:
# Calculate duration of analysis (difference of the two time stamps, in seconds):
duration = timestamp_end_is_photo_analysis - timestamp_start_is_photo_analysis
total_seconds = duration.total_seconds()
print(total_seconds)

### Extract and organize information from the dictionary containing the LLM responses:

In [None]:

# Calculate duration of analysis:
duration = timestamp_end_is_photo_analysis - timestamp_start_is_photo_analysis
total_seconds = duration.total_seconds()
print(total_seconds)

# Store information about duration of LLM task.
# NOTE(review): analysis_name is assigned again inside the loop below, so
# re-running this cell files the duration under the last loop value — confirm
# whether a separate name should be used here.
time_analyses, time_analyses_for_df = store_duration(time_analyses, time_analyses_for_df, analysis_name, 
                duration,timestamp_start_is_photo_analysis,
                timestamp_end_is_photo_analysis)

# Get timestamp_id as string from the time stamp:
timestamp_id = timestamp_start_is_photo_analysis.strftime('%Y%m%d_%H%M%S')

# Store dictionary with LLM responses as raw data:
response_dictionaries[prompt_id][timestamp_id] = image_descr

# Image ids to evaluate, taken directly from the label table so that the cell
# does not depend on whatever img_ids was left holding by an earlier cell:
img_ids_l = list(label_data.image_id)

# Prepare response variable names and label names to loop through:
response_variables = ['image_is_photograph', 'person', 'church']
label_names = ['is_photo', 'with_person', 'with_church']

# Prepare dictionary for long term storing of results:
results_tabular[timestamp_id] = {}
results_tabular[timestamp_id]['prompt_id'] = prompt_id
results_tabular[timestamp_id]['prompt_template'] = prompt_template
results_tabular[timestamp_id]['prompt_text'] = prompt_text
results_tabular[timestamp_id]['predictions'] = {}

# Get copy of label data to merge with prediction for short term presentation of results:
labels_results_i = label_data.copy()
print('labels_results initial:')
print(labels_results_i.shape)
print(labels_results_i.columns)

# Extract predictions for different response variables:
for response_variable, label_name in zip(response_variables, label_names):
    # set prediction name:
    prediction_name = label_name + '_pred'
    analysis_name = label_name + '_struct_minicpm'
    print('\n')
    print('response_variable name and prediction_name:')
    print(response_variable)
    print(prediction_name)
    img_ids, response_values, img_ids_closer_inspection = \
    llm_o.extract_vals_from_response_dict(img_ids_l, image_descr, keys_list_expected, response_variable)

    predictions = pd.DataFrame({'image_id': img_ids_l, 
                                   prediction_name: response_values})
    predictions[prediction_name] = predictions[prediction_name].astype('Int8')

    print('predictions:')
    print(predictions.shape)
    print(predictions.columns)

    results_tabular[timestamp_id]['predictions'][response_variable] = predictions

    # Merge label data with the predictions:
    labels_results_i = labels_results_i.merge(predictions, how='inner', on='image_id')
    print('\n')
    print('merged labels_results:')
    print(labels_results_i.shape)
    print(labels_results_i.columns)

    # Save image list for closer inspection:
    timestamp_ids = [timestamp_id] * len(img_ids_closer_inspection)
    imgs_closer_inspection = pd.DataFrame({'image_id': img_ids_closer_inspection,
    'time_stamp': timestamp_ids})
    images_closer_inspection[analysis_name] = imgs_closer_inspection

    # Get the classification subsets plus sensitivity and specificity for this label:
    subsets_and_metrics = llm_o.get_classification_subsets_metrics(labels_results_i, label_name, prediction_name)
    positives, negatives, true_positives, true_negatives, \
    false_negatives, false_positives, sensitivity, specificity = subsets_and_metrics

    ml_metrics_analysis_name.append(analysis_name)
    ml_metrics_prompt_id.append(prompt_id)
    ml_metrics_label_name.append(label_name)
    ml_metrics_time_stamp.append(timestamp_start_is_photo_analysis)
    ml_metrics_positives.append(positives.shape[0])
    ml_metrics_negatives.append(negatives.shape[0])
    ml_metrics_true_positives.append(true_positives.shape[0])
    ml_metrics_false_positives.append(false_positives.shape[0])
    ml_metrics_true_negatives.append(true_negatives.shape[0])
    ml_metrics_false_negatives.append(false_negatives.shape[0])
    ml_metrics_sensitivity.append(sensitivity)
    ml_metrics_specificity.append(specificity)

# Build the metrics table once, after the loop. The lists hold one value per
# analysis recorded so far, so the table gets exactly one row per analysis.
# (Concatenating the cumulative lists inside the loop would repeat rows.)
ml_metrics = pd.DataFrame({
    'analysis_name': ml_metrics_analysis_name,
    'prompt_id': ml_metrics_prompt_id,
    'label_name': ml_metrics_label_name,
    'time_stamp': ml_metrics_time_stamp,
    'positives': ml_metrics_positives,
    'negatives': ml_metrics_negatives,
    'true_positives': ml_metrics_true_positives,
    'false_positives': ml_metrics_false_positives,
    'true_negatives': ml_metrics_true_negatives,
    'false_negatives': ml_metrics_false_negatives,
    'sensitivity': ml_metrics_sensitivity,
    'specificity': ml_metrics_specificity,
})


In [None]:
# Metrics table assembled in the evaluation cell above.
ml_metrics


In [None]:
# Timestamp ids of the runs stored in results_tabular.
results_tabular.keys()

In [None]:
# Rebuild the merged table from the stored prediction tables of the current
# run. The run is looked up via timestamp_id; a hard-coded key only exists
# for the one run during which it was written down.
label_data_m = label_data.copy()
for key, item in results_tabular[timestamp_id]['predictions'].items():
    print(key)
    print(type(item))

    label_data_m = label_data_m.merge(item, how= 'inner', on='image_id')
    print(label_data_m.head())
    

In [None]:
# Labels merged with the stored prediction tables.
label_data_m

In [None]:
# Labels merged with the predictions inside the evaluation loop (for comparison with label_data_m).
labels_results_i

In [None]:
# Merged table from the first (non-modular) pass, before it is replaced in the next cell.
labels_results

In [None]:
# From here on labels_results refers to the merged table of the modular run
# (the same object as labels_results_i, not a copy).
labels_results = labels_results_i

In [None]:
# Type of a single prediction value; [0] looks up the index label 0 (assumes the merged table has it — TODO confirm).
type(labels_results.with_person_pred[0])

In [None]:
# End time of the LLM analysis run.
timestamp_end_is_photo_analysis

In [None]:
# Id of the prompt version used for this run.
prompt_id

In [None]:
# Run id (start time of the analysis formatted as YYYYmmdd_HHMMSS); it is part of the output file names below.
timestamp_id

In [None]:
# Date part (YYYY-MM-DD) of the end time stamp:
date = str(timestamp_end_is_photo_analysis)[:10]

# The metrics table is written to data_path under a name that carries the run id:
filename = f'ml_metrics_struct_minicpm_{timestamp_id}.csv'
ml_metrics_output_path = os.path.join(data_path, filename)
ml_metrics.to_csv(ml_metrics_output_path, index=False)

# Read the file back in to confirm that saving worked:
ml_metrics_reloaded = pd.read_csv(ml_metrics_output_path)
ml_metrics_reloaded.head()


In [None]:
# Run id used in the file names below.
timestamp_id

## Save response dictionary:

In [None]:

# Define file name:
filename = 'responses_struct_minicpm_' + timestamp_id + '.pkl'

# Save dictionary with LLM responses (nested as {prompt_id: {timestamp_id: responses}}):
img_analysis_output_path = os.path.join(data_path, filename)
with open(img_analysis_output_path, 'wb') as f:
    pickle.dump(response_dictionaries, f)

# Reload saved dictionary to check if saving worked.
# (pickle files can execute code when loaded; only load files written by this notebook.)
with open(img_analysis_output_path, 'rb') as f:
    reloaded_response_dictionaries = pickle.load(f)

# The file holds the whole nested dictionary. Pick this run's responses out of
# it, so that they are compared with image_descr like for like:
reloaded_image_descr = reloaded_response_dictionaries[prompt_id][timestamp_id]

# Check if original and reloaded dictionary are the same:
print(len(image_descr))
print(type(image_descr))
print(type(reloaded_image_descr))
print(len(reloaded_image_descr))

print(image_descr.keys() == reloaded_image_descr.keys())

In [None]:
#reloaded_image_descr

## Save labels and results:

In [None]:

# File name and path for the pickled results_tabular object, keyed by timestamp_id.
results_file_name = f'results_tabular_struct_minicpm_{timestamp_id}.pkl'
results_tabular_output_path = os.path.join(data_path, results_file_name)

# Serialise results_tabular to disk.
with open(results_tabular_output_path, 'wb') as pickle_out:
    pickle.dump(results_tabular, pickle_out)

# Read the file back to check that saving worked.
with open(results_tabular_output_path, 'rb') as pickle_in:
    reloaded_results_tabular = pickle.load(pickle_in)

# Size and type of the original and the reloaded object, then a key comparison.
for checked_value in (
    len(results_tabular),
    type(results_tabular),
    type(reloaded_results_tabular),
    len(reloaded_results_tabular),
):
    print(checked_value)

print(results_tabular.keys() == reloaded_results_tabular.keys())

In [None]:
reloaded_results_tabular.keys()

In [None]:
reloaded_results_tabular['20251023_115108']['predictions']

In [None]:
reloaded_results_tabular['20251023_115108']['predictions']['image_is_photograph']

## Calculate duration of analysis overall:

In [None]:
timestamp_end_workflow = pd.Timestamp.now()
timestamp_end_workflow

## Save time analyses: 

In [None]:

# File name and path for the pickled timing data, keyed by timestamp_id.
time_analyses_df_file_name = f'times_struct_minicpm_{timestamp_id}.pkl'
time_analyses_df_output_path = os.path.join(data_path, time_analyses_df_file_name)

# Serialise time_analyses_for_df to disk.
with open(time_analyses_df_output_path, 'wb') as pickle_out:
    pickle.dump(time_analyses_for_df, pickle_out)

# Read the file back to check that saving worked.
with open(time_analyses_df_output_path, 'rb') as pickle_in:
    reloaded_time_analyses_for_df = pickle.load(pickle_in)

# Size and type of the original and the reloaded object, then a key comparison.
for checked_value in (
    len(time_analyses_for_df),
    type(time_analyses_for_df),
    type(reloaded_time_analyses_for_df),
    len(reloaded_time_analyses_for_df),
):
    print(checked_value)

print(time_analyses_for_df.keys() == reloaded_time_analyses_for_df.keys())

In [None]:
# Inputs for the confusion-matrix helper: ground-truth column, prediction column,
# the six outcome tables, and a target path below data_path.
label = 'with_person'
prediction = 'with_person_pred'
cases = (
    true_positives,
    false_positives,
    true_negatives,
    false_negatives,
    positives,
    negatives,
)

file_name = 'confusion_matrix_with_person'
save_path = data_path / file_name


In [None]:
# Call the project helper llm_o.save_conf_matrix on labels_results_i with the
# label/prediction column names and the outcome tables collected in `cases`.
# NOTE(review): save_path=None is passed although the preceding cell builds
# `save_path`; presumably nothing is written to disk this way -- confirm against
# llm_o.save_conf_matrix and pass save_path=save_path if the file should be saved.
llm_o.save_conf_matrix(labels_results_i, label, prediction, cases, save_path=None)

### Recalculate Measures with recognisable_label as ground truth (instead of person_label):

In [None]:
# Boolean masks: ground truth is recognisable_label, prediction is with_person_pred.
positive_bools = labels_results['recognisable_label'] == 1
negative_bools = labels_results['recognisable_label'] == 0
positive_pred_bools = labels_results['with_person_pred'] == 1
negative_pred_bools = labels_results['with_person_pred'] == 0

# Row subsets for each ground-truth class and each outcome.
positives = labels_results.loc[positive_bools]
negatives = labels_results.loc[negative_bools]
true_positives = labels_results.loc[positive_bools & positive_pred_bools]
true_negatives = labels_results.loc[negative_bools & negative_pred_bools]
false_negatives = labels_results.loc[positive_bools & negative_pred_bools]
false_positives = labels_results.loc[negative_bools & positive_pred_bools]

# Sensitivity: share of ground-truth positives that were predicted positive.
sensitivity = len(true_positives) / len(positives)
print('sensitivity:', sensitivity, sep='\n')

# Specificity: share of ground-truth negatives that were predicted negative.
specificity = len(true_negatives) / len(negatives)
print('specificity:', specificity, sep='\n')


In [None]:
false_negatives

In [None]:
# Print the number of rows in each of the four outcome tables.
for outcome_name, outcome_rows in (
    ('True Positives', true_positives),
    ('False Positives', false_positives),
    ('True Negatives', true_negatives),
    ('False Negatives', false_negatives),
):
    print(f'{outcome_name}: {outcome_rows.shape[0]}')

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# scikit-learn confusion matrix of recognisable_label vs. with_person_pred.
# It is kept in `cm`; the heatmap below is drawn from the row counts instead.
cm = confusion_matrix(labels_results.recognisable_label, labels_results.with_person_pred)

# Row counts of the four outcome tables.
number_true_positives = len(true_positives)
number_false_positives = len(false_positives)
number_true_negatives = len(true_negatives)
number_false_negatives = len(false_negatives)

sensitivity = number_true_positives / len(positives)
specificity = number_true_negatives / len(negatives)
# Precision, miss rate and F1 score are not computed in this cell.

print("Confusion Matrix:")

# Rows are the actual class, columns the predicted class.
confusion_matrix_data = [
    [number_true_negatives, number_false_positives],
    [number_false_negatives, number_true_positives],
]
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    confusion_matrix_data,
    annot=True,
    fmt='d',
    xticklabels=['Predicted Negative', 'Predicted Positive'],
    yticklabels=['Actual Negative', 'Actual Positive'],
    ax=ax,
)
ax.set_title('Confusion Matrix')
fig.tight_layout()
plt.show()

# Text summary of the counts and the two rates.
print('\n'.join([
    f'True Positives: {number_true_positives}',
    f'False Positives: {number_false_positives}',
    f'True Negatives: {number_true_negatives}',
    f'False Negatives: {number_false_negatives}',
    f'\nSensitivity (Recall): {sensitivity:.4f}',
    f'Specificity: {specificity:.4f}',
]))

In [None]:
# Two-panel figure: heatmap on the left (twice as wide), metrics text on the right.
fig = plt.figure(figsize=(15, 8))
gs = plt.GridSpec(1, 2, width_ratios=[2, 1])

# Left panel: rows are the actual class, columns the predicted class.
ax_matrix = fig.add_subplot(gs[0])
confusion_matrix_data = [
    [number_true_negatives, number_false_positives],
    [number_false_negatives, number_true_positives],
]
heatmap = sns.heatmap(
    confusion_matrix_data,
    annot=True,
    fmt='d',
    xticklabels=['Predicted Negative', 'Predicted Positive'],
    yticklabels=['Actual Negative', 'Actual Positive'],
    cbar_kws={'label': 'Number of Instances'},
    ax=ax_matrix,
)
ax_matrix.set_title('Confusion Matrix')

# Right panel: text only, so the axes are hidden.
# Precision, miss rate and F1 score are not part of this summary.
ax_text = fig.add_subplot(gs[1])
ax_text.axis('off')
metrics_text = '\n'.join([
    'Performance Metrics:',
    '',
    f'True Positives: {number_true_positives}',
    f'False Positives: {number_false_positives}',
    f'True Negatives: {number_true_negatives}',
    f'False Negatives: {number_false_negatives}',
    '',
    f'Sensitivity: {sensitivity:.4f}',
    f'Specificity: {specificity:.4f}',
    '',
])
ax_text.text(0, 0.5, metrics_text, fontsize=10, verticalalignment='center')

fig.suptitle('Person Detection: Confusion Matrix and Performance Metrics Based on the Recognisable Label as Ground Truth', fontsize=16)
fig.tight_layout()

# Write the figure to a PDF below data_path, then close it without displaying.
output_path = data_path / 'confusion_matrix_metrics_recognisable.pdf'
fig.savefig(output_path)
plt.close(fig)

### Calculate sensitivity and specificity for photography predictions and get lists of images with positive photography predictions:

In [None]:
# Boolean masks: ground truth is is_photo, prediction is is_photo_pred.
positive_bools = labels_results['is_photo'] == 1
negative_bools = labels_results['is_photo'] == 0
positive_pred_bools = labels_results['is_photo_pred'] == 1
negative_pred_bools = labels_results['is_photo_pred'] == 0

# Row subsets for each ground-truth class and each outcome.
positives = labels_results.loc[positive_bools]
negatives = labels_results.loc[negative_bools]
true_positives = labels_results.loc[positive_bools & positive_pred_bools]
true_negatives = labels_results.loc[negative_bools & negative_pred_bools]
false_negatives = labels_results.loc[positive_bools & negative_pred_bools]
false_positives = labels_results.loc[negative_bools & positive_pred_bools]

# Sensitivity: share of ground-truth positives that were predicted positive.
sensitivity = len(true_positives) / len(positives)
print('sensitivity:', sensitivity, sep='\n')

# Specificity: share of ground-truth negatives that were predicted negative.
specificity = len(true_negatives) / len(negatives)
print('specificity:', specificity, sep='\n')

In [None]:
# Print the number of rows in each of the four outcome tables.
for outcome_name, outcome_rows in (
    ('True Positives', true_positives),
    ('False Positives', false_positives),
    ('True Negatives', true_negatives),
    ('False Negatives', false_negatives),
):
    print(f'{outcome_name}: {outcome_rows.shape[0]}')

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# scikit-learn confusion matrix of is_photo vs. is_photo_pred.
# It is kept in `cm`; the heatmap below is drawn from the row counts instead.
cm = confusion_matrix(labels_results.is_photo, labels_results.is_photo_pred)

# Row counts of the four outcome tables.
number_true_positives = len(true_positives)
number_false_positives = len(false_positives)
number_true_negatives = len(true_negatives)
number_false_negatives = len(false_negatives)

# Rates derived from the counts.
sensitivity = number_true_positives / len(positives)
specificity = number_true_negatives / len(negatives)
precision = number_true_positives / (number_true_positives + number_false_positives)
miss_rate = number_false_negatives / len(positives)
f1_score = 2 * (precision * sensitivity) / (precision + sensitivity)

print("Confusion Matrix:")

# Rows are the actual class, columns the predicted class.
confusion_matrix_data = [
    [number_true_negatives, number_false_positives],
    [number_false_negatives, number_true_positives],
]
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    confusion_matrix_data,
    annot=True,
    fmt='d',
    xticklabels=['Predicted Negative', 'Predicted Positive'],
    yticklabels=['Actual Negative', 'Actual Positive'],
    ax=ax,
)
ax.set_title('Confusion Matrix')
fig.tight_layout()
plt.show()

# Text summary of the counts and the derived rates.
print('\n'.join([
    f'True Positives: {number_true_positives}',
    f'False Positives: {number_false_positives}',
    f'True Negatives: {number_true_negatives}',
    f'False Negatives: {number_false_negatives}',
    f'\nSensitivity (Recall): {sensitivity:.4f}',
    f'Specificity: {specificity:.4f}',
    f'Precision: {precision:.4f}',
    f'Miss Rate (False Negative Rate): {miss_rate:.4f}',
    f'F1 Score: {f1_score:.4f}',
]))

In [None]:
# Two-panel figure: heatmap on the left (twice as wide), metrics text on the right.
fig = plt.figure(figsize=(15, 8))
gs = plt.GridSpec(1, 2, width_ratios=[2, 1])

# Left panel: rows are the actual class, columns the predicted class.
ax_matrix = fig.add_subplot(gs[0])
confusion_matrix_data = [
    [number_true_negatives, number_false_positives],
    [number_false_negatives, number_true_positives],
]
heatmap = sns.heatmap(
    confusion_matrix_data,
    annot=True,
    fmt='d',
    xticklabels=['Predicted Negative', 'Predicted Positive'],
    yticklabels=['Actual Negative', 'Actual Positive'],
    cbar_kws={'label': 'Number of Instances'},
    ax=ax_matrix,
)
ax_matrix.set_title('Confusion Matrix')

# Right panel: text only, so the axes are hidden.
ax_text = fig.add_subplot(gs[1])
ax_text.axis('off')
metrics_text = '\n'.join([
    'Performance Metrics:',
    '',
    f'True Positives: {number_true_positives}',
    f'False Positives: {number_false_positives}',
    f'True Negatives: {number_true_negatives}',
    f'False Negatives: {number_false_negatives}',
    '',
    f'Sensitivity: {sensitivity:.4f}',
    f'Specificity: {specificity:.4f}',
    f'Precision: {precision:.4f}',
    f'Miss Rate: {miss_rate:.4f}',
    f'F1 Score: {f1_score:.4f}',
])
ax_text.text(0, 0.5, metrics_text, fontsize=10, verticalalignment='center')

fig.suptitle('Photography Detection: Confusion Matrix and Performance Metrics Based on is_photo Label as Ground Truth', fontsize=16)
fig.tight_layout()

# Write the figure to a PDF below data_path, then close it without displaying.
output_path = data_path / 'confusion_matrix_metrics_is_photo.pdf'
fig.savefig(output_path)
plt.close(fig)

### Calculate sensitivity and specificity for church predictions and get lists of images with positive church predictions:

In [None]:
# Boolean masks: ground truth is church, prediction is with_church_pred.
positive_bools = labels_results['church'] == 1
negative_bools = labels_results['church'] == 0
positive_pred_bools = labels_results['with_church_pred'] == 1
negative_pred_bools = labels_results['with_church_pred'] == 0

# Row subsets for each ground-truth class and each outcome.
positives = labels_results.loc[positive_bools]
negatives = labels_results.loc[negative_bools]
true_positives = labels_results.loc[positive_bools & positive_pred_bools]
true_negatives = labels_results.loc[negative_bools & negative_pred_bools]
false_negatives = labels_results.loc[positive_bools & negative_pred_bools]
false_positives = labels_results.loc[negative_bools & positive_pred_bools]

# Sensitivity: share of ground-truth positives that were predicted positive.
sensitivity = len(true_positives) / len(positives)
print('sensitivity:', sensitivity, sep='\n')

# Specificity: share of ground-truth negatives that were predicted negative.
specificity = len(true_negatives) / len(negatives)
print('specificity:', specificity, sep='\n')

In [None]:
# Print the number of rows in each of the four outcome tables.
for outcome_name, outcome_rows in (
    ('True Positives', true_positives),
    ('False Positives', false_positives),
    ('True Negatives', true_negatives),
    ('False Negatives', false_negatives),
):
    print(f'{outcome_name}: {outcome_rows.shape[0]}')

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# scikit-learn confusion matrix of church vs. with_church_pred.
# It is kept in `cm`; the heatmap below is drawn from the row counts instead.
cm = confusion_matrix(labels_results.church, labels_results.with_church_pred)

# Row counts of the four outcome tables.
number_true_positives = len(true_positives)
number_false_positives = len(false_positives)
number_true_negatives = len(true_negatives)
number_false_negatives = len(false_negatives)

sensitivity = number_true_positives / len(positives)
specificity = number_true_negatives / len(negatives)
# Precision, miss rate and F1 score are not computed in this cell.

print("Confusion Matrix:")

# Rows are the actual class, columns the predicted class.
confusion_matrix_data = [
    [number_true_negatives, number_false_positives],
    [number_false_negatives, number_true_positives],
]
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    confusion_matrix_data,
    annot=True,
    fmt='d',
    xticklabels=['Predicted Negative', 'Predicted Positive'],
    yticklabels=['Actual Negative', 'Actual Positive'],
    ax=ax,
)
ax.set_title('Confusion Matrix')
fig.tight_layout()
plt.show()

# Text summary of the counts and the two rates.
print('\n'.join([
    f'True Positives: {number_true_positives}',
    f'False Positives: {number_false_positives}',
    f'True Negatives: {number_true_negatives}',
    f'False Negatives: {number_false_negatives}',
    f'\nSensitivity (Recall): {sensitivity:.4f}',
    f'Specificity: {specificity:.4f}',
]))

In [None]:
# Two-panel figure: heatmap on the left (twice as wide), metrics text on the right.
fig = plt.figure(figsize=(15, 8))
gs = plt.GridSpec(1, 2, width_ratios=[2, 1])

# Left panel: rows are the actual class, columns the predicted class.
ax_matrix = fig.add_subplot(gs[0])
confusion_matrix_data = [
    [number_true_negatives, number_false_positives],
    [number_false_negatives, number_true_positives],
]
heatmap = sns.heatmap(
    confusion_matrix_data,
    annot=True,
    fmt='d',
    xticklabels=['Predicted Negative', 'Predicted Positive'],
    yticklabels=['Actual Negative', 'Actual Positive'],
    cbar_kws={'label': 'Number of Instances'},
    ax=ax_matrix,
)
ax_matrix.set_title('Confusion Matrix')

# Right panel: text only, so the axes are hidden.
# Precision, miss rate and F1 score are not part of this summary.
ax_text = fig.add_subplot(gs[1])
ax_text.axis('off')
metrics_text = '\n'.join([
    'Performance Metrics:',
    '',
    f'True Positives: {number_true_positives}',
    f'False Positives: {number_false_positives}',
    f'True Negatives: {number_true_negatives}',
    f'False Negatives: {number_false_negatives}',
    '',
    f'Sensitivity: {sensitivity:.4f}',
    f'Specificity: {specificity:.4f}',
    '',
])
ax_text.text(0, 0.5, metrics_text, fontsize=10, verticalalignment='center')

fig.suptitle('Church Detection: Confusion Matrix and Performance Metrics Based on the church Label as Ground Truth', fontsize=16)
fig.tight_layout()

# Write the figure to a PDF below data_path, then close it without displaying.
output_path = data_path / 'confusion_matrix_metrics_with_church.pdf'
fig.savefig(output_path)
plt.close(fig)

In [None]:
false_positives

In [None]:
false_negatives

In [None]:
# Folder layout below root_path: one parent folder ('false_church_pred') with a
# sub-folder for false positives and one for false negatives.
# These lines only build the path objects; nothing is created on disk here.
# (`false_curch_path` is spelled this way throughout this cell.)
false_curch_path = root_path / 'false_church_pred'
church_false_positives_path = false_curch_path / 'false_positives'
church_false_negatives_path = false_curch_path / 'false_negatives'

In [None]:
church_false_positives_path

In [None]:
# Copy (not move) the .tif file of every row in `false_positives` into
# church_false_positives_path so the misclassified images can be inspected.
# When the notebook is run top to bottom, `false_positives` holds the church
# false positives at this point.

# shutil.copy does not create missing directories, so make sure the target exists.
church_false_positives_path.mkdir(parents=True, exist_ok=True)

for idx, row in false_positives.iterrows():
    print(idx)
    img_id = row['image_id']
    # Source files are named 'BernerOberland<image_id>.tif' inside tif_data_path.
    file_name = 'BernerOberland' + img_id + '.tif'
    print(file_name)
    source_path = tif_data_path / file_name
    dest_path = church_false_positives_path / file_name
    print(source_path)
    print(dest_path)
    shutil.copy(source_path, dest_path)


In [None]:
# Copy (not move) the .tif file of every row in `false_negatives` into
# church_false_negatives_path so the misclassified images can be inspected.
# When the notebook is run top to bottom, `false_negatives` holds the church
# false negatives at this point.

# shutil.copy does not create missing directories, so make sure the target exists.
church_false_negatives_path.mkdir(parents=True, exist_ok=True)

for idx, row in false_negatives.iterrows():
    print(idx)
    img_id = row['image_id']
    # Source files are named 'BernerOberland<image_id>.tif' inside tif_data_path.
    file_name = 'BernerOberland' + img_id + '.tif'
    source_path = tif_data_path / file_name
    dest_path = church_false_negatives_path / file_name
    print(dest_path)
    shutil.copy(source_path, dest_path)

### Visually inspect the images in the two folders!

Visually verified all classified images; the false negatives are all images with non-recognisable persons (according to my judgement).

## Save labels and results:

In [None]:
labels_results

In [None]:
# Add an `image_id_str` column whose values are meant to stay strings after a
# CSV save/reload round trip; the values come from img_idc.complete_image_ids.
labels = list(labels_results['image_id'])
new_labels = img_idc.complete_image_ids(labels)
labels_results['image_id_str'] = new_labels

# Display the table with the new column.
labels_results

In [None]:
current_timestamp = pd.Timestamp.now()
current_timestamp

In [None]:
current_date_time = current_timestamp.strftime('%Y-%m-%d %H:%M')
current_date_time

In [None]:
# Save the labels/results table as CSV below data_path, with a timestamped name.
# The stamp is formatted as YYYYMMDD_HHMMSS directly from current_timestamp:
# the display string in current_date_time ('%Y-%m-%d %H:%M') contains a space
# and a ':', and a colon is not allowed in file names on Windows.
results_file_name = (
    'results_img_analysis_minicpm_'
    + current_timestamp.strftime('%Y%m%d_%H%M%S')
    + '.csv'
)
labels_results.to_csv(data_path / results_file_name)