In [None]:
import os
from pathlib import Path
from ultralytics import YOLO
from PIL import Image
import shutil
import pandas as pd
from source import image_id_converter as img_idc
from source import sort_img_files as sif
import matplotlib.pyplot as plt

In [None]:
import ollama
import json
import re
import pickle

In [None]:
os.getcwd()

# Using LLM (mini-CPM) for image analysis

## Define Functions:

In [None]:
import os
from PIL import Image



def convert_tif_to_jpg(source_folder, destination_folder, quality=85):
    """
    Write a JPEG copy of every TIFF image found in one folder into another.

    Only the top level of ``source_folder`` is scanned. Entries whose name
    ends in ``.tif`` or ``.tiff`` (case-insensitive) are converted; the
    source files are only read, never changed or removed. A file that fails
    to convert is reported with ``print`` and skipped.

    Args:
        source_folder (str): Path to folder containing .tif files
        destination_folder (str): Path to destination folder for .jpg files
            (created if it does not exist)
        quality (int): JPEG quality passed to ``Image.save`` (default 85)

    Returns:
        list: File names (not full paths) of the JPEG files that were written.
    """
    os.makedirs(destination_folder, exist_ok=True)

    tiff_suffixes = ('.tif', '.tiff')
    converted_files = []

    for filename in os.listdir(source_folder):
        # Everything that is not a TIFF is left alone
        if not filename.lower().endswith(tiff_suffixes):
            continue

        stem, _ = os.path.splitext(filename)
        jpg_filename = f"{stem}.jpg"
        tif_path = os.path.join(source_folder, filename)
        jpg_path = os.path.join(destination_folder, jpg_filename)

        try:
            with Image.open(tif_path) as img:
                # TIFF might be in a different mode; the JPEG is written from RGB
                rgb_img = img if img.mode == 'RGB' else img.convert('RGB')
                rgb_img.save(jpg_path, 'JPEG', quality=quality, optimize=True)
        except Exception as e:
            print(f"Error converting {filename}: {str(e)}")
        else:
            converted_files.append(jpg_filename)
            print(f"Converted: {filename} -> {jpg_filename}")

    print(f"Successfully converted {len(converted_files)} files")
    return converted_files

# Example usage:
# convert_tif_to_jpg("/path/to/source", "/path/to/destination")

In [None]:
from PIL import Image
import os
from pathlib import Path

def convert_image_if_needed(image_path):
    """
    Return the path of an image that can be sent on, converting TIFF to JPEG.

    Paths ending in ``.tif``/``.tiff`` (case-insensitive) are opened with
    Pillow, converted to RGB if necessary and saved next to the original as
    ``<stem>_converted.jpg``. Any other path is returned unchanged and the
    file is not touched.

    Args:
        image_path (str or pathlib.Path): Location of the image file.

    Returns:
        str or None: Path of the usable image as a string, or ``None`` when
        the TIFF conversion failed (the error is printed).
    """
    path = Path(image_path)

    if path.suffix.lower() not in ('.tif', '.tiff'):
        return str(path)

    jpg_path = path.parent / f"{path.stem}_converted.jpg"
    try:
        # The context manager closes the TIFF file handle even if saving fails
        with Image.open(path) as img:
            rgb_img = img if img.mode == 'RGB' else img.convert('RGB')
            rgb_img.save(jpg_path, 'JPEG', quality=95)
    except Exception as e:
        # Callers rely on None (not an exception) to signal a failed conversion
        print(f"Error converting {path}: {e}")
        return None

    print(f"Converted {path} to {jpg_path}")
    return str(jpg_path)


def call_ollama_model(image_path, prompt):
    """
    Send one image together with a text prompt to the ``minicpm-v`` model via Ollama.

    The image is first passed through ``convert_image_if_needed`` so that
    TIFF files are sent as JPEG.

    Args:
        image_path: Path of the image to analyse.
        prompt (str): Instruction text for the model.

    Returns:
        The ``content`` of the message in the Ollama chat response.

    Raises:
        ValueError: If ``convert_image_if_needed`` returns ``None``.
    """
    usable_path = convert_image_if_needed(image_path)
    if usable_path is None:
        raise ValueError(f"Could not process image: {image_path}")

    user_message = {
        'role': 'user',
        'content': prompt,
        'images': [usable_path],
    }
    sampling_options = {
        'temperature': 0.1,  # Lower = more deterministic (0.0 to 1.0)
        'seed': 42,          # Fixed seed for reproducibility
    }

    reply = ollama.chat(
        model="minicpm-v",
        messages=[user_message],
        options=sampling_options,
    )
    return reply['message']['content']

In [None]:
def create_analysis_prompt():
    """
    Build the instruction text sent to the model with every image.

    The prompt asks for a Python dictionary with a fixed set of keys: the
    image type, nine 0/1 flags for objects in the image, a list of other
    objects and a free-text comment.

    Returns:
        str: The prompt text (identical on every call).
    """
    # The wording is part of the experiment: changing it changes the answers.
    prompt_text = """
    Analyze this image and return ONLY a Python dictionary in exactly this format:
    
    {
        'image_type': [],  # List all that apply: photography, drawing, painting, statistics figure, map, scheme, other
        'person': X,              # 1 if present, 0 if not
        'mountain': X,            # 1 if present, 0 if not
        'river': X,               # 1 if present, 0 if not
        'lake': X,                # 1 if present, 0 if not
        'building': X,            # 1 if present, 0 if not
        'church': X,              # 1 if present, 0 if not
        'city': X,                # 1 if present, 0 if not
        'village': X,             # 1 if present, 0 if not
        'glacier': X,             # 1 if present, 0 if not
        'other_objects': [],      # List of other noteworthy/dominant objects
        'additional_comments': '' # Any additional observations or empty string if none
    }
    
    Replace X with 1 (present) or 0 (not present).
    Return ONLY the dictionary, no other text.
    Your answer MUST have the exact structue of the dictionary described above (all keys MUST be present). 
    If you cannot answer the question in the way implied by this structure, enter 'None' as value and offer 
    your answer and explanations under 'additional_comments'.
    """
    return prompt_text

In [None]:
import ast
import re

def parse_response_to_dict(response_text):
    """
    Extract a Python dictionary literal from a model response.

    The text is searched first for a Markdown code fence (three backticks,
    optionally tagged ``python``) wrapping ``{...}``, then for the widest
    ``{...}`` span anywhere in the text. Underscores escaped with a backslash
    are unescaped and the span is evaluated with ``ast.literal_eval``, which
    accepts literals only.

    Args:
        response_text (str): Raw text returned by the model.

    Returns:
        tuple: ``(True, dict)`` when a dictionary was found and parsed,
        otherwise ``(False, None)``. A literal that is not a dictionary
        (for example a set such as ``{1, 2}``) counts as a failure.
    """
    if not isinstance(response_text, str):
        return False, None

    # First try to find the dictionary inside a fenced code block
    fenced_match = re.search(r'```(?:python)?\s*(\{.*?\})\s*```', response_text, re.DOTALL)
    if fenced_match:
        dict_str = fenced_match.group(1)
    else:
        # Fall back to the span from the first '{' to the last '}'
        bare_match = re.search(r'\{.*\}', response_text, re.DOTALL)
        if bare_match is None:
            return False, None
        dict_str = bare_match.group()

    # Models sometimes Markdown-escape underscores in key names
    dict_str = dict_str.replace('\\_', '_').strip()

    try:
        parsed = ast.literal_eval(dict_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # The exception types literal_eval raises on malformed input
        return False, None

    # Callers call .keys() on the result, so only a real dict is a success
    if not isinstance(parsed, dict):
        return False, None
    return True, parsed

In [None]:
def analyze_image_structured(image_path):
    """
    Ask the model to describe one image and return the answer as a dictionary.

    The prompt from ``create_analysis_prompt`` is sent with the image through
    ``call_ollama_model`` and the reply is parsed with
    ``parse_response_to_dict``.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        dict: The parsed answer, or ``{"raw_response": <reply text>}`` when
        the reply could not be parsed, so the unparsed text stays available.
    """
    reply_text = call_ollama_model(image_path, create_analysis_prompt())
    parsed_ok, parsed_reply = parse_response_to_dict(reply_text)

    if not parsed_ok:
        # Keep the unparsed text so it can be inspected later
        return {"raw_response": reply_text}
    return parsed_reply

In [None]:
def add_pred_values(idx, labels_results, columns, values_to_add):
    """
    Write manually determined prediction values into the rows of one image.

    Args:
        idx: Image id; every row whose ``image_id`` equals it is updated.
        labels_results (pandas.DataFrame): Table with an ``image_id`` column.
            It is modified in place.
        columns (list): Names of the columns to overwrite.
        values_to_add (list): New values, one per entry of ``columns``.

    Returns:
        None
    """
    # Select by the id passed in, not by the notebook-global `inspection_idx`
    selection_bools = labels_results.image_id == idx

    labels_results.loc[selection_bools, columns] = values_to_add

## Set paths:

In [None]:
# All locations are derived from the notebook's working directory, so no
# machine-specific absolute path is needed.
project_path = Path.cwd()
root_path = (project_path / '..').resolve()  # parent of the working directory

# Input folders
data_path = root_path.joinpath('data')          # label file, pickled LLM answers, figures
tif_data_path = root_path.joinpath('data_1')    # source images (.tif)
jpg_data_path = root_path.joinpath('data_jpg')  # JPEG copies that are analysed

# Output folders for the sorted images
output_dir_not_photo = root_path.joinpath('not_photo')
output_dir_with_person = root_path.joinpath('with_person')
output_dir_without_person = root_path.joinpath('without_person')



In [None]:
project_path

In [None]:
root_path

In [None]:
data_path

In [None]:
tif_data_path

### Copy and convert image files from tif_data_path to jpg_data_path:

In [None]:

source_folder = tif_data_path
destination_folder = jpg_data_path

convert_tif_to_jpg(source_folder, destination_folder, quality=100)


## Create directories for sorting the images:

In [None]:
# Make sure the three target folders for the sorted images exist
# (existing folders are left untouched).
for sort_dir in (output_dir_not_photo, output_dir_with_person, output_dir_without_person):
    os.makedirs(sort_dir, exist_ok=True)

## Loop through images and analyze with miniCPM (LLM model):

In [None]:
# A .DS_Store file inside jpg_data_path is not an image, so it is deleted
# before the folder content is listed for the analysis.
import os
ds_file_path = jpg_data_path / '.DS_Store'
if not os.path.exists(ds_file_path):
    print(".DS_Store not found")
else:
    os.remove(ds_file_path)
    print("Removed .DS_Store")

In [None]:
# Get list of image files to analyse. Only .jpg files are kept (stray files
# such as .DS_Store would otherwise break the id extraction below) and the
# list is sorted so that every run processes the images in the same order:
image_files = sorted(
    file_name for file_name in os.listdir(jpg_data_path)
    if file_name.lower().endswith('.jpg')
)

# Specify names of categorical variables that the create_analysis_prompt refers to
# (for image_type one of a range of categories is expected,
# the other ones are one-hot coded):
keys_list_expected = ['image_type', 'person', 'mountain', 'river',
                      'lake', 'building', 'church', 'city', 'village',
                      'glacier', 'other_objects', 'additional_comments']

# File names have the form 'BernerOberland<image id>.jpg' (the same form is
# used further down when the files are moved):
file_name_prefix = 'BernerOberland'

# Make empty dictionary to store results (image id -> LLM answer):
image_descr = {}

# Loop through images to get answers:
for image_file in image_files:
    image_path = jpg_data_path / image_file
    print('\n')
    print(str(image_path))

    # The image id is whatever follows the prefix; for file names without the
    # prefix the last three characters of the stem are used, as before.
    stem = image_path.stem
    if stem.startswith(file_name_prefix):
        img_id = stem[len(file_name_prefix):]
    else:
        img_id = stem[-3:]

    if img_id in image_descr:
        # Two files mapping to one id would silently overwrite each other
        print(f'Warning: image id {img_id} occurs more than once; keeping the latest answer')

    # Analyse image, get values for each of the categorical variables specified above:
    image_description = analyze_image_structured(image_path)

    print(image_description)
    image_descr[img_id] = image_description

In [None]:
len(image_descr)

In [None]:
type(image_descr)

### Save dictionary with LLM responses:

In [None]:
img_analysis_output_path = os.path.join(data_path, 'image_analysis_minicpm_2025.07.17.pkl')
img_analysis_output_path

In [None]:
with open(img_analysis_output_path, 'wb') as f:
   pickle.dump(image_descr, f)

In [None]:
with open(img_analysis_output_path, 'rb') as f:
   reloaded_image_descr = pickle.load(f)

In [None]:
type(reloaded_image_descr)

In [None]:
len(reloaded_image_descr)

In [None]:
#image_descr = reloaded_image_descr

In [None]:
image_descr.keys()

In [None]:
reloaded_image_descr.keys()

### Load person label data (ground truth) to compare to LLM responses:

The label file loaded in the next cell (`labels_mod.csv`, presumably a modified version of `with_without_person.csv`) contains labels added by (human) visual inspection that represent the ground truth.
 * Column `with_person`: whether or not any person is in the image.
 * Column `person_recognisable`: whether any person who would be recognisable to someone familiar with that person is in the image.

In [None]:
label_data = pd.read_csv(data_path/'labels_mod.csv')
label_data.head()

In [None]:
img_ids = list(label_data.image_id)

In [None]:
# Convert the image ids from the form they were saved in (e.g. 'id234', which
# ensures a string data type) back to the plain number as a string (e.g. '234'):
label_data['image_id'] = img_idc.reconvert_image_ids(img_ids)

In [None]:
label_data.head()

### Rename the labels:

In [None]:
label_data.rename(columns={'with_person': 'person_label', 'person_recognisable': 'recognisable_label'}, inplace=True)
label_data.head()


### Loop through Responses from the LLM and incorporate them into a Data Frame:

In [None]:
# Prepare empty lists to store results (one entry per image id, in order)
img_type = []
is_photo = []
with_person = []
with_church = []

# Get list of image ids:
img_ids = label_data.image_id

# Ids of images whose answer could not be used automatically and therefore
# needs closer (visual) inspection:
img_ids_closer_inspection = []

# List of keys expected (in exactly this order) in each dictionary provided
# as an answer by the LLM:
keys_list_expected = ['image_type', 'person', 'mountain', 'river',
                      'lake', 'building', 'church', 'city', 'village',
                      'glacier', 'other_objects', 'additional_comments']


def _prediction_values(pred_dict):
    """Return (image_type, is_photo, person, church) from a parsed answer.

    Returns None when a required key is missing or 'image_type' does not
    support a membership test, so the image can be flagged for inspection
    instead of stopping the loop with an exception.
    """
    try:
        image_type_values = pred_dict['image_type']
        is_photo_value = 1 if 'photography' in image_type_values else 0
        return (image_type_values, is_photo_value,
                pred_dict['person'], pred_dict['church'])
    except (KeyError, TypeError):
        return None


def _dict_from_raw_response(response_text):
    """Re-parse the span from the first '{' to the first '}' of a raw answer.

    Returns the parsed object, or None when the text has no such span or the
    span cannot be parsed.
    """
    if not isinstance(response_text, str):
        return None
    start_idx = response_text.find('{')
    end_idx = response_text.find('}')
    if start_idx == -1 or end_idx == -1:
        # No braces at all: nothing to parse
        return None
    success_bool, parsed = parse_response_to_dict(response_text[start_idx:end_idx + 1])
    return parsed if success_bool else None


# Loop through image ids:
for img_id in img_ids:

    # Get response from LLM for image id in question (None if there is none):
    img_pred = image_descr.get(img_id)
    keys_list = list(img_pred.keys()) if isinstance(img_pred, dict) else None
    values = None

    if keys_list == keys_list_expected:
        # The answer has the expected structure: read the values directly.
        values = _prediction_values(img_pred)
        if values is None:
            print('\n')
            print('expected keys but unusable values:')
            print(img_id)

    elif keys_list == ['raw_response']:
        # The answer could not be parsed when it was received: try to find a
        # dictionary inside the stored response text.
        print('\n')
        print('raw_repsonse_dict:')
        print(img_id)

        reparsed = _dict_from_raw_response(img_pred['raw_response'])
        print('success_bool:')
        print(reparsed is not None)

        if reparsed is not None:
            values = _prediction_values(reparsed)
        if values is None:
            print('parse unsuccessful')
            print(img_id)

    else:
        # Neither the expected structure nor a 'raw_response' key.
        print('\n')
        print('no structure at all:')
        print(img_id)

    if values is None:
        # Keep the lists aligned with img_ids and remember the image for
        # closer (visual) inspection.
        img_ids_closer_inspection.append(img_id)
        values = (None, None, None, None)

    image_type_values, is_photo_value, person_value, church_value = values
    img_type.append(image_type_values)
    is_photo.append(is_photo_value)
    with_person.append(person_value)
    with_church.append(church_value)


In [None]:
# Check if all response variable lists have the same length:
print(len(img_ids))
print(len(img_type))
print(len(is_photo))
print(len(with_person))
print(len(with_church))
print(len(img_ids))

In [None]:
# Check image list for closer inspection:
img_ids_closer_inspection

In [None]:
# Put response variables into data frame: 
predictions = pd.DataFrame({'image_id': img_ids, 
                           'is_photo_pred': is_photo,
                           'with_person_pred': with_person,
                           'with_church_pred': with_church})

In [None]:
predictions.head()

### Merge label data with the predictions:

In [None]:
labels_results = label_data.merge(predictions, how='inner', on='image_id')
labels_results.head()

In [None]:
labels_results.shape

In [None]:
img_ids_closer_inspection[0]

### Amend results manually by taking into account unstructured answers (images for closer inspection):

In [None]:
# Get index for the image information for closer inspection:
inspection_idx = img_ids_closer_inspection[0]

In [None]:
# Check if values are indeed missing:
labels_results[labels_results.image_id == inspection_idx]

In [None]:
# Check available information: 
image_descr[inspection_idx]

In [None]:
# Add information to labels_results dataframe manually:
columns = ['is_photo_pred', 'with_person_pred', 'with_church_pred']
add_pred_values(inspection_idx, labels_results, columns, [int(0), int(0), int(0)])

In [None]:
# Check if values have been added:
labels_results[labels_results.image_id == inspection_idx]

In [None]:
0

In [None]:
# Get index for the image information for closer inspection:
inspection_idx = img_ids_closer_inspection[1]

In [None]:
# Check if values are indeed missing:
labels_results[labels_results.image_id == inspection_idx]

In [None]:
# Check available information: 
image_descr[inspection_idx]

In [None]:
# Add information to labels_results dataframe manually:
columns = ['is_photo_pred', 'with_person_pred', 'with_church_pred']
add_pred_values(inspection_idx, labels_results, columns, [0, 0, 0])

In [None]:
# Check if values have been added:
labels_results[labels_results.image_id == inspection_idx]

In [None]:
# Get index for the image information for closer inspection:
inspection_idx = img_ids_closer_inspection[2]

In [None]:
# Check if values are indeed missing:
labels_results[labels_results.image_id == inspection_idx]

In [None]:
# Check available information: 
image_descr[inspection_idx]

In [None]:
# Add information to labels_results dataframe manually:
columns = ['is_photo_pred', 'with_person_pred', 'with_church_pred']
add_pred_values(inspection_idx, labels_results, columns, [1, 0, 0])

In [None]:
# Check if values have been added:
labels_results[labels_results.image_id == inspection_idx]

### Convert data type of added prediction columns to integer:

In [None]:
# astype returns a new data frame, so the result has to be assigned back for
# the conversion to take effect. The call raises if a prediction is still
# missing, i.e. if not every image for closer inspection has been amended.
labels_results = labels_results.astype({'is_photo_pred': 'int',
                                        'with_person_pred': 'int',
                                        'with_church_pred': 'int'})
labels_results.head()

In [None]:
print(output_dir_not_photo)
print(output_dir_with_person)

In [None]:
# Move the image files that are predicted not to be photographs
# (is_photo_pred == 0) from jpg_data_path to output_dir_not_photo:
for _, row in labels_results.iterrows():
    # Not named `is_photo`: that name holds the list of predictions used to
    # build the `predictions` data frame and must not be overwritten.
    photo_pred = row['is_photo_pred']

    # Images without a prediction stay where they are
    if pd.isna(photo_pred) or int(photo_pred) != 0:
        continue

    file_name = 'BernerOberland' + row['image_id'] + '.jpg'
    source_path = jpg_data_path / file_name
    dest_path = output_dir_not_photo / file_name

    if not source_path.exists():
        # E.g. already moved by an earlier run of this cell
        print(f'Skipping {file_name}: not found in {jpg_data_path}')
        continue

    shutil.move(source_path, dest_path)


### Calculate sensitivity and specificity for person predictions and get lists of images with positive person predictions:

In [None]:
# Ground truth: person_label; prediction: with_person_pred.
positive_bools = labels_results.person_label == 1
negative_bools = labels_results.person_label == 0
positive_pred_bools = labels_results.with_person_pred == 1
negative_pred_bools = labels_results.with_person_pred == 0

positives = labels_results[positive_bools]
negatives = labels_results[negative_bools]
true_positives = labels_results[positive_bools & positive_pred_bools]
true_negatives = labels_results[negative_bools & negative_pred_bools]

false_negatives = labels_results[positive_bools & negative_pred_bools]
false_positives = labels_results[negative_bools & positive_pred_bools]

# A class without any image gives NaN instead of a ZeroDivisionError
sensitivity = len(true_positives) / len(positives) if len(positives) else float('nan')
print('sensitivity:')
print(sensitivity)

specificity = len(true_negatives) / len(negatives) if len(negatives) else float('nan')
print('specificity:')
print(specificity)


### Inspect false negatives:

In [None]:
false_negatives

### Inspect false positives:

In [None]:
false_positives

In [None]:
print(f'True Positives: {true_positives.shape[0]}')
print(f'False Positives: {false_positives.shape[0]}')
print(f'True Negatives: {true_negatives.shape[0]}')
print(f'False Negatives: {false_negatives.shape[0]}')

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Calculate confusion matrix with scikit-learn. The figure and the metrics
# below use the counts of the row selections made in the metrics cell above.
cm = confusion_matrix(labels_results.person_label, labels_results.with_person_pred)

number_true_positives = true_positives.shape[0]
number_false_positives = false_positives.shape[0]
number_true_negatives = true_negatives.shape[0]
number_false_negatives = false_negatives.shape[0]

number_positives = positives.shape[0]
number_negatives = negatives.shape[0]
number_predicted_positives = number_true_positives + number_false_positives

# Undefined ratios (empty class, no positive prediction) are reported as NaN
# instead of stopping the cell with a ZeroDivisionError.
nan = float('nan')
sensitivity = number_true_positives / number_positives if number_positives else nan
specificity = number_true_negatives / number_negatives if number_negatives else nan
precision = number_true_positives / number_predicted_positives if number_predicted_positives else nan
miss_rate = number_false_negatives / number_positives if number_positives else nan
f1_denominator = precision + sensitivity
f1_score = 2 * (precision * sensitivity) / f1_denominator if f1_denominator > 0 else nan

print("Confusion Matrix:")

fig, ax = plt.subplots(figsize=(8, 6))
confusion_matrix_data = [[number_true_negatives, number_false_positives],
                         [number_false_negatives, number_true_positives]]
sns.heatmap(confusion_matrix_data, annot=True, fmt='d',
            xticklabels=['Predicted Negative', 'Predicted Positive'],
            yticklabels=['Actual Negative', 'Actual Positive'], ax=ax)
ax.set_title('Confusion Matrix')
fig.tight_layout()
plt.show()

print(f'True Positives: {number_true_positives}')
print(f'False Positives: {number_false_positives}')
print(f'True Negatives: {number_true_negatives}')
print(f'False Negatives: {number_false_negatives}')
print(f'\nSensitivity (Recall): {sensitivity:.4f}')
print(f'Specificity: {specificity:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Miss Rate (False Negative Rate): {miss_rate:.4f}')
print(f'F1 Score: {f1_score:.4f}')

In [None]:
# Two-panel figure: confusion matrix heatmap (left, twice as wide) and the
# performance metrics as text (right); saved as PDF in data_path.
fig, (ax_matrix, ax_metrics) = plt.subplots(
    1, 2, figsize=(15, 8), gridspec_kw={'width_ratios': [2, 1]}
)

confusion_matrix_data = [[number_true_negatives, number_false_positives],
                         [number_false_negatives, number_true_positives]]
heatmap = sns.heatmap(confusion_matrix_data, annot=True, fmt='d',
                      xticklabels=['Predicted Negative', 'Predicted Positive'],
                      yticklabels=['Actual Negative', 'Actual Positive'],
                      cbar_kws={'label': 'Number of Instances'},
                      ax=ax_matrix)
ax_matrix.set_title('Confusion Matrix')

ax_metrics.axis('off')
metrics_text = '\n'.join([
    'Performance Metrics:',
    '',
    f'True Positives: {number_true_positives}',
    f'False Positives: {number_false_positives}',
    f'True Negatives: {number_true_negatives}',
    f'False Negatives: {number_false_negatives}',
    '',
    f'Sensitivity: {sensitivity:.4f}',
    f'Specificity: {specificity:.4f}',
    f'Precision: {precision:.4f}',
    f'Miss Rate: {miss_rate:.4f}',
    f'F1 Score: {f1_score:.4f}',
])
ax_metrics.text(0, 0.5, metrics_text, fontsize=10,
                verticalalignment='center')

fig.suptitle('Person Detection: Confusion Matrix and Performance Metrics Based on the Person Label as Ground Truth', fontsize=16)
fig.tight_layout()
output_path = data_path / 'confusion_matrix_metrics_person.pdf'
fig.savefig(output_path)
plt.close(fig)

### Recalculate Measures with recognisable_label as ground truth (instead of person_label):

In [None]:
# Ground truth: recognisable_label; prediction: with_person_pred.
positive_bools = labels_results.recognisable_label == 1
negative_bools = labels_results.recognisable_label == 0
positive_pred_bools = labels_results.with_person_pred == 1
negative_pred_bools = labels_results.with_person_pred == 0

positives = labels_results[positive_bools]
negatives = labels_results[negative_bools]
true_positives = labels_results[positive_bools & positive_pred_bools]
true_negatives = labels_results[negative_bools & negative_pred_bools]

false_negatives = labels_results[positive_bools & negative_pred_bools]
false_positives = labels_results[negative_bools & positive_pred_bools]

# A class without any image gives NaN instead of a ZeroDivisionError
sensitivity = len(true_positives) / len(positives) if len(positives) else float('nan')
print('sensitivity:')
print(sensitivity)

specificity = len(true_negatives) / len(negatives) if len(negatives) else float('nan')
print('specificity:')
print(specificity)


In [None]:
false_negatives

In [None]:
print(f'True Positives: {true_positives.shape[0]}')
print(f'False Positives: {false_positives.shape[0]}')
print(f'True Negatives: {true_negatives.shape[0]}')
print(f'False Negatives: {false_negatives.shape[0]}')

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Calculate confusion matrix with scikit-learn. The figure and the metrics
# below use the counts of the row selections made in the metrics cell above.
cm = confusion_matrix(labels_results.recognisable_label, labels_results.with_person_pred)

number_true_positives = true_positives.shape[0]
number_false_positives = false_positives.shape[0]
number_true_negatives = true_negatives.shape[0]
number_false_negatives = false_negatives.shape[0]

number_positives = positives.shape[0]
number_negatives = negatives.shape[0]
number_predicted_positives = number_true_positives + number_false_positives

# Undefined ratios (empty class, no positive prediction) are reported as NaN
# instead of stopping the cell with a ZeroDivisionError.
nan = float('nan')
sensitivity = number_true_positives / number_positives if number_positives else nan
specificity = number_true_negatives / number_negatives if number_negatives else nan
precision = number_true_positives / number_predicted_positives if number_predicted_positives else nan
miss_rate = number_false_negatives / number_positives if number_positives else nan
f1_denominator = precision + sensitivity
f1_score = 2 * (precision * sensitivity) / f1_denominator if f1_denominator > 0 else nan

print("Confusion Matrix:")

fig, ax = plt.subplots(figsize=(8, 6))
confusion_matrix_data = [[number_true_negatives, number_false_positives],
                         [number_false_negatives, number_true_positives]]
sns.heatmap(confusion_matrix_data, annot=True, fmt='d',
            xticklabels=['Predicted Negative', 'Predicted Positive'],
            yticklabels=['Actual Negative', 'Actual Positive'], ax=ax)
ax.set_title('Confusion Matrix')
fig.tight_layout()
plt.show()

print(f'True Positives: {number_true_positives}')
print(f'False Positives: {number_false_positives}')
print(f'True Negatives: {number_true_negatives}')
print(f'False Negatives: {number_false_negatives}')
print(f'\nSensitivity (Recall): {sensitivity:.4f}')
print(f'Specificity: {specificity:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Miss Rate (False Negative Rate): {miss_rate:.4f}')
print(f'F1 Score: {f1_score:.4f}')

In [None]:
# Two-panel figure: confusion matrix heatmap (left, twice as wide) and the
# performance metrics as text (right); saved as PDF in data_path.
fig, (ax_matrix, ax_metrics) = plt.subplots(
    1, 2, figsize=(15, 8), gridspec_kw={'width_ratios': [2, 1]}
)

confusion_matrix_data = [[number_true_negatives, number_false_positives],
                         [number_false_negatives, number_true_positives]]
heatmap = sns.heatmap(confusion_matrix_data, annot=True, fmt='d',
                      xticklabels=['Predicted Negative', 'Predicted Positive'],
                      yticklabels=['Actual Negative', 'Actual Positive'],
                      cbar_kws={'label': 'Number of Instances'},
                      ax=ax_matrix)
ax_matrix.set_title('Confusion Matrix')

ax_metrics.axis('off')
metrics_text = '\n'.join([
    'Performance Metrics:',
    '',
    f'True Positives: {number_true_positives}',
    f'False Positives: {number_false_positives}',
    f'True Negatives: {number_true_negatives}',
    f'False Negatives: {number_false_negatives}',
    '',
    f'Sensitivity: {sensitivity:.4f}',
    f'Specificity: {specificity:.4f}',
    f'Precision: {precision:.4f}',
    f'Miss Rate: {miss_rate:.4f}',
    f'F1 Score: {f1_score:.4f}',
])
ax_metrics.text(0, 0.5, metrics_text, fontsize=10,
                verticalalignment='center')

fig.suptitle('Person Detection: Confusion Matrix and Performance Metrics Based on the Recognisable Label as Ground Truth', fontsize=16)
fig.tight_layout()
output_path = data_path / 'confusion_matrix_metrics_recognisable.pdf'
fig.savefig(output_path)
plt.close(fig)

### Calculate sensitivity and specificity for photography predictions and get lists of images with positive photography predictions:

In [None]:
# Ground truth: is_photo; prediction: is_photo_pred.
positive_bools = labels_results.is_photo == 1
negative_bools = labels_results.is_photo == 0
positive_pred_bools = labels_results.is_photo_pred == 1
negative_pred_bools = labels_results.is_photo_pred == 0

positives = labels_results[positive_bools]
negatives = labels_results[negative_bools]
true_positives = labels_results[positive_bools & positive_pred_bools]
true_negatives = labels_results[negative_bools & negative_pred_bools]

false_negatives = labels_results[positive_bools & negative_pred_bools]
false_positives = labels_results[negative_bools & positive_pred_bools]

# A class without any image gives NaN instead of a ZeroDivisionError
sensitivity = len(true_positives) / len(positives) if len(positives) else float('nan')
print('sensitivity:')
print(sensitivity)

specificity = len(true_negatives) / len(negatives) if len(negatives) else float('nan')
print('specificity:')
print(specificity)

In [None]:
print(f'True Positives: {true_positives.shape[0]}')
print(f'False Positives: {false_positives.shape[0]}')
print(f'True Negatives: {true_negatives.shape[0]}')
print(f'False Negatives: {false_negatives.shape[0]}')

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Calculate confusion matrix with scikit-learn. The figure and the metrics
# below use the counts of the row selections made in the metrics cell above.
cm = confusion_matrix(labels_results.is_photo, labels_results.is_photo_pred)

number_true_positives = true_positives.shape[0]
number_false_positives = false_positives.shape[0]
number_true_negatives = true_negatives.shape[0]
number_false_negatives = false_negatives.shape[0]

number_positives = positives.shape[0]
number_negatives = negatives.shape[0]
number_predicted_positives = number_true_positives + number_false_positives

# Undefined ratios (empty class, no positive prediction) are reported as NaN
# instead of stopping the cell with a ZeroDivisionError.
nan = float('nan')
sensitivity = number_true_positives / number_positives if number_positives else nan
specificity = number_true_negatives / number_negatives if number_negatives else nan
precision = number_true_positives / number_predicted_positives if number_predicted_positives else nan
miss_rate = number_false_negatives / number_positives if number_positives else nan
f1_denominator = precision + sensitivity
f1_score = 2 * (precision * sensitivity) / f1_denominator if f1_denominator > 0 else nan

print("Confusion Matrix:")

fig, ax = plt.subplots(figsize=(8, 6))
confusion_matrix_data = [[number_true_negatives, number_false_positives],
                         [number_false_negatives, number_true_positives]]
sns.heatmap(confusion_matrix_data, annot=True, fmt='d',
            xticklabels=['Predicted Negative', 'Predicted Positive'],
            yticklabels=['Actual Negative', 'Actual Positive'], ax=ax)
ax.set_title('Confusion Matrix')
fig.tight_layout()
plt.show()

print(f'True Positives: {number_true_positives}')
print(f'False Positives: {number_false_positives}')
print(f'True Negatives: {number_true_negatives}')
print(f'False Negatives: {number_false_negatives}')
print(f'\nSensitivity (Recall): {sensitivity:.4f}')
print(f'Specificity: {specificity:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Miss Rate (False Negative Rate): {miss_rate:.4f}')
print(f'F1 Score: {f1_score:.4f}')

In [None]:
# Two-panel figure: confusion matrix heatmap (left, twice as wide) and the
# performance metrics as text (right); saved as PDF in data_path.
fig, (ax_matrix, ax_metrics) = plt.subplots(
    1, 2, figsize=(15, 8), gridspec_kw={'width_ratios': [2, 1]}
)

confusion_matrix_data = [[number_true_negatives, number_false_positives],
                         [number_false_negatives, number_true_positives]]
heatmap = sns.heatmap(confusion_matrix_data, annot=True, fmt='d',
                      xticklabels=['Predicted Negative', 'Predicted Positive'],
                      yticklabels=['Actual Negative', 'Actual Positive'],
                      cbar_kws={'label': 'Number of Instances'},
                      ax=ax_matrix)
ax_matrix.set_title('Confusion Matrix')

ax_metrics.axis('off')
metrics_text = '\n'.join([
    'Performance Metrics:',
    '',
    f'True Positives: {number_true_positives}',
    f'False Positives: {number_false_positives}',
    f'True Negatives: {number_true_negatives}',
    f'False Negatives: {number_false_negatives}',
    '',
    f'Sensitivity: {sensitivity:.4f}',
    f'Specificity: {specificity:.4f}',
    f'Precision: {precision:.4f}',
    f'Miss Rate: {miss_rate:.4f}',
    f'F1 Score: {f1_score:.4f}',
])
ax_metrics.text(0, 0.5, metrics_text, fontsize=10,
                verticalalignment='center')

fig.suptitle('Photography Detection: Confusion Matrix and Performance Metrics Based on is_photo Label as Ground Truth', fontsize=16)
fig.tight_layout()
output_path = data_path / 'confusion_matrix_metrics_is_photo.pdf'
fig.savefig(output_path)
plt.close(fig)

### Calculate sensitivity and specificity for church predictions and get lists of images with positive church predictions:

In [None]:
# Ground truth: church; prediction: with_church_pred.
positive_bools = labels_results.church == 1
negative_bools = labels_results.church == 0
positive_pred_bools = labels_results.with_church_pred == 1
negative_pred_bools = labels_results.with_church_pred == 0

positives = labels_results[positive_bools]
negatives = labels_results[negative_bools]
true_positives = labels_results[positive_bools & positive_pred_bools]
true_negatives = labels_results[negative_bools & negative_pred_bools]

false_negatives = labels_results[positive_bools & negative_pred_bools]
false_positives = labels_results[negative_bools & positive_pred_bools]

# A class without any image gives NaN instead of a ZeroDivisionError
sensitivity = len(true_positives) / len(positives) if len(positives) else float('nan')
print('sensitivity:')
print(sensitivity)

specificity = len(true_negatives) / len(negatives) if len(negatives) else float('nan')
print('specificity:')
print(specificity)

In [None]:
print(f'True Positives: {true_positives.shape[0]}')
print(f'False Positives: {false_positives.shape[0]}')
print(f'True Negatives: {true_negatives.shape[0]}')
print(f'False Negatives: {false_negatives.shape[0]}')

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Confusion matrix as computed by scikit-learn (church label vs. church prediction).
cm = confusion_matrix(
    y_true=labels_results['church'],
    y_pred=labels_results['with_church_pred'],
)

# Group sizes taken from the row subsets built earlier in the notebook.
number_true_positives = len(true_positives)
number_false_positives = len(false_positives)
number_true_negatives = len(true_negatives)
number_false_negatives = len(false_negatives)

# Derived rates; sensitivity and miss rate share the "all labelled positives" denominator.
sensitivity = number_true_positives / len(positives)
specificity = number_true_negatives / len(negatives)
precision = number_true_positives / (number_true_positives + number_false_positives)
miss_rate = number_false_negatives / len(positives)
f1_score = 2 * (precision * sensitivity) / (precision + sensitivity)

print("Confusion Matrix:")

# Heatmap layout: rows are the actual class, columns the predicted class.
plt.figure(figsize=(8, 6))
confusion_matrix_data = [
    [number_true_negatives, number_false_positives],
    [number_false_negatives, number_true_positives],
]
sns.heatmap(
    confusion_matrix_data,
    annot=True,
    fmt='d',
    xticklabels=['Predicted Negative', 'Predicted Positive'],
    yticklabels=['Actual Negative', 'Actual Positive'],
)
plt.title('Confusion Matrix')
plt.tight_layout()
plt.show()

# Text summary: counts first, then (after a blank line) the derived rates.
print(
    f'True Positives: {number_true_positives}',
    f'False Positives: {number_false_positives}',
    f'True Negatives: {number_true_negatives}',
    f'False Negatives: {number_false_negatives}',
    f'\nSensitivity (Recall): {sensitivity:.4f}',
    f'Specificity: {specificity:.4f}',
    f'Precision: {precision:.4f}',
    f'Miss Rate (False Negative Rate): {miss_rate:.4f}',
    f'F1 Score: {f1_score:.4f}',
    sep='\n',
)

In [None]:
# Two-panel figure: confusion-matrix heatmap on the left (2/3 of the width),
# a plain-text metrics summary on the right (1/3 of the width).
confusion_matrix_data = [
    [number_true_negatives, number_false_positives],
    [number_false_negatives, number_true_positives],
]

plt.figure(figsize=(15, 8))
gs = plt.GridSpec(nrows=1, ncols=2, width_ratios=[2, 1])

# Left panel: heatmap with rows = actual class, columns = predicted class.
plt.subplot(gs[0])
heatmap = sns.heatmap(
    confusion_matrix_data,
    annot=True,
    fmt='d',
    xticklabels=['Predicted Negative', 'Predicted Positive'],
    yticklabels=['Actual Negative', 'Actual Positive'],
    cbar_kws={'label': 'Number of Instances'},
)
plt.title('Confusion Matrix')

# Right panel: axes hidden, only the metrics text is drawn.
plt.subplot(gs[1])
plt.axis('off')
metrics_text = (
    f'Performance Metrics:\n\n'
    f'True Positives: {number_true_positives}\n'
    f'False Positives: {number_false_positives}\n'
    f'True Negatives: {number_true_negatives}\n'
    f'False Negatives: {number_false_negatives}\n\n'
    f'Sensitivity: {sensitivity:.4f}\n'
    f'Specificity: {specificity:.4f}\n'
    f'Precision: {precision:.4f}\n'
    f'Miss Rate: {miss_rate:.4f}\n'
    f'F1 Score: {f1_score:.4f}'
)
plt.text(0, 0.5, metrics_text, fontsize=10, va='center')

plt.suptitle('Church Detection: Confusion Matrix and Performance Metrics Based on the church Label as Ground Truth', fontsize=16)
plt.tight_layout()

# Write the figure to a PDF in the data folder, then close it so it is not rendered inline.
output_path = data_path / 'confusion_matrix_metrics_with_church.pdf'
plt.savefig(output_path)
plt.close()

In [None]:
# Display the rows with church label 0 but with_church_pred 1.
false_positives

In [None]:
# Display the rows with church label 1 but with_church_pred 0.
false_negatives

In [None]:
# Folder layout for the misclassified church images:
#   <root_path>/false_church_pred/false_positives
#   <root_path>/false_church_pred/false_negatives
# The `false_curch_path` spelling is kept because it is a notebook-level name.
false_curch_path = root_path.joinpath('false_church_pred')
church_false_positives_path = false_curch_path.joinpath('false_positives')
church_false_negatives_path = false_curch_path.joinpath('false_negatives')

In [None]:
# Display the false-positives folder path as a sanity check.
church_false_positives_path

In [None]:
# Copy (not move) the .tif originals of the church false positives
# (church label 0, with_church_pred 1) into their own folder for visual inspection.
# The destination folder is created first because shutil.copy does not create
# missing parent directories.
os.makedirs(church_false_positives_path, exist_ok=True)

for idx, row in false_positives.iterrows():
    print(idx)
    img_id = row['image_id']
    # File names are built as 'BernerOberland<image_id>.tif'; image_id must be a string here.
    file_name = 'BernerOberland' + img_id + '.tif'
    print(file_name)
    source_path = tif_data_path / file_name
    dest_path = church_false_positives_path / file_name
    print(source_path)
    print(dest_path)
    shutil.copy(source_path, dest_path)


In [None]:
# Copy (not move) the .tif originals of the church false negatives
# (church label 1, with_church_pred 0) into their own folder for visual inspection.
# The destination folder is created first because shutil.copy does not create
# missing parent directories.
os.makedirs(church_false_negatives_path, exist_ok=True)

for idx, row in false_negatives.iterrows():
    print(idx)
    img_id = row['image_id']
    # File names are built as 'BernerOberland<image_id>.tif'; image_id must be a string here.
    file_name = 'BernerOberland' + img_id + '.tif'
    source_path = tif_data_path / file_name
    dest_path = church_false_negatives_path / file_name
    print(dest_path)
    shutil.copy(source_path, dest_path)

### Visually inspect the images in the two folders!

I visually verified all classified images; the false negatives are all images with non-recognisable persons (in my judgement).

## Save labels and results:

In [None]:
# Display the combined labels/predictions table before saving it.
labels_results

In [None]:
# Add an `image_id_str` column holding image ids that remain string typed even
# after the table is saved to CSV and reloaded.
# NOTE(review): complete_image_ids is a project helper (source.image_id_converter);
# its exact output format is not visible here — confirm before relying on it.
labels = list(labels_results.image_id)
new_labels = img_idc.complete_image_ids(labels)
# In-place column assignment: re-running this cell overwrites the same column.
labels_results['image_id_str'] = new_labels
labels_results

In [None]:
# Capture the current local time once; it is formatted into the results file name below.
current_timestamp = pd.Timestamp.now()
current_timestamp

In [None]:
# Render the timestamp as 'YYYY-MM-DD HH:MM' text, minute resolution.
# Note: the resulting string contains a space and a colon.
current_date_time = current_timestamp.strftime('%Y-%m-%d %H:%M')
current_date_time

In [None]:
# Save the labels and predictions under a timestamped file name.
# current_date_time looks like 'YYYY-MM-DD HH:MM'. ':' is a reserved character in
# Windows file names and the space is awkward in shells, so both are replaced
# before the string is used on disk (e.g. '2025-07-17_10-30').
safe_date_time = current_date_time.replace(' ', '_').replace(':', '-')
results_file_name = 'results_img_analysis_minicpm_' + safe_date_time + '.csv'
labels_results.to_csv(data_path / results_file_name)