In [1]:
import json
import sys
import os
import cv2
import numpy as np

sys.path.insert(0, '../../')
from data_loader import get_data_to_load, update_data_to_load


# Ask the project's data loader for the paths to process.  The result is
# treated as a flat list containing both '.png' and '.json' paths.
files = get_data_to_load(
    loading_file='../03_mapping/data_list',
    file_location='../01_enriching/data/',
    image_file_location='../../1_data_collection/data/',
)

# Snapshot the loader's ordering before sorting so that it can be restored
# once filtering is done.
original_order = list(files)

# Sort in place by path string.
# NOTE(review): the image/JSON pairing further down relies on both groups
# ending up in the same relative order after this sort -- confirm that the
# file naming guarantees it.
files.sort()

# Split the sorted paths by extension.
images = [path for path in files if path.endswith('.png')]
jsons = [path for path in files if path.endswith('.json')]

# Number of images before filtering.
print(len(images))

# An image is kept only if its largest per-channel pixel variance is strictly
# greater than this value.
# NOTE(review): the recorded cell output shows an image with a max variance
# of ~792 being rejected, which does not match this threshold -- the output
# is probably from an earlier run with a higher value; confirm.
MIN_MAX_VARIANCE = 500

# Paths (image followed by its JSON) of the samples that pass the filter.
filtered_files = []

# image path -> largest per-channel variance of that image
variance_map = {}
# For easier visualization: image path -> country name from the JSON
image_to_country_map = {}

# zip() stops at the shorter input, so a count mismatch would silently drop
# samples (and most likely mis-pair the remaining ones).  Fail loudly.
if len(images) != len(jsons):
    raise ValueError(
        f"Number of images ({len(images)}) does not match "
        f"number of JSON files ({len(jsons)})"
    )

for image_path, json_path in zip(images, jsons):
    # Load an image.  cv2.imread signals failure by returning None rather
    # than raising, so check explicitly to get a readable error message.
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # For easier visualization
    with open(json_path, 'r', encoding='utf8') as f:
        data = json.load(f)
    country_name = data['country_name']

    # Variance of each colour channel, taken over both spatial axes
    variance_colors = np.var(image, axis=(0, 1))

    # Convert to a built-in float so the value is a plain number when it is
    # stored and later dumped to JSON.
    max_variance = float(np.max(variance_colors))

    variance_map[image_path] = max_variance
    # For easier visualization
    image_to_country_map[image_path] = country_name

    if max_variance > MIN_MAX_VARIANCE:
        filtered_files.append(image_path)
        filtered_files.append(json_path)
    else:
        print("Too low max RGB variance:", image_path)
        print("Corresponding result:", json_path)
        print("Max RGB variance:", max_variance)

# Number of images that survived the filter.
filtered_images = [path for path in filtered_files if path.endswith('.png')]
print(len(filtered_images))

# Persist the per-image variance values as JSON.
with open('variance_map', 'w', encoding='utf8') as f:
    json.dump(variance_map, f, ensure_ascii=False)
# For easier visualization
with open('image_to_country_map', 'w', encoding='utf8') as f:
    json.dump(image_to_country_map, f, ensure_ascii=False)

# Sort back to original order (some files were removed).
# Build a path -> position lookup once instead of calling list.index() for
# every element, which made the sort quadratic.  setdefault() keeps the
# first position of a path, matching what list.index() returns.
original_position = {}
for position, path in enumerate(original_order):
    original_position.setdefault(path, position)
filtered_files = sorted(filtered_files, key=original_position.__getitem__)

# Hand the surviving paths back to the data loader so that it can update
# the loading list.
update_data_to_load(
    filtered_files,
    old_loading_file='../03_mapping/data_list',
    file_location='../01_enriching/data/',
    image_file_location='../../1_data_collection/data/',
)

23
Too low max RGB variance: ../../1_data_collection/data/geoguessr_location_singleplayer_0AX99V7ji4qZzWyq_0.png
Corresponding result: ../01_enriching/data/geoguessr_result_singleplayer_0AX99V7ji4qZzWyq_0.json
Max RGB variance: 792.3439031402268
22
