# Enriching locations with country information

## Importing libraries

In [2]:
import os
import re
import json
import pandas as pd
import pycountry
import reverse_geocoder as rg

## Testing the libraries

In [3]:
# Smoke test for reverse_geocoder: look up one coordinate pair and display the match it returns
coordinates = (
    51.5214588,   # latitude
    -0.1729636,   # longitude
)
rg.search(coordinates)

Loading formatted geocoded file...


[{'lat': '51.51116',
  'lon': '-0.18426',
  'name': 'Bayswater',
  'admin1': 'England',
  'admin2': 'Greater London',
  'cc': 'GB'}]

In [4]:
# Smoke test for pycountry: resolve an ISO alpha-2 code to its Country record
pycountry.countries.get(alpha_2="GB")

Country(alpha_2='GB', alpha_3='GBR', flag='🇬🇧', name='United Kingdom', numeric='826', official_name='United Kingdom of Great Britain and Northern Ireland')

## Class for enriching locations with country information

In [5]:
class CountryEnricher:
    """Enriches JSON files with country information based on coordinates.

    The workflow, driven by ``process``, is:

    1. ``load_and_prepare_files`` reads every ``*singleplayer_<id>.json`` file
       found in ``input_dir`` and queues its ``coordinates`` entry (if any).
    2. ``enrich_with_country_info`` resolves all queued coordinates with a
       single batched ``rg.search`` call and adds ``country_code`` and
       ``country_name`` to the matching documents.
    3. ``save_enriched_files`` writes every loaded document to ``output_dir``.

    Attributes:
        input_dir: Directory scanned for input JSON files.
        output_dir: Directory the enriched JSON files are written to.
        json_files: Maps image id -> parsed JSON document.
        coordinates: Coordinate tuples queued for the batched lookup.
        image_ids: Image ids parallel to ``coordinates`` (same index, same
            file). This is what ties a lookup result back to its document.
        file_map: Maps coordinate tuple -> image id. Kept only for backward
            compatibility: when several files share a coordinate it can hold
            just one of them, so it is not used for the enrichment itself.
        pattern: Regex whose first group extracts the image id from a file name.
    """

    def __init__(self, input_dir, output_dir):
        """Store the directories and create the empty working collections.

        Args:
            input_dir: Directory containing the source JSON files.
            output_dir: Directory to write enriched files to (created on save).
        """
        self.input_dir = input_dir
        self.output_dir = output_dir
        self.json_files = {}
        # Coordinates are collected first and geocoded in one batch; the
        # original author measured this as roughly 5x faster than per-file
        # lookups.
        self.coordinates = []
        self.image_ids = []
        self.file_map = {}
        self.pattern = r'singleplayer_(.+?)\.json'

    def load_and_prepare_files(self):
        """Load matching JSON files and queue their coordinates for lookup.

        A file is considered when its name ends with ``.json``, contains
        ``singleplayer`` and matches ``self.pattern``. Documents without a
        ``coordinates`` key are still loaded (and later saved) but are not
        queued for geocoding.
        """
        json_paths = [
            f for f in os.listdir(self.input_dir)
            if f.endswith('.json') and 'singleplayer' in f
        ]
        for file in json_paths:
            match = re.search(self.pattern, file)
            if not match:
                continue
            image_id = match.group(1)
            # JSON is UTF-8 by specification; do not rely on the platform default.
            with open(os.path.join(self.input_dir, file), 'r', encoding='utf-8') as f:
                json_data = json.load(f)
            self.json_files[image_id] = json_data
            if 'coordinates' in json_data:
                coord = tuple(json_data['coordinates'])
                # Track the owning file by position, not by coordinate value:
                # two files may legitimately carry identical coordinates, and
                # a coord-keyed dict would silently drop all but the last one.
                self.coordinates.append(coord)
                self.image_ids.append(image_id)
                self.file_map[coord] = image_id

    def enrich_with_country_info(self):
        """Add ``country_code`` and ``country_name`` to every queued document.

        All queued coordinates are resolved with one ``rg.search`` call. The
        country name comes from ``pycountry``; when the code is not known to
        ``pycountry`` the name is set to ``'Unknown'``.
        """
        if not self.coordinates:
            # Nothing to geocode; skip the lookup rather than handing the
            # library an empty batch.
            return
        results = rg.search(self.coordinates)
        # Results are consumed in the same order as the queued coordinates, so
        # the i-th result belongs to the i-th queued image id.
        for result, image_id in zip(results, self.image_ids):
            country_code = result['cc']
            country = pycountry.countries.get(alpha_2=country_code)
            country_name = country.name if country else 'Unknown'
            self.json_files[image_id]['country_code'] = country_code
            self.json_files[image_id]['country_name'] = country_name

    def save_enriched_files(self):
        """Write every loaded document to ``output_dir`` as indented JSON.

        The directory is created if missing. Files are named
        ``geoguessr_result_singleplayer_<image_id>.json``.
        """
        os.makedirs(self.output_dir, exist_ok=True)
        for image_id, data in self.json_files.items():
            file_name = "geoguessr_result_singleplayer_" + image_id + ".json"
            file_path = os.path.join(self.output_dir, file_name)
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=4)

    def process(self):
        """Run the full pipeline: load, enrich, then save."""
        self.load_and_prepare_files()
        self.enrich_with_country_info()
        self.save_enriched_files()


## Enrich the JSON files with the class

In [6]:
# Folder the singleplayer JSON files are read from, and folder the enriched copies go to
input_dir = '../../1_data collection/data/'
output_dir = '../01_enriching/data/'

# Build the enricher and run load -> enrich -> save in one go
geo_enricher = CountryEnricher(input_dir=input_dir, output_dir=output_dir)
geo_enricher.process()