In [1]:
from dotenv import load_dotenv
from service.s3_storage_service import S3StorageService
from storage.s3_client import S3Client

# Load variables from the .env file into the process environment (the bucket
# name is expected to be among them). This runs before the client is built;
# presumably S3Client / S3StorageService read their configuration from the
# environment -- confirm in storage.s3_client and service.s3_storage_service.
load_dotenv()
# Low-level client obtained from the project's S3Client wrapper.
s3_client = S3Client().get_client()
# Storage service wrapping that client; later cells use it for JSON load/save.
s3_service = S3StorageService(s3_client)

In [2]:
def extract_station_locations(s3_service, metadata_key, guids_key, output_key):
    """Build a list of station location records and write it to S3 as JSON.

    Two JSON documents are read through ``s3_service``: the station metadata
    (an iterable of station mappings) and the processed GUID list (an iterable
    of mappings carrying ``referenceId`` and ``guid``). Every station whose
    ``stationReference`` appears among the GUID entries yields one output
    record; stations without a match are skipped.

    Each output record always has ``guid``, ``referenceId``, ``lat`` and
    ``long`` (``lat``/``long`` are ``None`` when the station lacks them).
    ``riverName``, ``town``, ``label`` and ``catchmentName`` are added, in that
    order, only when the station holds a truthy value for them.

    Args:
        s3_service: Object exposing ``load_json_from_s3(key)`` and
            ``save_json_to_s3(key, data)``.
        metadata_key: S3 key of the station metadata JSON.
        guids_key: S3 key of the processed GUID/reference JSON.
        output_key: S3 key the resulting list is saved under.

    Returns:
        None. The result is only persisted via ``s3_service.save_json_to_s3``.

    Raises:
        KeyError: If a GUID entry lacks ``referenceId``/``guid`` or a station
            lacks ``stationReference`` (these are read by subscript).
    """
    # Metadata is fetched first, then the GUID list (same order as the keys).
    metadata_records = s3_service.load_json_from_s3(metadata_key)
    guid_records = s3_service.load_json_from_s3(guids_key)

    # referenceId -> guid lookup; a repeated referenceId keeps the last guid seen.
    guid_by_reference = dict(
        (entry['referenceId'], entry['guid']) for entry in guid_records
    )

    # Fields copied only when present and truthy; the tuple order fixes the
    # key order in the output records.
    optional_fields = ('riverName', 'town', 'label', 'catchmentName')

    locations = []
    for record in metadata_records:
        reference = record['stationReference']
        if reference not in guid_by_reference:
            continue  # station is not part of the processed set

        location = {
            'guid': guid_by_reference[reference],
            'referenceId': reference,
            'lat': record.get('lat'),
            'long': record.get('long'),
        }
        for field in optional_fields:
            value = record.get(field)
            if value:
                location[field] = value

        locations.append(location)

    # Persist the assembled list under the requested output key.
    s3_service.save_json_to_s3(output_key, locations)




In [3]:
# S3 object keys: the two inputs and the output of the extraction step.
metadata_key = 'flood_stations_metadata/project_stations_metadata.json'
guids_key = 'flood_stations_metadata/processed_guids_refs.json'
output_key = 'flood_stations_metadata/stations_locations.json'

# Run the extraction; the result is written to `output_key` and nothing is returned.
extract_station_locations(
    s3_service,
    metadata_key=metadata_key,
    guids_key=guids_key,
    output_key=output_key,
)