In [1]:
import boto3
from datetime import datetime, timezone
import json
from os import listdir
from os.path import isfile, join
import urllib3

# --- Configuration ---------------------------------------------------------
# Base URL of the OpenFisheries landings API; endpoint paths are appended to it.
API_LINK = "https://www.openfisheries.org/api/landings/"
# Name of the S3 bucket that receives the downloaded JSON documents.
S3_BUCKET = "openfisheries"
# Local directory where each JSON file is written before it is uploaded.
LOCAL_FILE_SYS = "/tmp"
# Default row limit used when fetching per-species detail files.
LIMIT_ROWS = 30

# Shared S3 client, created once when this cell runs and reused by every upload.
s3_client = boto3.client("s3")

def load_species_json(file_path):
    """Read a JSON document from disk and return the parsed value.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale-specific default encoding.
    with open(file_path, 'r', encoding="utf-8") as file:
        return json.load(file)

def create_species_mapping(species_data):
    """Index species records by their ``a3_code`` value.

    Records whose ``a3_code`` is missing or falsy are left out. If several
    records share a code, the one appearing last in ``species_data`` wins.

    Args:
        species_data: Iterable of dict-like species records.

    Returns:
        dict mapping each ``a3_code`` to its species record.
    """
    # Pair every record with its code once, then keep only the usable codes.
    keyed_records = ((record.get("a3_code"), record) for record in species_data)
    return {code: record for code, record in keyed_records if code}

def get_species():
    """Download the species index, cache it locally, and mirror it to S3.

    Fetches ``{API_LINK}species.json``, writes the parsed payload to
    ``{LOCAL_FILE_SYS}/species.json`` and uploads that file to ``S3_BUCKET``
    under the key ``species.json``.

    Returns:
        The parsed JSON payload. An empty list is returned when the request
        fails, answers with a non-200 status, or cannot be decoded. If only
        the local write or the S3 upload fails, the already-parsed payload is
        still returned.
    """
    http = urllib3.PoolManager()
    local_path = f"{LOCAL_FILE_SYS}/species.json"
    data = []
    try:
        r = http.request(
            "GET",
            f"{API_LINK}species.json",
            retries=urllib3.util.Retry(1),
        )
        # Error statuses come back as ordinary responses here (no
        # status_forcelist is configured on the Retry), so reject them
        # explicitly instead of parsing and uploading an error body.
        if r.status != 200:
            raise RuntimeError(f"unexpected HTTP status {r.status}")

        data = json.loads(r.data.decode("utf8"))
        with open(local_path, 'w') as file:
            json.dump(data, file)

        s3_client.upload_file(local_path, S3_BUCKET, "species.json")

    except Exception as e:
        # Best-effort: report the cause and fall through to return whatever
        # was obtained so far (the empty list if nothing was parsed).
        print(f"Error fetching species: {e}")

    return data

def get_species_details(a3_code_list, max_rows=LIMIT_ROWS):
    """Fetch per-species detail documents and upload each one to S3.

    For every code in ``a3_code_list`` the document at
    ``{API_LINK}species/{a3_code}.json`` is downloaded, written to
    ``{LOCAL_FILE_SYS}/{a3_code}.json`` and uploaded to ``S3_BUCKET`` under
    the key ``{a3_code}.json``. A failure for one code is reported and does
    not stop the remaining codes from being processed.

    Args:
        a3_code_list: Iterable of species codes to fetch.
        max_rows: Stop once this many codes have been fetched and uploaded
            successfully. ``None`` means no limit.

    Returns:
        None.
    """
    http = urllib3.PoolManager()
    successful_rows = 0

    for a3_code in a3_code_list:
        # Check the cap before doing any work so that max_rows=0 fetches nothing.
        if max_rows is not None and successful_rows >= max_rows:
            break
        if not a3_code:
            continue  # no code, so there is no URL or file name to build

        local_path = f"{LOCAL_FILE_SYS}/{a3_code}.json"
        try:
            r = http.request(
                "GET",
                f"{API_LINK}species/{a3_code}.json",
                retries=urllib3.util.Retry(1),
            )
            # A non-200 answer means there is no document for this code;
            # skip it rather than caching and uploading an error body.
            if r.status != 200:
                print(f"No endpoint for {a3_code} found.")
                continue

            data = json.loads(r.data.decode("utf8"))
            with open(local_path, 'w') as file:
                json.dump(data, file)

            s3_client.upload_file(local_path, S3_BUCKET, f"{a3_code}.json")

            # Only a fully fetched-and-uploaded document counts towards the cap.
            successful_rows += 1

        except Exception as e:
            # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
            # propagate, and surface the real cause of the failure.
            print(f"Error processing {a3_code}: {e}")

def lambda_handler(event, context):
    """Entry point with the AWS Lambda handler signature.

    Downloads the species index via ``get_species()`` and then fetches the
    detail document of each listed species via ``get_species_details()``.

    Args:
        event: Invocation payload; not read by this handler.
        context: Invocation context; not read by this handler.

    Returns:
        None.
    """
    species_list = get_species()
    if not species_list:
        # get_species() has already reported the failure; without an index
        # there are no codes to fetch, so stop before the detail step.
        return

    # Tolerate records that are malformed or lack a code instead of raising.
    a3_codes = [
        item["a3_code"]
        for item in species_list
        if isinstance(item, dict) and item.get("a3_code")
    ]
    get_species_details(a3_codes)

### Much, much slower, but more readable:

In [None]:
import boto3
import json
import urllib3

# --- Configuration ---------------------------------------------------------
# Base URL of the OpenFisheries landings API; endpoint paths are appended to it.
API_LINK = "https://www.openfisheries.org/api/landings/"
# Name of the S3 bucket that receives the downloaded JSON documents.
S3_BUCKET = "openfisheries"
# Row limit applied when fetching per-species detail documents.
LIMIT_ROWS = 30

# Shared S3 client, created once when this cell runs and reused by every upload.
s3_client = boto3.client("s3")

def upload_to_s3(data, s3_key):
    """Serialize ``data`` to JSON and store it in ``S3_BUCKET`` under ``s3_key``.

    Args:
        data: JSON-serializable object to upload.
        s3_key: Object key to write within the bucket.
    """
    payload = json.dumps(data)
    s3_client.put_object(Bucket=S3_BUCKET, Key=s3_key, Body=payload)

def fetch_and_upload_data(api_url, s3_key):
    """Download a JSON document and store it in S3.

    Failures are signalled through the ``None`` return value only; no
    module-level state is modified, so repeated invocations in the same
    process behave identically.

    Args:
        api_url: URL to GET.
        s3_key: Key under which the parsed document is stored in ``S3_BUCKET``.

    Returns:
        The parsed JSON payload on success, or ``None`` if the request failed,
        answered with a non-200 status, could not be decoded, or could not be
        uploaded.
    """
    http = urllib3.PoolManager()

    try:
        r = http.request(
            "GET",
            api_url,
            retries=urllib3.util.Retry(1),
        )
        # Error statuses come back as ordinary responses here, so reject them
        # explicitly rather than uploading an error body.
        if r.status != 200:
            print(f"No records of {s3_key} found.")
            return None

        data = json.loads(r.data.decode("utf8"))
        upload_to_s3(data, s3_key)
        print(f"{s3_key} added to S3.")
        return data

    except Exception as e:
        # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
        # propagate, and include the underlying cause in the report.
        print(f"No records of {s3_key} found. ({e})")
        return None

def lambda_handler(event, context):
    """Entry point with the AWS Lambda handler signature.

    Uploads the species index, then walks through it uploading per-species
    detail documents until ``LIMIT_ROWS`` of them have been stored
    successfully or the index is exhausted.

    Args:
        event: Invocation payload; not read by this handler.
        context: Invocation context; not read by this handler.

    Returns:
        None.
    """
    species_data = fetch_and_upload_data(f"{API_LINK}species.json", "species.json")
    if not species_data:
        return

    # Read the limit once so the cap stays fixed for the whole run even if the
    # module-level value changes while documents are being fetched.
    limit = LIMIT_ROWS
    uploaded = 0

    for species in species_data:
        if uploaded >= limit:
            break

        a3_code = species.get("a3_code") if isinstance(species, dict) else None
        if not a3_code:
            continue

        result = fetch_and_upload_data(
            f"{API_LINK}species/{a3_code}.json", f"{a3_code}.json"
        )
        # Count only successes, so a species with no document does not use up
        # one of the LIMIT_ROWS slots. An empty payload is still a success,
        # hence the explicit comparison with None.
        if result is not None:
            uploaded += 1