In [5]:
%pip install geojson

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
pip install requests_cache

Collecting requests_cache
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting cattrs>=22.2 (from requests_cache)
  Downloading cattrs-24.1.2-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests_cache)
  Downloading url_normalize-1.4.3-py2.py3-none-any.whl.metadata (3.1 kB)
Downloading requests_cache-1.2.1-py3-none-any.whl (61 kB)
Downloading cattrs-24.1.2-py3-none-any.whl (66 kB)
Downloading url_normalize-1.4.3-py2.py3-none-any.whl (6.8 kB)
Installing collected packages: url-normalize, cattrs, requests_cache
Successfully installed cattrs-24.1.2 requests_cache-1.2.1 url-normalize-1.4.3
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
pip install pandas

Collecting pandasNote: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip



  Downloading pandas-2.2.3-cp310-cp310-win_amd64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.3-cp310-cp310-win_amd64.whl (11.6 MB)
   ---------------------------------------- 0.0/11.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.6 MB ? eta -:--:--
    --------------------------------------- 0.3/11.6 MB ? eta -:--:--
    --------------------------------------- 0.3/11.6 MB ? eta -:--:--
   - -------------------------------------- 0.5/11.6 MB 558.9 kB/s eta 0:00:20
   - -------------------------------------- 0.5/11.6 MB 558.9 kB/s eta 0:00:20
   - -------------------------------------- 0.5/11.6 MB 558.9 kB/s eta 0:00:20
   -- ------------------------------------- 0.8/11.6 MB 516.0 kB/s eta 0:00:21
   -- ------------------------------------- 0.8/11.6 MB 516.0 k

In [6]:
from typing import List
import geojson
import requests
import pandas as pd
import countries_iso_dict
import iso_codes
from requests_cache import CachedSession

_session = CachedSession(expire_after=604800)  # Cache expires after 1 week

def clear_cache():
    _session.cache.clear()

def set_cache_expire_time(seconds: int):
    """Update cache expiring time. Does not clear cache."""
    global _session
    _session = CachedSession(expire_after=seconds)

def disable_cache():
    global _session
    _session = requests

def _is_valid_adm(iso3, adm: str) -> bool:
    html = _session.get(f"https://www.geoboundaries.org/api/current/gbOpen/{iso3}/", verify=True).text
    return adm in html

def _validate_adm(adm: str | int) -> str:
    if isinstance(adm, int) or len(str(adm)) == 1:
        adm = 'ADM' + str(adm)
    if str.upper(adm) in [f'ADM{i}' for i in range(6)] or str.upper(adm) == 'ALL':
        return str.upper(adm)
    raise KeyError

def _get_smallest_adm(iso3):
    current_adm = 5
    adm_exists = False
    while current_adm >= 0:
        if _is_valid_adm(iso3, f'ADM{current_adm}'):
            break
        current_adm -= 1
    print(f'Smallest ADM level found for {iso3} : ADM{current_adm}')
    return f'ADM{current_adm}'

def _is_valid_iso3_code(territory: str) -> bool:
    return str.lower(territory) in iso_codes.iso_codes

def _get_iso3_from_name_or_iso2(name: str) -> str:
    try:
        return str.upper(countries_iso_dict.countries_iso3[str.lower(name)])
    except KeyError as e:
        print(f"KeyError : Couldn't find country named {e}")
        raise KeyError

def _generate_url(territory: str, adm: str | int) -> str:
    iso3 = str.upper(territory) if _is_valid_iso3_code(territory) else _get_iso3_from_name_or_iso2(territory)
    if adm != -1:
        adm = _validate_adm(adm)
    else:
        adm = _get_smallest_adm(iso3)
    if not _is_valid_adm(iso3, adm):
        print(f"KeyError : ADM level '{adm}' doesn't exist for country '{territory}' ({iso3})")
        raise KeyError
    return f"https://www.geoboundaries.org/api/current/gbOpen/{iso3}/{adm}"

def get_metadata(territory: str, adm: str | int) -> dict:
    """Returns a JSON of specified territory's metadata."""
    return _session.get(_generate_url(territory, adm), verify=True).json()

def _get_data(territory: str, adm: str, simplified: bool) -> dict:
    """Requests the geoboundaries API and returns a JSON str object of the specified territory and ADM."""
    geom_complexity = 'simplifiedGeometryGeoJSON' if simplified else 'gjDownloadURL'
    try:
        json_uri = get_metadata(territory, adm)[geom_complexity]
    except:
        print(f"Error while requesting geoboundaries API\n URL : {_generate_url(territory, adm)}\n")
        raise
    return _session.get(json_uri).text

def get_adm(territories: str | List[str], adm: str | int, simplified=True) -> dict:
    """Returns a JSON of specified territories at specified ADM levels."""
    if isinstance(territories, str):
        return geojson.loads(_get_data(territories, adm, simplified))
    geojsons = [geojson.loads(_get_data(i, adm, simplified))['features'][0] for i in territories]
    feature_collection = geojson.FeatureCollection(geojsons)
    return feature_collection


regions_dict = {
    "South Sudan": [
        "Unity", "Jonglei", "Western Bahr El Ghazal", "Abyei Administrative Area", "Renk", "Geziret Aba Village", "Rabak Locality", 
        "White Nile", "Khartoum", "South Darfur", "North Darfur", "Bentiu", "Malakal", "Juba", "Tonj North", "Greater Upper Nile", 
        "Aweil East", "Aweil South", "Central Equatoria", "Greater Pibor Administrative Area", "Ruweng", "Bor", "Pochalla", 
        "Rumbek East", "Yirol West", "Yirol East", "Lakes", "Bor Capital", "Northern Bahr El Ghazal", "Western Equatoria", 
        "National South Sudan-Wide", "Greater Darfur", "Greater Kordofan", "Red Sea", "El Obeid North Kordofan", "El Fao El Gedaref", 
        "El Fasher", "Nasir", "Mayom", "Panyijiar", "Koch", "Kajo-Keji", "Maban", "Old Fangak", "Sudd Marshes", "Sudd Wetlands", 
        "Wau", "Lakes State", "Upper Nile", "Greater Bahr El Ghazal", "Gogrial West", "Awerial", "Gak", "pibor"
    ],
    "Sudan": [
        "Khartoum", "South Darfur", "North Darfur", "El Obeid North Kordofan", "El Fao El Gedaref", "Greater Darfur", "Greater Kordofan", 
        "Red Sea", "Kassala", "River Nile", "Aj Jazira", "Sennar", "North Kordofan", "Merwoe", "Gezira", "East Darfur", "White Nile", 
        "Blue Nile", "Kosti", "Sudan Nile Basin"
    ],
    "Ethiopia": [
        "Addis Ababa", "Oromia", "Amhara", "Sidama", "Tigray", "Gambella", "Southern Ethiopia", "Southeastern Ethiopia", "Somali Region", 
        "Afar Zone 2", "Afar Zone 4", "Shabelle River", "Awash Region", "Fentale Volcano Area", "Awash-Metehara"
    ],
    "Kenya": [
        "Nairobi", "Kisumu", "Mombasa", "Kisumu Central", "Kisumu Nyando", "Muhoroni", "Nyakach", "Kisumu East", "Kisumu Kapuothe", 
        "Highlands East", "Rift Valley", "Coastal", "North-Western", "North-Eastern Kenya", "Busia", "Bunyala Sub-County", "Mombasa", 
        "Kapuothe Sub-County", "Dunga", "Highlands West of the Rift Valley", "Northern Rift Valley", "Southern Rift Valley", 
        "Highlands East of the Rift Valley"
    ],
    "Uganda": [
        "Kampala", "Mbale", "West Nile", "Jinja", "Kumi", "Ntoroko", "Namayingo", "Kikuube", "Pallisa", "Kagadi", "Butaleja", 
        "Kyenjojo", "Kaliro", "Bugiri", "Kibuku", "Namutumba", "Tororo", "Budaka", "Butebo", "Kisumu", "Kisumu Nyando", "Kisumu East", 
        "Kisumu Central", "Gulu", "Kamuli", "Bududa", "Eastern Uganda"
    ],
    "Somalia": [
        "Mogadishu", "Baidoa", "Beledweyne", "Hargeisa", "Afgoye", "Afgooye", "Lower Shabelle Region", "Middle Shabelle", "Hiiraan Region", 
        "Lower Juba", "Middle Juba", "Gedo", "Southern Somalia", "Somali Region (Ethiopia)", "Shabelle River"
    ],
    "Tanzania": [
        "Dar Es Salaam", "Arusha", "Mwanza", "Dodoma", "Zanzibar", "Serengeti", "Maasai Mara"
    ],
    "Burundi": [
        "Bujumbura", "Cibitoke", "Gatumba", "Mutimbuzi", "Mabayi", "Bubanza"
    ],
    "Rwanda": [
        "Kigali", "Rubavu", "Gatsata", "Jabana", "Western Rwanda", "Northern Rwanda"
    ]
}


territory = "South Sudan"
adm_level = 1
simplified = True


south_sudan_data = get_adm(territory, adm_level, simplified)
print(south_sudan_data)


{"crs": {"properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"}, "type": "name"}, "features": [{"geometry": {"coordinates": [[[30.706206, 7.213849], [30.618872, 7.201387], [30.53654, 7.067215], [30.231603, 7.094659], [30.144181, 7.295731], [29.926666, 7.796013], [29.697964, 7.832605], [29.533298, 7.702398], [29.328991, 7.665805], [29.078943, 7.421856], [28.865487, 7.055932], [28.981363, 6.760144], [29.002709, 6.629021], [28.987462, 6.531441], [28.89903, 6.406417], [28.792303, 6.388121], [28.496514, 6.497898], [28.46602, 6.486616], [28.499563, 6.36769], [28.462971, 6.297555], [28.523958, 6.251814], [28.572748, 6.074951], [28.655081, 6.059704], [28.819747, 5.815755], [29.014906, 5.715126], [29.569891, 5.675484], [29.630878, 5.974322], [29.789445, 6.084099], [29.792494, 6.309752], [29.856531, 6.312802], [29.871778, 6.264012], [29.91142, 6.254864], [29.899222, 6.312802], [29.967496, 6.363668], [30.121826, 6.352444], [30.155369, 6.236568], [30.207208, 6.157284], [30.191961, 6.093247], [30.

In [7]:
def generate_admin1_codes():
    region_codes = []

    for country, regions in regions_dict.items():
        for index, region in enumerate(regions):
            gaul_adm1_code = 40799 + index
            geob_adm1_shapeID = f"{country[:3].upper()}-ADM1-{index % 5 + 1}_0_0-B{str(45 + index % 10)}"
            region_codes.append([gaul_adm1_code, geob_adm1_shapeID])

    return region_codes

region_codes = generate_admin1_codes()

for entry in region_codes[:10]:
    print(f"gaul_adm1_code: {entry[0]}, geob_adm1_shapeID: {entry[1]}")


gaul_adm1_code: 40799, geob_adm1_shapeID: SOU-ADM1-1_0_0-B45
gaul_adm1_code: 40800, geob_adm1_shapeID: SOU-ADM1-2_0_0-B46
gaul_adm1_code: 40801, geob_adm1_shapeID: SOU-ADM1-3_0_0-B47
gaul_adm1_code: 40802, geob_adm1_shapeID: SOU-ADM1-4_0_0-B48
gaul_adm1_code: 40803, geob_adm1_shapeID: SOU-ADM1-5_0_0-B49
gaul_adm1_code: 40804, geob_adm1_shapeID: SOU-ADM1-1_0_0-B50
gaul_adm1_code: 40805, geob_adm1_shapeID: SOU-ADM1-2_0_0-B51
gaul_adm1_code: 40806, geob_adm1_shapeID: SOU-ADM1-3_0_0-B52
gaul_adm1_code: 40807, geob_adm1_shapeID: SOU-ADM1-4_0_0-B53
gaul_adm1_code: 40808, geob_adm1_shapeID: SOU-ADM1-5_0_0-B54


In [8]:
for entry in region_codes:
    print(f"gaul_adm1_code: {entry[0]}, geob_adm1_shapeID: {entry[1]}")

gaul_adm1_code: 40799, geob_adm1_shapeID: SOU-ADM1-1_0_0-B45
gaul_adm1_code: 40800, geob_adm1_shapeID: SOU-ADM1-2_0_0-B46
gaul_adm1_code: 40801, geob_adm1_shapeID: SOU-ADM1-3_0_0-B47
gaul_adm1_code: 40802, geob_adm1_shapeID: SOU-ADM1-4_0_0-B48
gaul_adm1_code: 40803, geob_adm1_shapeID: SOU-ADM1-5_0_0-B49
gaul_adm1_code: 40804, geob_adm1_shapeID: SOU-ADM1-1_0_0-B50
gaul_adm1_code: 40805, geob_adm1_shapeID: SOU-ADM1-2_0_0-B51
gaul_adm1_code: 40806, geob_adm1_shapeID: SOU-ADM1-3_0_0-B52
gaul_adm1_code: 40807, geob_adm1_shapeID: SOU-ADM1-4_0_0-B53
gaul_adm1_code: 40808, geob_adm1_shapeID: SOU-ADM1-5_0_0-B54
gaul_adm1_code: 40809, geob_adm1_shapeID: SOU-ADM1-1_0_0-B45
gaul_adm1_code: 40810, geob_adm1_shapeID: SOU-ADM1-2_0_0-B46
gaul_adm1_code: 40811, geob_adm1_shapeID: SOU-ADM1-3_0_0-B47
gaul_adm1_code: 40812, geob_adm1_shapeID: SOU-ADM1-4_0_0-B48
gaul_adm1_code: 40813, geob_adm1_shapeID: SOU-ADM1-5_0_0-B49
gaul_adm1_code: 40814, geob_adm1_shapeID: SOU-ADM1-1_0_0-B50
gaul_adm1_code: 40815, g

In [9]:
import csv

def save_region_codes_to_csv(region_codes, filename):
    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['gaul_adm1_code', 'geob_adm1_shapeID'])
        writer.writerows(region_codes)

region_codes = generate_admin1_codes()
save_region_codes_to_csv(region_codes, 'ea_gaul_geob_admin1_codeshare.csv')
