In [6]:
import os
import pandas as pd
import unicodedata

def eliminar_acentos(texto: str) -> str:
    """Return ``texto`` with its combining marks stripped.

    The text is decomposed with Unicode NFKD normalization, then every
    character whose combining class is non-zero is dropped, so that for
    example ``"Junín"`` becomes ``"Junin"``.

    Args:
        texto: The string to process.

    Returns:
        The decomposed string without combining characters.
    """
    descompuesto = unicodedata.normalize("NFKD", texto)
    # unicodedata.combining() returns 0 for base (non-combining) characters.
    caracteres_base = [car for car in descompuesto if unicodedata.combining(car) == 0]
    return "".join(caracteres_base)


# Paths are resolved relative to the current working directory.
script_dir = os.getcwd()
databases = os.path.join(script_dir, "..", "databases")

# File-name template shared by the three CSV tables; every column is read as str.
base = "ubigeo_peru_2016_{}.csv"
departamentos, provincias, distritos = (
    pd.read_csv(os.path.join(databases, base.format(nivel)), encoding="utf-8", dtype=str)
    for nivel in ("departamentos", "provincias", "distritos")
)

In [7]:
from collections import defaultdict

def crear_diccionario_equivalencias(df: pd.DataFrame) -> dict:
    """Map accent-free, upper-cased names to their original spelling.

    Args:
        df: DataFrame with a ``"name"`` column.

    Returns:
        A plain ``dict`` whose keys are ``eliminar_acentos(name).upper()`` and
        whose values are the corresponding original names. When several names
        produce the same key, the last one in ``df`` wins.

    Note:
        Missing values in ``"name"`` are skipped. They are not strings, so
        passing them to ``eliminar_acentos`` would raise ``TypeError``.
    """
    # dropna() keeps only real names; missing entries have nothing to map.
    nombres = df["name"].dropna()
    return {eliminar_acentos(nombre).upper(): nombre for nombre in nombres}

# Build one lookup table per loaded DataFrame, in the same order as before.
dep_dict, prov_dict, dist_dict = (
    crear_diccionario_equivalencias(tabla)
    for tabla in (departamentos, provincias, distritos)
)

In [8]:
from pprint import pprint

# Visual check of the departamentos mapping (normalized key -> original name).
pprint(dep_dict)

{'AMAZONAS': 'Amazonas',
 'ANCASH': 'Áncash',
 'APURIMAC': 'Apurímac',
 'AREQUIPA': 'Arequipa',
 'AYACUCHO': 'Ayacucho',
 'CAJAMARCA': 'Cajamarca',
 'CALLAO': 'Callao',
 'CUSCO': 'Cusco',
 'HUANCAVELICA': 'Huancavelica',
 'HUANUCO': 'Huánuco',
 'ICA': 'Ica',
 'JUNIN': 'Junín',
 'LA LIBERTAD': 'La Libertad',
 'LAMBAYEQUE': 'Lambayeque',
 'LIMA': 'Lima',
 'LORETO': 'Loreto',
 'MADRE DE DIOS': 'Madre de Dios',
 'MOQUEGUA': 'Moquegua',
 'PASCO': 'Pasco',
 'PIURA': 'Piura',
 'PUNO': 'Puno',
 'SAN MARTIN': 'San Martín',
 'TACNA': 'Tacna',
 'TUMBES': 'Tumbes',
 'UCAYALI': 'Ucayali'}


In [10]:
import pprint
# Directory that receives the generated *_equivalencias.py files, relative to script_dir.
base_out_path = os.path.join(script_dir, "..", "utils")

def write_to_file(final_dict: dict, level: str) -> None:
    """Write ``final_dict`` to a Python source file as a module-level constant.

    The file is ``<base_out_path>/<level>_equivalencias.py`` and contains a
    single assignment of the form ``<LEVEL>_EQUIVALENCIAS = {...}``, with the
    dictionary rendered by ``pprint.PrettyPrinter``.

    Args:
        final_dict: The mapping to serialize.
        level: Prefix used for both the file name and the constant name.
    """
    # Imported locally on purpose: an earlier cell runs
    # ``from pprint import pprint``, which binds the global name ``pprint`` to
    # the function. Relying on that global being the module would make this
    # function depend on cell execution order.
    from pprint import PrettyPrinter

    # Built without nesting double quotes inside the f-string, which is a
    # SyntaxError on Python versions older than 3.12.
    nombre = f"{level}_equivalencias"
    out_path = os.path.join(base_out_path, f"{nombre}.py")
    with open(out_path, mode="w", encoding="utf-8") as f:
        f.write(f"{nombre.upper()} = ")
        PrettyPrinter(stream=f, width=200, compact=True).pprint(final_dict)

# Emit one generated module per level, in the same order as before.
for _nivel, _tabla in (
    ("departamentos", dep_dict),
    ("provincias", prov_dict),
    ("distritos", dist_dict),
):
    write_to_file(_tabla, _nivel)
        
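# The generated files are then formatted with black, e.g.:
#   black ubigeos_peru/ubigeos_peru/resources/distritos.py
# NOTE(review): this cell writes to ../utils/<level>_equivalencias.py, so the path above may be outdated; confirm.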