In [1]:
import os
import pandas as pd

# Locate the CSV relative to the directory the notebook was launched from.
script_dir = os.getcwd()
databases = os.path.join(script_dir, "..", "databases")
all_path = os.path.join(databases, "equivalencia-ubigeos-oti-concytec.csv")

# Every column is read as text (dtype=str).
# NOTE(review): "latin8" is Python's alias for ISO-8859-14, not Latin-1 or
# UTF-8 -- confirm this is the encoding the file was actually saved with.
df = pd.read_csv(all_path, dtype=str, encoding="latin8")

df.head()


Unnamed: 0,cod_dep_inei,desc_dep_inei,cod_prov_inei,desc_prov_inei,cod_ubigeo_inei,desc_ubigeo_inei,cod_dep_reniec,desc_dep_reniec,cod_prov_reniec,desc_prov_reniec,cod_ubigeo_reniec,desc_ubigeo_reniec,cod_dep_sunat,desc_dep_sunat,cod_prov_sunat,desc_prov_sunat,cod_ubigeo_sunat,desc_ubigeo_sunat
0,1,AMAZONAS,101,CHACHAPOYAS,10101,CHACHAPOYAS,1,AMAZONAS,101,CHACHAPOYAS,10101,CHACHAPOYAS,1,AMAZONAS,101,CHACHAPOYAS,10101,CHACHAPOYAS
1,1,AMAZONAS,101,CHACHAPOYAS,10102,ASUNCION,1,AMAZONAS,101,CHACHAPOYAS,10102,ASUNCION,1,AMAZONAS,101,CHACHAPOYAS,10102,ASUNCION
2,1,AMAZONAS,101,CHACHAPOYAS,10103,BALSAS,1,AMAZONAS,101,CHACHAPOYAS,10103,BALSAS,1,AMAZONAS,101,CHACHAPOYAS,10103,BALSAS
3,1,AMAZONAS,101,CHACHAPOYAS,10104,CHETO,1,AMAZONAS,101,CHACHAPOYAS,10104,CHETO,1,AMAZONAS,101,CHACHAPOYAS,10104,CHETO
4,1,AMAZONAS,101,CHACHAPOYAS,10105,CHILIQUIN,1,AMAZONAS,101,CHACHAPOYAS,10105,CHILIQUIN,1,AMAZONAS,101,CHACHAPOYAS,10105,CHILIQUIN


## Departamentos

In [4]:
from pprint import pprint
from collections import defaultdict

# Shared helper used for every level (departamento, provincia, distrito)

def preprocess(df: pd.DataFrame, level: str) -> dict:
    """Build code -> description lookup tables for one geographic level.

    Parameters
    ----------
    df : pd.DataFrame
        Equivalence table. It must contain the columns ``cod_{level}_<inst>``
        and ``desc_{level}_<inst>`` for each ``<inst>`` in ``inei``,
        ``reniec`` and ``sunat``; any other column is ignored.
    level : str
        Column infix that selects the level (the notebook uses ``"dep"``,
        ``"prov"`` and ``"ubigeo"``).

    Returns
    -------
    dict
        ``{"inei": {...}, "reniec": {...}, "sunat": {...}}`` where each inner
        dict maps that institution's code to its description. The three keys
        are always present, even when no row survives the filtering.

    Raises
    ------
    KeyError
        If any of the six expected columns is missing from ``df``.
    """
    institutions = ("inei", "reniec", "sunat")
    code_cols = {inst: f"cod_{level}_{inst}" for inst in institutions}
    desc_cols = {inst: f"desc_{level}_{inst}" for inst in institutions}

    # Keep one row per INEI code (first occurrence wins), then discard every
    # row in which any of the six code/description cells is missing.
    # NOTE(review): a row lacking a value for a single institution is dropped
    # for all three institutions -- confirm this is the intended behaviour.
    rows = (
        df[[*code_cols.values(), *desc_cols.values()]]
        .drop_duplicates(subset=code_cols["inei"])
        .dropna()
    )

    # dropna() above already guarantees that no null description remains, so
    # no further filtering is needed. When several surviving rows share the
    # same RENIEC/SUNAT code, the last row wins (plain dict assignment order).
    return {
        inst: dict(zip(rows[code_cols[inst]], rows[desc_cols[inst]]))
        for inst in institutions
    }

# Department-level lookup tables, one per institution.
departamentos = preprocess(df, "dep")
pprint(departamentos)


{'inei': {'01': 'AMAZONAS',
          '02': 'ANCASH',
          '03': 'APURIMAC',
          '04': 'AREQUIPA',
          '05': 'AYACUCHO',
          '06': 'CAJAMARCA',
          '07': 'CALLAO',
          '08': 'CUSCO',
          '09': 'HUANCAVELICA',
          '10': 'HUANUCO',
          '11': 'ICA',
          '12': 'JUNIN',
          '13': 'LA LIBERTAD',
          '14': 'LAMBAYEQUE',
          '15': 'LIMA',
          '16': 'LORETO',
          '17': 'MADRE DE DIOS',
          '18': 'MOQUEGUA',
          '19': 'PASCO',
          '20': 'PIURA',
          '21': 'PUNO',
          '22': 'SAN MARTIN',
          '23': 'TACNA',
          '24': 'TUMBES',
          '25': 'UCAYALI'},
 'reniec': {'01': 'AMAZONAS',
            '02': 'ANCASH',
            '03': 'APURIMAC',
            '04': 'AREQUIPA',
            '05': 'AYACUCHO',
            '06': 'CAJAMARCA',
            '07': 'CUSCO',
            '08': 'HUANCAVELICA',
            '09': 'HUANUCO',
            '10': 'ICA',
            '11': 'JUNIN',

## Provincias

In [5]:
# Province-level lookup tables, one per institution.
provincias = preprocess(df, "prov")
pprint(provincias)

{'inei': {'0101': 'CHACHAPOYAS',
          '0102': 'BAGUA',
          '0103': 'BONGARA',
          '0104': 'CONDORCANQUI',
          '0105': 'LUYA',
          '0106': 'RODRIGUEZ DE MENDOZA',
          '0107': 'UTCUBAMBA',
          '0201': 'HUARAZ',
          '0202': 'AIJA',
          '0203': 'ANTONIO RAYMONDI',
          '0204': 'ASUNCION',
          '0205': 'BOLOGNESI',
          '0206': 'CARHUAZ',
          '0207': 'CARLOS FERMIN FITZCARRALD',
          '0208': 'CASMA',
          '0209': 'CORONGO',
          '0210': 'HUARI',
          '0211': 'HUARMEY',
          '0212': 'HUAYLAS',
          '0213': 'MARISCAL LUZURIAGA',
          '0214': 'OCROS',
          '0215': 'PALLASCA',
          '0216': 'POMABAMBA',
          '0217': 'RECUAY',
          '0218': 'SANTA',
          '0219': 'SIHUAS',
          '0220': 'YUNGAY',
          '0301': 'ABANCAY',
          '0302': 'ANDAHUAYLAS',
          '0303': 'ANTABAMBA',
          '0304': 'AYMARAES',
          '0305': 'COTABAMBAS',
          '030

## Distritos

In [6]:
# District-level lookup tables (columns use the "ubigeo" infix), one per institution.
distritos = preprocess(df, "ubigeo")
pprint(distritos)

{'inei': {'010101': 'CHACHAPOYAS',
          '010102': 'ASUNCION',
          '010103': 'BALSAS',
          '010104': 'CHETO',
          '010105': 'CHILIQUIN',
          '010106': 'CHUQUIBAMBA',
          '010107': 'GRANADA',
          '010108': 'HUANCAS',
          '010109': 'LA JALCA',
          '010110': 'LEIMEBAMBA',
          '010111': 'LEVANTO',
          '010112': 'MAGDALENA',
          '010113': 'MARISCAL CASTILLA',
          '010114': 'MOLINOPAMPA',
          '010115': 'MONTEVIDEO',
          '010116': 'OLLEROS',
          '010117': 'QUINJALCA',
          '010118': 'SAN FRANCISCO DE DAGUAS',
          '010119': 'SAN ISIDRO DE MAINO',
          '010120': 'SOLOCO',
          '010121': 'SONCHE',
          '010201': 'BAGUA',
          '010202': 'ARAMANGO',
          '010203': 'COPALLIN',
          '010204': 'EL PARCO',
          '010205': 'IMAZA',
          '010206': 'LA PECA',
          '010301': 'JUMBILLA',
          '010302': 'CHISQUILLA',
          '010303': 'CHURUJA',
        

## Writing to a file

In [None]:
# Import only the class. A plain `import pprint` in this cell would rebind the
# notebook-global name `pprint` (imported earlier as a function) to the module,
# so re-running any earlier `pprint(...)` cell afterwards would fail with
# "TypeError: 'module' object is not callable".
from pprint import PrettyPrinter

base_out_path = os.path.join(script_dir, "..", "resources")


def write_to_file(final_dict: dict, level: str) -> None:
    """Write ``final_dict`` as a Python assignment to ``<base_out_path>/<level>.py``.

    The generated file contains ``<LEVEL> = <pretty-printed dict>``, where
    ``<LEVEL>`` is ``level`` upper-cased. An existing file is overwritten.

    Parameters
    ----------
    final_dict : dict
        Mapping to serialise with ``pprint`` formatting.
    level : str
        Base name of the output file (without ``.py``); its upper-cased form
        is used as the variable name inside the file.
    """
    out_path = os.path.join(base_out_path, f"{level}.py")
    with open(out_path, mode="w", encoding="utf-8") as f:
        # Variable name first; the printer then appends the dict literal
        # directly to the same stream.
        f.write(f"{level.upper()} = ")
        PrettyPrinter(stream=f, width=200, compact=True).pprint(final_dict)

# Emit one resource module per level, in the same order as before.
for _level_name, _lookup in (
    ("departamentos", departamentos),
    ("provincias", provincias),
    ("distritos", distritos),
):
    write_to_file(_lookup, _level_name)
        
# Afterwards, format each generated file with black, e.g.:
#   black ubigeos_peru/ubigeos_peru/resources/distritos.py