In [1]:
import datetime
import gc
import glob
import os
import time
import xml.etree.ElementTree as ET

import numpy as np
import pandas as pd
from tqdm import tqdm

In [54]:
data_dir = './'
region = '01'
region_dir = os.path.join(data_dir, region)
final_dir = os.path.join(data_dir, 'result')
os.makedirs(final_dir, exist_ok=True)
print(data_dir, region_dir, final_dir)

./ ./01 ./result


In [12]:
def parse_xml(x):
    """
    Parse GAR XML file into pandas dataframe object
    """
    tree = ET.parse(x)
    root = tree.getroot()
    df = [child.attrib for child in root]
    df = pd.DataFrame.from_dict(df)
    return df

def cleanup(x):
    """
    Manual object cleaning
    """
    del x
    gc.collect()

def get_adms(df):
    """
    Get administrative "object-parent" relations into dictionary for later use
    """
    rftree = df[['OBJECTID', 'PARENTOBJID']].groupby(
        by='OBJECTID'
    )['PARENTOBJID'].apply(list).to_dict()
    return rftree

In [55]:
#ADDR_OBJ
fname = glob.glob(os.path.join(region_dir, 'AS_ADDR_OBJ_*.XML'))
fname = [x for x in fname if 'PARAMS' not in x and 'DIVISION' not in x]
if len(fname) != 1:
    msg = f'Please check file count for region {region_dir} there are {len(fname)} files'
    #logger.error(msg)
    raise Exception(msg)
fname = fname[0]
adobj = parse_xml(fname)
adobj = adobj[(adobj['ISACTUAL'] == '1') & (adobj['ISACTIVE'] == '1')]
#adobj.head()
#SHORT_NAMES
fname = glob.glob(os.path.join(data_dir, 'AS_ADDR_OBJ_TYPES_*.XML'))
if len(fname) != 1:
    msg = f'Please check file count for region {region_dir} there are {len(fname)} files'
    #logger.error(msg)
    raise Exception(msg)
fname = fname[0]
adobjt = parse_xml(fname)
#adobjt.head()
#MERGE SHORT_NAMES
adobj = adobj.merge(
    adobjt[['SHORTNAME', 'DESC', 'LEVEL']].rename(
        columns={
            'SHORTNAME': 'TYPENAME',
            'DESC': 'TYPELONGNAME'
        }
    ),
    on=['LEVEL', 'TYPENAME']
)
cleanup(adobjt)
#adobj.head()
#LEVELS_INFO
fname = glob.glob(os.path.join(data_dir, 'AS_OBJECT_LEVELS_*.XML'))
if len(fname) != 1:
    msg = f'Please check file count for region {region_dir} there are {len(fname)} files'
    #logger.error(msg)
    raise Exception(msg)
fname = fname[0]
lev = parse_xml(fname)
#MERGE LEVELS_INFO
adobj = adobj.merge(
    lev[['NAME', 'LEVEL']].rename(
        columns={
            'NAME': 'LEVELNAME'
        }
    ),
    on='LEVEL'
)
cleanup(lev)
#adobj.head()

In [78]:
#OKTMO
fname = glob.glob(os.path.join(region_dir, 'AS_ADDR_OBJ_PARAMS_*.XML'))
fname = fname[0]
adobjp = parse_xml(fname)
#adobjp.head()

#5" NAME="Почтовый индекс
#6" NAME="ОКАТО
#7" NAME="OKTMO
#10" NAME="Код КЛАДР

adobjp = adobjp[
    (
        (adobjp['TYPEID'] == '7') | (adobjp['TYPEID'] == '10')
        
    ) & (
        adobjp['CHANGEIDEND'] == '0'
    )
]
adobjp = adobjp[
    adobjp.ENDDATE.apply(
        lambda x: datetime.datetime.strptime(
            x, '%Y-%m-%d'
        ) > datetime.datetime.fromtimestamp(
            time.time()
        )
    )
]
#print(adobjp)

cladr = adobjp[adobjp['TYPEID'] == '10'][['OBJECTID', 'VALUE']].rename(
    columns={'VALUE': 'KLADR'}
).drop_duplicates().groupby(
    by='OBJECTID'
).agg(
    lambda x: x.to_list()
).to_dict('index')

adobjp = adobjp[adobjp['TYPEID'] == '7'][['OBJECTID', 'VALUE']].rename(
    columns={'VALUE': 'OKTMO'}
).drop_duplicates().groupby(
    by='OBJECTID'
).agg(
    lambda x: x.to_list()
).to_dict('index')
#adobjp
#cladr

#print(adobj)
adobj = adobj.set_index('OBJECTID') \
.join(pd.DataFrame.from_dict(adobjp, orient='index')) \
.join(pd.DataFrame.from_dict(cladr, orient='index')) \
.reset_index()
#print(adobj)

ValueError: columns overlap but no suffix specified: Index(['OKTMO'], dtype='object')

In [79]:
#READ HOUSES
fname = glob.glob(os.path.join(region_dir, 'AS_HOUSES_*.XML'))
fname = [x for x in fname if 'PARAMS' not in x]
if len(fname) != 1:
    msg = f'Please check file count for region {region_dir} there are {len(fname)} files'
    #logger.error(msg)
    raise Exception(msg)
fname = fname[0]
hous = parse_xml(fname)
hous = hous.rename(
    columns={
        'ADDTYPE1': 'HOUSETYPE1',
        'ADDTYPE2': 'HOUSETYPE2',
        'ADDNUM1': 'HOUSENUM1',
        'ADDNUM2': 'HOUSENUM2'
    }
)
if 'ISACTUAL' in hous.columns:
    hous = hous[(hous['ISACTUAL'] == '1') & (hous['ISACTIVE'] == '1')]
else:
    hous = hous[(hous['ISACTIVE'] == '1')]
#hous.head()
fname = glob.glob(os.path.join(data_dir, 'AS_HOUSE_TYPES_*.XML'))
if len(fname) != 1:
    msg = f'Please check file count for region {region_dir} there are {len(fname)} files'
    #logger.error(msg)
    raise Exception(msg)
fname = fname[0]
houst = parse_xml(fname)
houst = houst.rename(
    columns={
        'SHORTNAME': 'TYPENAME',
        'DESC': 'TYPELONGNAME',
        'ID': 'HOUSETYPE'
    }
)
#houst.head()
fname = glob.glob(os.path.join(data_dir, 'AS_ADDHOUSE_TYPES_*.XML'))
if len(fname) != 1:
    msg = f'Please check file count for region {region_dir} there are {len(fname)} files'
    #logger.error(msg)
    raise Exception(msg)
fname = fname[0]
housta = parse_xml(fname)
housta = housta.rename(
    columns={
        'SHORTNAME': 'TYPENAME',
        'DESC': 'TYPELONGNAME',
        'ID': 'HOUSETYPE'
    }
)
#housta.head()
hous = hous.merge(
    houst[[
        'HOUSETYPE', 'TYPENAME', 'TYPELONGNAME'
    ]].drop_duplicates(),
    on='HOUSETYPE'
)
if 'HOUSETYPE1' in hous.columns:
    hous = hous.merge(
        housta[[
            'HOUSETYPE', 'TYPENAME', 'TYPELONGNAME'
        ]].rename(
            columns={
                'HOUSETYPE': 'HOUSETYPE1'
            }
        ).drop_duplicates(),
        on='HOUSETYPE1',
        how='left',
        suffixes=(None, '1')
    )
else:
    hous['HOUSETYPE1'] = np.nan
    hous['TYPELONGNAME1'] = np.nan
    hous['HOUSENUM1'] = np.nan
    hous['TYPENAME1'] = np.nan
if 'HOUSETYPE2' in hous.columns:
    hous = hous.merge(
        housta[[
            'HOUSETYPE', 'TYPENAME', 'TYPELONGNAME'
        ]].rename(
            columns={
                'HOUSETYPE': 'HOUSETYPE2'
            }
        ).drop_duplicates(),
        on='HOUSETYPE2',
        how='left',
        suffixes=(None, '2')
    )
else:
    hous['HOUSETYPE2'] = np.nan
    hous['TYPELONGNAME2'] = np.nan
    hous['HOUSENUM2'] = np.nan
    hous['TYPENAME2'] = np.nan
cleanup(houst)
cleanup(housta)
#hous.head()

In [80]:
hous['LEVEL'] = '10'
hous['LEVELNAME'] = 'Здание/Сооружение'
hous['NAME'] = hous[['TYPELONGNAME', 'HOUSENUM']].apply(
    lambda x: (
        x['TYPELONGNAME'].lower() + ' '
        if x['TYPELONGNAME'] == x['TYPELONGNAME']
        else ''
    ) + x['HOUSENUM'],
    axis=1
)
hous['NAME1'] = hous[['TYPELONGNAME1', 'HOUSENUM1']].apply(
    lambda x: (
        x['TYPELONGNAME1'].lower() + ' '
        if x['TYPELONGNAME1'] == x['TYPELONGNAME1']
        else ''
    ) + (
        x['HOUSENUM1']
        if x['HOUSENUM1'] == x['HOUSENUM1']
        else ''
    ),
    axis=1
)
hous['NAME2'] = hous[['TYPELONGNAME2', 'HOUSENUM2']].apply(
    lambda x: (
        x['TYPELONGNAME2'].lower() + ' '
        if x['TYPELONGNAME2'] == x['TYPELONGNAME2']
        else ''
    ) + (
        x['HOUSENUM2']
        if x['HOUSENUM2'] == x['HOUSENUM2']
        else ''
    ),
    axis=1
)
#hous.head()

In [81]:
fname = glob.glob(os.path.join(region_dir, 'AS_HOUSES_PARAMS_*.XML'))
fname = fname[0]
hp = parse_xml(fname)
#print(hp.head())
hp = hp[
    (
        (hp['TYPEID'] == '7') | (hp['TYPEID'] == '10')
    ) & (
        hp['CHANGEIDEND'] == '0'
    )
]
hp = hp[
    hp.ENDDATE.apply(
        lambda x: datetime.datetime.strptime(
            x, '%Y-%m-%d'
        ) > datetime.datetime.fromtimestamp(
            time.time()
        )
    )
]
#hp.head()

cladr_h = hp[hp['TYPEID'] == '10'][['OBJECTID', 'VALUE']].rename(
    columns={'VALUE': 'KLADR'}
).drop_duplicates().groupby(
    by='OBJECTID'
).agg(
    lambda x: x.to_list()
).to_dict('index')

hp = hp[hp['TYPEID'] == '7'][['OBJECTID', 'VALUE']].rename(
    columns={'VALUE': 'OKTMO'}
).drop_duplicates().groupby(
    by='OBJECTID'
).agg(
    lambda x: x.to_list()
).to_dict('index')
#hp

#print(hous)
hous = hous.set_index('OBJECTID') \
.join(pd.DataFrame.from_dict(hp, orient='index')) \
.join(pd.DataFrame.from_dict(cladr_h, orient='index')) \
.reset_index()
#print(hous)

              ID   OBJECTID                            OBJECTGUID   CHANGEID  \
0          11093    1472973  6143953a-e27d-40d9-88ee-df15b09af1a4    4082543   
1          11129    1473000  024acb0a-3355-47ba-a949-f0561acc8bfe    4082598   
2          11180    1473048  a22aae34-32d0-4109-ad8c-070be80367c0    4082689   
3          11207    1473077  cafb1147-c791-4cd0-a021-f70dfa019193    4082739   
4          11210    1473080  2a466e3c-84fb-4a89-a783-20b7ab88a833    4082745   
...          ...        ...                                   ...        ...   
148340  71933864  103020878  368f2f43-095d-4381-af64-6392151a6c61  228963329   
148341  71937140  103034251  e003df11-95a5-41b0-b9f9-5874a924ccab  230164296   
148342  71937151  103034907  a95ed96d-1628-4182-a5fc-39d9a83a0ba6  230164967   
148343  71937164  103034967  ed6a4b55-dff6-46bd-b10d-07e02ff16a39  230165036   
148344  71937174  103035321  d827f93d-7898-4a4c-aa8c-d91fcbb84ac3  230165404   

       HOUSENUM HOUSETYPE OPERTYPEID PR

In [91]:
hadobj = pd.concat(
  [
      adobj[[
          'OBJECTID', 'OBJECTGUID', 'NAME', 'TYPENAME', 'LEVEL',
          'ISACTUAL', 'ISACTIVE', 'TYPELONGNAME', 'LEVELNAME',
          'OKTMO', 'KLADR'
      ]],
      hous[[
          'OBJECTID', 'OBJECTGUID', 'HOUSENUM', 'HOUSETYPE',
          'TYPENAME', 'TYPELONGNAME', 'HOUSENUM1', 'HOUSETYPE1',
          'TYPENAME1', 'TYPELONGNAME1', 'HOUSENUM2', 'HOUSETYPE2',
          'TYPENAME2', 'TYPELONGNAME2', 'ISACTUAL', 'ISACTIVE',
          'LEVEL', 'NAME', 'NAME1', 'NAME2', 'LEVELNAME',
          'OKTMO'#, 'KLADR'
      ]]
  ],
  sort=True,
  ignore_index=True
)
cleanup(adobj)
cleanup(hous)
#hadobj.head()
hadobj.to_csv(os.path.join(final_dir, f'{region}_hadobj.csv'), index=False)

In [None]:
#####CHAINS

In [92]:
def get_adms(df):
    """
    Get administrative "object-parent" relations into dictionary for later use
    """
    rftree = df[['OBJECTID', 'PARENTOBJID']].groupby(
        by='OBJECTID'
    )['PARENTOBJID'].apply(list).to_dict()
    return rftree


def get_adms_rec_rev(chain, rdadm, housdict, objdict):
    """
    Recursive address chain builder
    """
    objid = chain[-1]

    if objid in rdadm and objid == objid:
        prnts = rdadm[objid]
        if len(prnts) > 1:
            prnts = [x for x in prnts if x in objdict]
            if len(prnts) > 1:
                seedOKTMO = housdict[chain[0]]['OKTMO']
                prntOKTMO = {prnt: objdict[prnt]['OKTMO'] for prnt in prnts}
                prnts = [prnt for prnt in prnts if len(set(seedOKTMO).intersection(set(prntOKTMO[prnt]))) > 0]
            if len(prnts) == 0:
                return None
        chains = [chain + [obj] for obj in prnts if obj == obj]
        if len(chains) > 1:
            return [get_adms_rec_rev(ch, rdadm, housdict, objdict) for ch in chains]
        if len(chains) == 0:
            return chain
        return get_adms_rec_rev(chains[0], rdadm, housdict, objdict)
    else:
        return chain

In [122]:
fname = glob.glob(os.path.join(region_dir, 'AS_MUN_HIERARCHY_*.XML'))
if len(fname) != 1:
    msg = f'Please check file count for region {region_dir} there are {len(fname)} files'
    #logger.error(msg)
    raise Exception(msg)
fname = fname[0]
adm = parse_xml(fname)
adm = adm[
    adm.ENDDATE.apply(
        lambda x: datetime.datetime.strptime(
            x, '%Y-%m-%d'
        ) > datetime.datetime.fromtimestamp(
            time.time()
        )
    )
]
chready = 'PATH' in adm.columns
cols = ['OBJECTID', 'PARENTOBJID'] + (['PATH'] if chready else [])
adm0 = adm[adm['ISACTIVE'] == '1'][cols].merge(
    hadobj[(hadobj['ISACTUAL'] == '1') & (hadobj['ISACTIVE'] == '1')],
    on='OBJECTID'
)
cleanup(adm)

In [123]:
#adm0[adm0['OBJECTID'] == '1578']

Unnamed: 0,OBJECTID,PARENTOBJID,PATH,HOUSENUM,HOUSENUM1,HOUSENUM2,HOUSETYPE,HOUSETYPE1,HOUSETYPE2,ISACTIVE,...,NAME1,NAME2,OBJECTGUID,OKTMO,TYPELONGNAME,TYPELONGNAME1,TYPELONGNAME2,TYPENAME,TYPENAME1,TYPENAME2
15,1578,95230363,11.95230356.95230363.1578,,,,,,,1,...,,,1d2aa610-1553-49b6-99d2-4c65cda0868d,[79615415106],Хутор,,,х,,


In [135]:
chready = 'PATH' in adm.columns
cols = ['OBJECTID', 'PARENTOBJID'] + (['PATH'] if chready else [])
adm0 = adm[adm['ISACTIVE'] == '1'][cols].merge(
    hadobj[(hadobj['ISACTUAL'] == '1') & (hadobj['ISACTIVE'] == '1')],
    on='OBJECTID'
)
print(f'chready: {chready}')

chready: True


In [136]:
#adm0[adm0['OBJECTID'] == '1578']

Unnamed: 0,OBJECTID,PARENTOBJID,PATH,HOUSENUM,HOUSENUM1,HOUSENUM2,HOUSETYPE,HOUSETYPE1,HOUSETYPE2,ISACTIVE,...,NAME1,NAME2,OBJECTGUID,OKTMO,TYPELONGNAME,TYPELONGNAME1,TYPELONGNAME2,TYPENAME,TYPENAME1,TYPENAME2
15,1578,95230363,11.95230356.95230363.1578,,,,,,,1,...,,,1d2aa610-1553-49b6-99d2-4c65cda0868d,[79615415106],Хутор,,,х,,


In [138]:
if chready:
    chains = [
        tuple(y for y in reversed(x.split('.')))
        #for x in tqdm(adm0[adm0['LEVEL'] == '10']['PATH'])
        for x in tqdm(adm0['PATH'])
    ]
    cleanup(adm0)
    #print(len(chains), chains[:5])
    
hadobjd = hadobj.set_index('OBJECTID').to_dict('index')
if not chready:
    # get child-parent dictionary
    rdadm = get_adms(adm0)
    rdadm = {k: list(set(v)) for k, v in rdadm.items()}
    cleanup(adm0)
    #print([(k, v) for k, v in rdadm.items()][:5])
    # building chains recursively
    chains = [
        get_adms_rec_rev([x], rdadm, hp, adobjp)
        #for x in tqdm(hadobj[hadobj['LEVEL'] == '10']['OBJECTID'].drop_duplicates())
        for x in tqdm(hadobj['OBJECTID'].drop_duplicates())
    ]
    chains = [x for x in chains if x is not None]
## save and clean
#hadobj.to_csv(f'{region}_hadobj.csv', index=False)
cleanup(hadobj)
#[(k, v) for k, v in hadobjd.items()][:2]


  0%|          | 0/154464 [00:00<?, ?it/s][A
 32%|███▏      | 49929/154464 [00:00<00:00, 498996.62it/s][A
100%|██████████| 154464/154464 [00:00<00:00, 516239.31it/s][A


In [140]:
[x for x in chains if '1578' in x]

[('1578', '95230363', '95230356', '11'),
 ('1881', '1578', '95230363', '95230356', '11'),
 ('1854', '1578', '95230363', '95230356', '11'),
 ('1547', '1578', '95230363', '95230356', '11'),
 ('1383', '1578', '95230363', '95230356', '11'),
 ('1293', '1578', '95230363', '95230356', '11'),
 ('1747', '1578', '95230363', '95230356', '11')]

In [202]:
dfch = pd.DataFrame()
if not chready:
    odd_chains = [
        tuple(x)
        for x in chains
        if type(x[0]) == list
    ]
    odd_chains = [reduce_included(x) for x in odd_chains]
    odd_chains = [x for x, y in odd_chains if y] + [z for x, y in odd_chains if not y for z in x]
    chains = [
        tuple(x)
        for x in chains
        if type(x[0]) != list
    ] + odd_chains

dfch['chain'] = list(set(chains))
cleanup(chains)
#dfch.head()

In [203]:
dfch['levchain'] = [
    tuple([hadobjd[y]['LEVEL'] for y in x if y != '0' and y in hadobjd])
    for x in tqdm(dfch['chain'])
]
dat = [
    {
        m: l
        for m, l in zip(x, y)
    }
    for x, y in zip(dfch['levchain'], dfch['chain'])
]
for i in range(10, 0, -1):
    dfch[f'{i}'] = [
        d[f'{i}']
        if f'{i}' in d
        else None
        for d in dat
    ]
#dfch.head()

100%|██████████| 154464/154464 [00:00<00:00, 191845.05it/s]


In [204]:
def reduce_included(x):
    """
    Reduce included chains
    """
    maxl = 0
    maxch = []
    x = [tuple(y) for y in x]
    for y in x:
        maxl = (len(y) > maxl) * len(y) + (len(y) <= maxl) * maxl
        maxch = y if len(y) == maxl else maxch
    mask = [len(set(y).intersection(set(maxch))) == len(y) and y != maxch for y in x]
    ret = [y for y, z in zip(x, mask) if not z]
    single = False
    if len(ret) == 1:
        ret = ret[0]
        single = True
    return ret, single


def get_town(x):
    """
    Chain post-cleanup.
    """
    priority = ['5', '6', '4', '7', '1']
    street = [f'{i}' for i in range(8, 0, -1)]
    streets = [p for p in street if x[p] == 1]
    if len(streets) == 0:
        street = None
    else:
        street = streets[0]
    town = [p for p in priority if p != street and x[p] == 1]
    town = town[0] if len(town) > 0 else None
    leftover = [
        x for x in streets
        if x != street
        and x != town
        and x not in ['1', '2', '3']
    ]
    muni = [x for x in streets if x in ['2', '3']]

    return street, town, leftover, muni

In [205]:
chl = list(set(dfch['levchain'].apply(lambda x: '-'.join(x))))
df = pd.DataFrame()
df['levchain'] = chl
for i in range(10, 0, -1):
    dat = [(f'{i}' in y.split('-')) * 1 for y in chl]
    df[f'{i}'] = dat
#df.head()
lst = df.apply(get_town, axis=1)
df['street'] = [x[0] for x in lst]
df['tow'] = [x[1] for x in lst]
df['left'] = [x[2] for x in lst]
df['mun'] = [x[3][0] if len(x[3]) > 0 else np.nan for x in lst]
df['levchain'] = df['levchain'].apply(lambda x: tuple(x.split('-')))
#df.head()
dfch = dfch.merge(df[['levchain', 'street', 'tow', 'left', 'mun']], on='levchain')
dfch['id_reg'] = region
cleanup(df)

dfch['id_tow'] = dfch.apply(lambda x: x[f'{x["tow"]}'] if pd.notna(x["tow"]) else np.nan, axis=1)
dfch['id_mun'] = dfch.apply(lambda x: x[f'{x["mun"]}'] if pd.notna(x["mun"]) else np.nan, axis=1)

dfch.rename(columns={'1': 'l1', '2': 'l2', '3': 'l3', '4': 'l4', '5': 'l5',
                     '6': 'l6', '7': 'l7', '8': 'l8', '9': 'l9', '10': 'l10'}, inplace=True)

#dfch.head()
dfch.to_csv(os.path.join(final_dir, f'{region}_parsed_chains.csv'), index=False)



Unnamed: 0,chain,levchain,l10,l9,l8,l7,l6,l5,l4,l3,l2,l1,street,tow,left,mun,id_reg,id_tow,id_mun
0,"(51471201, 3217, 2455, 95230402, 95230395, 11)","(10, 8, 6, 4, 3, 1)",51471201,,3217,,2455,,95230402,95230395,,11,8,6,[4],3,1,2455,95230395
1,"(80632002, 2621, 2743, 95230403, 95230395, 11)","(10, 8, 6, 4, 3, 1)",80632002,,2621,,2743,,95230403,95230395,,11,8,6,[4],3,1,2743,95230395
2,"(64051411, 2634, 2478, 95230401, 95230395, 11)","(10, 8, 6, 4, 3, 1)",64051411,,2634,,2478,,95230401,95230395,,11,8,6,[4],3,1,2478,95230395
3,"(40967556, 2710, 2260, 95230397, 95230395, 11)","(10, 8, 6, 4, 3, 1)",40967556,,2710,,2260,,95230397,95230395,,11,8,6,[4],3,1,2260,95230395
4,"(64829437, 6023, 5288, 95230390, 95230389, 11)","(10, 8, 6, 4, 3, 1)",64829437,,6023,,5288,,95230390,95230389,,11,8,6,[4],3,1,5288,95230389


IndexError: list index out of range


  0%|          | 0/154464 [00:00<?, ?it/s][A
 25%|██▌       | 38695/154464 [00:00<00:00, 386913.47it/s][A
 61%|██████    | 93796/154464 [00:00<00:00, 483406.17it/s][A
100%|██████████| 154464/154464 [00:00<00:00, 479162.55it/s][A


[('1578', '95230363', '95230356', '11'),
 ('1881', '1578', '95230363', '95230356', '11'),
 ('1854', '1578', '95230363', '95230356', '11'),
 ('1547', '1578', '95230363', '95230356', '11'),
 ('1383', '1578', '95230363', '95230356', '11'),
 ('1293', '1578', '95230363', '95230356', '11'),
 ('1747', '1578', '95230363', '95230356', '11')]


  0%|          | 0/148345 [00:00<?, ?it/s][A
 29%|██▊       | 42587/148345 [00:00<00:00, 425830.81it/s][A
 62%|██████▏   | 92432/148345 [00:00<00:00, 468515.09it/s][A
100%|██████████| 148345/148345 [00:00<00:00, 472826.72it/s][A

  0%|          | 0/148345 [00:00<?, ?it/s][A
  9%|▉         | 14078/148345 [00:00<00:00, 140768.05it/s][A
 19%|█▉        | 28155/148345 [00:00<00:00, 139889.59it/s][A
 28%|██▊       | 42145/148345 [00:00<00:00, 138804.68it/s][A
 38%|███▊      | 56027/148345 [00:00<00:00, 135682.42it/s][A
 47%|████▋     | 69697/148345 [00:00<00:00, 136037.56it/s][A
 56%|█████▌    | 83308/148345 [00:00<00:00, 131749.90it/s][A
 65%|██████▌   | 96508/148345 [00:00<00:00, 126993.72it/s][A
 74%|███████▍  | 110209/148345 [00:00<00:00, 130031.64it/s][A
 83%|████████▎ | 123287/148345 [00:00<00:00, 130254.61it/s][A
100%|██████████| 148345/148345 [00:01<00:00, 132272.50it/s][A


{'HOUSENUM': nan,
 'HOUSENUM1': nan,
 'HOUSENUM2': nan,
 'HOUSETYPE': nan,
 'HOUSETYPE1': nan,
 'HOUSETYPE2': nan,
 'ISACTIVE': '1',
 'ISACTUAL': '1',
 'KLADR': ['0100200001900'],
 'LEVEL': '6',
 'LEVELNAME': 'Населенный пункт',
 'NAME': 'Соколов',
 'NAME1': nan,
 'NAME2': nan,
 'OBJECTGUID': '1d2aa610-1553-49b6-99d2-4c65cda0868d',
 'OKTMO': ['79615415106'],
 'TYPELONGNAME': 'Хутор',
 'TYPELONGNAME1': nan,
 'TYPELONGNAME2': nan,
 'TYPENAME': 'х',
 'TYPENAME1': nan,
 'TYPENAME2': nan}

Unnamed: 0,chain
0,"(80632002, 2621, 2743, 95230403, 95230395, 11)"
1,"(51471201, 3217, 2455, 95230402, 95230395, 11)"
2,"(13061096, 4462, 3634, 3380, 95230384, 9523038..."
3,"(31730642, 625, 58, 95230406, 11)"
4,"(64051411, 2634, 2478, 95230401, 95230395, 11)"



  0%|          | 0/148345 [00:00<?, ?it/s][A
 11%|█▏        | 17021/148345 [00:00<00:00, 170182.17it/s][A
 24%|██▎       | 34988/148345 [00:00<00:00, 175744.73it/s][A
 36%|███▌      | 52998/148345 [00:00<00:00, 177723.85it/s][A
 48%|████▊     | 71149/148345 [00:00<00:00, 179210.76it/s][A
 60%|██████    | 89239/148345 [00:00<00:00, 179812.50it/s][A
 73%|███████▎  | 107753/148345 [00:00<00:00, 181616.70it/s][A
 85%|████████▍ | 125915/148345 [00:00<00:00, 180555.66it/s][A
100%|██████████| 148345/148345 [00:00<00:00, 179677.47it/s][A


Unnamed: 0,chain,levchain,10,9,8,7,6,5,4,3,2,1
0,"(80632002, 2621, 2743, 95230403, 95230395, 11)","(10, 8, 6, 4, 3, 1)",80632002,,2621,,2743,,95230403.0,95230395,,11
1,"(51471201, 3217, 2455, 95230402, 95230395, 11)","(10, 8, 6, 4, 3, 1)",51471201,,3217,,2455,,95230402.0,95230395,,11
2,"(13061096, 4462, 3634, 3380, 95230384, 9523038...","(10, 8, 7, 6, 4, 3, 1)",13061096,,4462,3634.0,3380,,95230384.0,95230381,,11
3,"(31730642, 625, 58, 95230406, 11)","(10, 8, 6, 3, 1)",31730642,,625,,58,,,95230406,,11
4,"(64051411, 2634, 2478, 95230401, 95230395, 11)","(10, 8, 6, 4, 3, 1)",64051411,,2634,,2478,,95230401.0,95230395,,11


Unnamed: 0,chain,levchain,10,9,8,7,6,5,4,3,2,1


Index(['HOUSENUM', 'HOUSENUM1', 'HOUSENUM2', 'HOUSETYPE', 'HOUSETYPE1',
       'HOUSETYPE2', 'ISACTIVE', 'ISACTUAL', 'KLADR', 'LEVEL', 'LEVELNAME',
       'NAME', 'NAME1', 'NAME2', 'OBJECTGUID', 'OBJECTID', 'OKTMO',
       'TYPELONGNAME', 'TYPELONGNAME1', 'TYPELONGNAME2', 'TYPENAME',
       'TYPENAME1', 'TYPENAME2'],
      dtype='object')

HOUSENUM                                          NaN
HOUSENUM1                                         NaN
HOUSENUM2                                         NaN
HOUSETYPE                                         NaN
HOUSETYPE1                                        NaN
HOUSETYPE2                                        NaN
ISACTIVE                                            1
ISACTUAL                                            1
LEVEL                                               8
LEVELNAME                Элемент улично-дорожной сети
NAME                                     Дзержинского
NAME1                                             NaN
NAME2                                             NaN
OBJECTGUID       75dd371a-fb26-47d7-906f-5702ad3cb8c7
TYPELONGNAME                                    Улица
TYPELONGNAME1                                     NaN
TYPELONGNAME2                                     NaN
TYPENAME                                           ул
TYPENAME1                   