# Финальная сборка и заливка табличек с данными о многоквартирных домах, зданиях и сооружениях

Заливаются таблички:
- <code>apartment_houses_all_data</code>
- <code>all_dmr_houses_data</code>
- <code>all_gis_houses_data</code>

Перед прогоном тетрадки нужно запустить подготовку данных:
- <code>step1_prepare_dmr_houses_data.ipynb</code>
- <code>step2_prepare_gis_houses_data.ipynb</code>

In [None]:
import os
import json
from warnings import warn
import pandas as pd
import numpy as np
from tqdm import tqdm

from postamats.utils import load
from postamats.utils.connections import DB
from postamats.utils.prepare_data import OBJECT_ID_COL, PREPARED_DATA_PATH, PREPARED_DMR_FILE, PREPARED_GIS_FILE

# Directory holding the prepared pickles, resolved from the project constant
# (presumably turned into an absolute path by the helper - confirm in postamats.utils.load).
prepared_data_path = load.get_full_path_from_relative(PREPARED_DATA_PATH)

# Enable DataFrame.progress_apply and friends.
tqdm.pandas()
# Show every column when a frame is displayed in the notebook.
pd.options.display.max_columns = None

In [None]:
# Path to the JSON file with the database connection settings.
# The POSTAMATS_DB_CONFIG environment variable, when set to a non-empty value,
# overrides the developer-specific default below, so the notebook can run on
# another machine without editing this cell.
CONFIG_PATH = (
    os.environ.get('POSTAMATS_DB_CONFIG')
    or '/Users/affernus/PROJECTS_DATA/hacks/postomat_optimisation/db_config.json'
)

In [None]:
# Load the frames produced by the two preparation notebooks.
# NOTE: read_pickle executes whatever the pickle contains - only use it on
# files produced by our own step1/step2 notebooks.
prepared_dmr = pd.read_pickle(os.path.join(prepared_data_path, PREPARED_DMR_FILE))

# The GIS frame is loaded without its own object id column; after the merges
# below the object id comes from the DMR side only.
prepared_gis_house_data = pd.read_pickle(
    os.path.join(prepared_data_path, PREPARED_GIS_FILE)
)
prepared_gis_house_data = prepared_gis_house_data.drop(OBJECT_ID_COL, axis=1)

In [None]:
# Preview the first two rows of the prepared GIS house data.
prepared_gis_house_data.head(2)

In [None]:
# Remember the GIS column labels (the object id column was dropped at load time)
# so that the GIS part can be cut back out of the merged frame later.
gis_cols = prepared_gis_house_data.columns

In [None]:
# Preview the first two rows of the prepared DMR data.
prepared_dmr.head(2)

In [None]:
# Remember the DMR column labels so that the DMR part can be cut back out
# of the merged frame later.
dmr_cols = prepared_dmr.columns

In [None]:
# Distinct non-null identifiers present in the DMR data, one set per key column.
dmr_fias, dmr_kadn, dmr_kadzu = (
    set(prepared_dmr[key_col].dropna())
    for key_col in ('GUID_FIAS', 'KAD_N', 'KAD_ZU')
)

In [None]:
# Distinct non-null identifiers present in the GIS house data, one set per key column.
houses_data_fias, houses_data_kadn = (
    set(prepared_gis_house_data[key_col].dropna())
    for key_col in ('GUID_FIAS', 'CADASTRAL_NUM')
)

In [None]:
# Number of distinct non-null identifiers on each side:
# GIS count first, DMR count second.
print('fias:', len(houses_data_fias), len(dmr_fias))
print('kadn', len(houses_data_kadn), len(dmr_kadn))
# This line compares GIS cadastral numbers with the DMR KAD_ZU values, so it
# must not reuse the 'kadn' label of the line above.
print('kadzu', len(houses_data_kadn), len(dmr_kadzu))

In [None]:
# Size of the overlap between GIS and DMR identifiers, one line per key pair:
# GUID_FIAS, CADASTRAL_NUM vs KAD_N, CADASTRAL_NUM vs KAD_ZU.
print(
    len(houses_data_fias.intersection(dmr_fias)),
    len(houses_data_kadn.intersection(dmr_kadn)),
    len(houses_data_kadn.intersection(dmr_kadzu)),
    sep='\n',
)

In [None]:
# Attach GIS house data to every DMR object by GUID_FIAS.
# The left join keeps all DMR rows, including those with no GUID_FIAS.
# GIS rows without GUID_FIAS are dropped first: pandas matches null keys to
# null keys, so otherwise every DMR row lacking GUID_FIAS would be joined to
# every GIS row lacking it, attaching unrelated house data and inflating the result.
data_merged_fias = prepared_dmr.merge(
    prepared_gis_house_data.dropna(subset=['GUID_FIAS']),
    on=['GUID_FIAS'],
    how='left'
    )

In [None]:
# Match DMR objects to GIS houses by cadastral number: DMR KAD_N against
# GIS CADASTRAL_NUM. Rows with a missing key are excluded on both sides and
# only matched pairs are kept.
data_merged_kadn = pd.merge(
    prepared_dmr.dropna(subset=['KAD_N']),
    prepared_gis_house_data.dropna(subset=['CADASTRAL_NUM']),
    how='inner',
    left_on='KAD_N',
    right_on='CADASTRAL_NUM',
)

In [None]:
# Match DMR objects to GIS houses by DMR KAD_ZU against GIS CADASTRAL_NUM.
# Rows with a missing key are excluded on both sides and only matched pairs
# are kept.
data_merged_kadzu = pd.merge(
    prepared_dmr.dropna(subset=['KAD_ZU']),
    prepared_gis_house_data.dropna(subset=['CADASTRAL_NUM']),
    how='inner',
    left_on='KAD_ZU',
    right_on='CADASTRAL_NUM',
)

In [None]:
# Stack the three match attempts (by GUID_FIAS, by KAD_N, by KAD_ZU) into one
# frame. ignore_index gives every row a unique label, so the label-aligned
# .loc assignment below cannot be confused by index values repeated across
# the three inputs.
data_merged = pd.concat(
    [data_merged_fias, data_merged_kadn, data_merged_kadzu],
    ignore_index=True,
)

# In the cadastral merges GUID_FIAS is not the join key, so it arrives from
# both sides as GUID_FIAS_x (left, DMR) and GUID_FIAS_y (right, GIS).
# Keep the DMR value as the single GUID_FIAS column.
has_dmr_fias = data_merged['GUID_FIAS_x'].notna()
data_merged.loc[has_dmr_fias, 'GUID_FIAS'] = data_merged.loc[has_dmr_fias, 'GUID_FIAS_x']

# A row counts as "matched" when GUID_HOUSE is present.
data_merged['has_house_data'] = data_merged['GUID_HOUSE'].notna()
data_merged = data_merged.drop(columns=['GUID_FIAS_x', 'GUID_FIAS_y'])
# Put matched rows last so that keep='last' prefers them. The sort has to be
# stable: the default quicksort may reorder rows with equal keys, which makes
# the surviving row arbitrary when an object has several matched rows.
# NOTE(review): with a stable sort the row latest in concat order wins among
# matched rows (KAD_ZU over KAD_N over GUID_FIAS) - confirm this is the
# intended priority, otherwise reorder the list passed to pd.concat.
data_merged = data_merged.sort_values(by='has_house_data', kind='mergesort')
print(data_merged.shape)
# Exactly one row per object id.
data_merged = data_merged.drop_duplicates(subset=OBJECT_ID_COL, keep='last')
data_merged = data_merged.drop(columns='has_house_data')
data_merged = data_merged.reset_index(drop=True)
# Sanity check: compare the printed shape with the shape of prepared_dmr.
print(data_merged.shape)
print(prepared_dmr.shape)

In [None]:
# GIS table: the original GIS columns plus the object id taken from the merge.
final_gis = data_merged[[*gis_cols, OBJECT_ID_COL]].copy()
# Drop rows in which every column other than the object id and GUID_FIAS is
# empty, i.e. objects for which no GIS record was found.
final_gis = final_gis.dropna(
    how='all',
    subset=[
        col for col in final_gis.columns
        if col != OBJECT_ID_COL and col != 'GUID_FIAS'
    ],
)
final_gis

In [None]:
# DMR table: the deduplicated merged rows restricted to the original DMR columns.
final_dmr = data_merged.loc[:, dmr_cols].copy()
final_dmr

In [None]:
# Apartment-house table: only objects that have coordinates and a total area.
# The column is added with assign() on the filtered result instead of by item
# assignment on it, which avoids pandas' SettingWithCopyWarning and never
# touches data_merged.
final_houses = (
    data_merged
    .dropna(subset=['LAT', 'LON', 'TOTAL_AREA'])
    .assign(OBJECT_TYPE='многоквартирный дом')
)
final_houses

In [None]:
# Read the database connection settings. JSON text is UTF-8, so state the
# encoding explicitly instead of relying on the platform default.
with open(CONFIG_PATH, mode='r', encoding='utf-8') as db_file:
    db_config = json.load(db_file)

# Project connection wrapper built from the loaded settings.
database = DB(db_config)

In [None]:
# Target table name -> frame to upload.
tables_to_upload = {
    'all_dmr_houses_data': final_dmr,
    'all_gis_houses_data': final_gis,
    'apartment_houses_all_data': final_houses,
}

# Validate every table before touching the database, so that a problem in a
# later table cannot leave the database half-updated. An explicit exception is
# used because assert statements are skipped when Python runs with -O.
for name, data in tables_to_upload.items():
    duplicated_ids = int(data[OBJECT_ID_COL].duplicated().sum())
    if duplicated_ids:
        raise ValueError(
            f'{name}: {duplicated_ids} duplicated values in column {OBJECT_ID_COL!r}'
        )

for name, data in tables_to_upload.items():
    database.load_to_bd(data, name)
    # Read the table back and show it (display is provided by IPython/Jupyter).
    display(database.get_table_from_bd(name))