### DB 연결

In [1]:
import psycopg2

conn = psycopg2.connect(
  'host=localhost port=5432 dbname=postgres user=postgres password=postgres',
  options='-c search_path=sbd'
)
conn.set_session(autocommit=True)
cur = conn.cursor()

### 테이블 생성

In [2]:
cur.execute(
  f'''
  select count(*)
  from information_schema.tables
  where
    table_schema = 'sbd' and
    table_name ~ 'building_dong'
  '''
)
if not cur.fetchone()[0]:
  cur.execute(
    open('sql/sbd-create_table_building_dong.sql', 'r').read()
  )

In [3]:
cur.execute(
  f'''
  select column_name
  from information_schema.columns
  where 
    table_schema = 'sbd' and
    table_name = 'building_dong' and
    column_default is null
  '''
)
column_list = [
  col[0]
  for col
  in cur.fetchall()
]

In [4]:
cur.execute(
  'delete from building_dong'
)

### 업로드 함수

In [5]:
def uploadToDB(data_table):
  print(datetime.now(),'start...',end='')
  data_table.columns = [
    '관리_건축물대장_PK', '대장_구분_코드', '대장_구분_코드_명', '대장_종류_코드', '대장_종류_코드_명', '대지_위치', '도로명_대지_위치', '건물_명', '시군구_코드', '법정동_코드', '대지_구분_코드', '번', '지', '특수지_명', '블록', '로트', '외필지_수', '새주소_도로_코드', '새주소_법정동_코드', '새주소_지상지하_코드', '새주소_본_번', '새주소_부_번', '동_명', '주_부속_구분_코드', '주_부속_구분_코드_명', '대지_면적(㎡)', '건축_면적(㎡)', '건폐_율(%)', '연면적(㎡)', '용적_률_산정_연면적(㎡)', '용적_률(%)', '구조_코드', '구조_코드_명', '기타_구조', '주_용도_코드', '주_용도_코드_명', '기타_용도', '지붕_코드', '지붕_코드_명', '기타_지붕', '세대_수(세대)', '가구_수(가구)', '높이(m)', '지상_층_수', '지하_층_수', '승용_승강기_수', '비상용_승강기_수', '부속_건축물_수', '부속_건축물_면적(㎡)', '총_동_연면적(㎡)', '옥내_기계식_대수(대)', '옥내_기계식_면적(㎡)', '옥외_기계식_대수(대)', '옥외_기계식_면적(㎡)', '옥내_자주식_대수(대)', '옥내_자주식_면적(㎡)', '옥외_자주식_대수(대)', '옥외_자주식_면적(㎡)', '허가_일', '착공_일', '사용승인_일', '허가번호_년', '허가번호_기관_코드', '허가번호_기관_코드_명', '허가번호_구분_코드', '허가번호_구분_코드_명', '호_수(호)', '에너지효율_등급', '에너지절감_율', '에너지_EPI점수', '친환경_건축물_등급', '친환경_건축물_인증점수', '지능형_건축물_등급', '지능형_건축물_인증점수', '생성_일자', '내진_설계_적용_여부', '내진_능력'
  ]
  data_table['시도_코드'] = [code[0:2] for code in data_table['시군구_코드']]
  data_table = data_table[data_table['시도_코드'] == '11'] # '전국' 중 '서울'만 추출
  data_table['pnu'] = data_table['시군구_코드'] + data_table['법정동_코드'] + [ # 토지 ID인 PNU 생성
    '1' if code == '0' else '2' if code == '1' else '0'
    for code
    in data_table['대지_구분_코드']
  ] + data_table['번'] + data_table['지']
  data_table = data_table[[
    'pnu', '관리_건축물대장_PK', '대장_구분_코드_명', '대지_위치', '도로명_대지_위치', '건물_명', '동_명', '주_부속_구분_코드_명', '대지_면적(㎡)', '건축_면적(㎡)', '건폐_율(%)', '연면적(㎡)', '용적_률_산정_연면적(㎡)', '용적_률(%)', '구조_코드', '구조_코드_명', '기타_구조', '주_용도_코드', '주_용도_코드_명', '기타_용도', '세대_수(세대)', '가구_수(가구)', '호_수(호)', '높이(m)', '지상_층_수', '지하_층_수', '허가_일', '착공_일', '사용승인_일', '생성_일자'
  ]]
  data_table.columns = column_list
  data_table.structure_detail = data_table.structure_detail.replace('[\\\]', '', regex=True) # 전처리 : 특수문자 제거
  data_table.use_detail = data_table.use_detail.replace('[\\\]', '', regex=True) # 전처리 : 특수문자 제거
  data_table.to_csv( # .txt 파일로 로컬 저장
    'temp_building_dong.txt',
    sep='|',
    index=False,
    header=False,
    encoding='CP949'
  )
  temp_file = open('temp_building_dong.txt', 'r')
  print('upload...',end='')
  cur.copy_from( # file bulk insert
    temp_file,
    'building_dong',
    sep='|',
    columns=column_list,
    null=''
  )
  temp_file.close()
  os.remove('temp_building_dong.txt')
  print('end',datetime.now())

### 소스 데이터 업로드

In [6]:
from zipfile import ZipFile
import pandas as pd
import os
from datetime import datetime

zf = ZipFile('D:/data/building_register/building_dong/국토교통부_건축물대장_표제부+(2022년+07월).zip')
source_chunks = pd.read_csv(
  zf.open('mart_djy_03.txt'),
  sep='|',
  encoding='CP949',
  header=None,
  dtype='string',
  chunksize=500_000
)

In [7]:
for source_dt in source_chunks:
  uploadToDB(source_dt)

2022-09-22 23:54:45.178350 start...

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_table['시도_코드'] = [code[0:2] for code in data_table['시군구_코드']]


upload...end 2022-09-22 23:54:47.155589
2022-09-22 23:54:56.467202 start...upload...end 2022-09-22 23:54:58.363611
2022-09-22 23:55:07.889169 start...upload...end 2022-09-22 23:55:09.803312
2022-09-22 23:55:19.237684 start...upload...end 2022-09-22 23:55:21.086030
2022-09-22 23:55:30.705344 start...upload...end 2022-09-22 23:55:32.558024
2022-09-22 23:55:42.298779 start...upload...end 2022-09-22 23:55:44.452835
2022-09-22 23:55:54.012610 start...upload...end 2022-09-22 23:55:56.120506
2022-09-22 23:56:05.728141 start...upload...end 2022-09-22 23:56:07.764849
2022-09-22 23:56:17.386244 start...upload...end 2022-09-22 23:56:19.442936
2022-09-22 23:56:28.965088 start...upload...end 2022-09-22 23:56:31.016756
2022-09-22 23:56:40.565534 start...upload...end 2022-09-22 23:56:42.646731
2022-09-22 23:56:52.164711 start...upload...end 2022-09-22 23:56:54.209915
2022-09-22 23:57:03.753763 start...upload...end 2022-09-22 23:57:06.012533
2022-09-22 23:57:15.585914 start...upload...end 2022-09-22 2