In [8]:
import requests
import json

from typing import Dict
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys

import pandas as pd

import openpyxl
from openpyxl.styles import Alignment

def request_url(url, timeout=10):
    """Send an HTTP GET to ``url`` and return the response body parsed as JSON.

    Args:
        url: Fully built request URL, query string included.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would freeze the whole crawl.

    Returns:
        Whatever ``json.loads`` produces for the response text.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout elapses.
        json.JSONDecodeError: If the body is not valid JSON.
    """
    # NOTE(review): the bearer token is hard-coded and looks like a JWT with
    # an expiry claim; it probably needs refreshing and should come from
    # configuration rather than source control - confirm.
    headers = {
        "Host": "new.land.naver.com",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
        "Authorization": "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IlJFQUxFU1RBVEUiLCJpYXQiOjE2ODU4ODA4NjksImV4cCI6MTY4NTg5MTY2OX0.7ZKPpEs6jShBYNigKu-zXScm0CvwkQYgUCyF6o5ZMZM"
    }
    r = requests.get(
        url,
        data={"sameAddressGroup": "false"},
        headers=headers,
        timeout=timeout,
    )
    # Decode as UTF-8 and drop a leading byte-order mark if the server sends one.
    r.encoding = "utf-8-sig"
    return json.loads(r.text)


#########
def getLongLat(temp):
    """Extract ``(longitude, latitude)`` as strings from a complex response.

    Args:
        temp: Mapping that may contain a ``'complex'`` entry holding
            ``'latitude'`` and ``'longitude'`` values.

    Returns:
        A ``(longitude, latitude)`` tuple of strings, or an empty tuple when
        the ``'complex'`` entry or either coordinate is missing.
    """
    if 'complex' not in temp.keys():
        return ()

    complexInfo = temp['complex']
    if 'latitude' in complexInfo and 'longitude' in complexInfo:
        return (str(complexInfo['longitude']), str(complexInfo['latitude']))
    return ()

# Destination coordinates as (longitude, latitude) strings, keyed by a short
# name; used as the default arrival points for the transit lookups.
majorWorkplace = dict(
    gangnam=('127.0276188', '37.4979517'),
    gwanghwamun=('126.9765024', '37.5716645'),
    yeouido=('126.9240785', '37.5216245'),
)

# Fragments that are concatenated, in this order, into a transit-directions
# request URL: base, start point, goal point, departure time, options.
urlDict = {
    'urlBase': 'https://map.naver.com/v5/api/transit/directions/point-to-point?',
    'departure': 'start=',
    'arrival': 'goal=',
    'departureTime': 'crs=EPSG:4326&departureTime=2023-08-04T02:14:53&',
    'option': 'mode=STATIC&lang=ko&includeDetailOperation=true',
}

def getTimeAndDistance(departure, arrivals=majorWorkplace, urlDict=urlDict):
    """Query transit duration and distance from ``departure`` to each arrival.

    Args:
        departure: ``(longitude, latitude)`` pair of strings. An empty or
            too-short value (``getLongLat`` returns ``()`` when coordinates
            are missing) yields an empty result instead of an IndexError.
        arrivals: Mapping of destination name to ``(longitude, latitude)``
            strings.
        urlDict: Mapping with the URL fragments ``'urlBase'``,
            ``'departure'``, ``'arrival'``, ``'departureTime'`` and
            ``'option'``.

    Returns:
        Dict mapping each destination name to ``(duration, distance)`` taken
        from the first entry of the response's ``'staticPaths'`` list.
        Destinations whose response has no paths are omitted.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        json.JSONDecodeError: If a response body is not valid JSON.
    """
    durationAndDistance = {}

    # Nothing to look up without a usable start coordinate.
    if not departure or len(departure) < 2:
        return durationAndDistance

    urlBase = urlDict['urlBase']
    departureParam = urlDict['departure'] + departure[0] + ',' + departure[1] + '&'

    for arrivalName, arrivalCoord in arrivals.items():
        arrivalParam = urlDict['arrival'] + arrivalCoord[0] + ',' + arrivalCoord[1] + '&'
        url = urlBase + departureParam + arrivalParam + urlDict['departureTime'] + urlDict['option']
        # A timeout keeps one unresponsive request from blocking the whole run.
        r = requests.get(url, data={"sameAddressGroup": "false"}, timeout=10)
        r.encoding = "utf-8-sig"
        temp = json.loads(r.text)

        pathList = temp['staticPaths'] if 'staticPaths' in temp.keys() else []
        if len(pathList) != 0:
            # Only the first path returned by the service is recorded.
            durationAndDistance[arrivalName] = (pathList[0]['duration'], pathList[0]['distance'])
    return durationAndDistance

#########
def get_dong_code(file_path, location_gu, location_dong):
    """Look up the 10-character codes for a district (gu) and neighbourhood (dong).

    The file is read once, top to bottom, as EUC-KR text. The district is
    matched first; the neighbourhood search then continues from the line
    after the district match, so a neighbourhood line is only found when it
    comes later in the file than the district line.

    Args:
        file_path: Path of the code listing file.
        location_gu: District name to look for.
        location_dong: Neighbourhood name to look for.

    Returns:
        ``(gu_code, dong_code)``; each is the first 10 characters of the
        matching line, or ``'0000000000'`` when no line matched.
    """
    not_found = '0000000000'
    dong_key = location_gu + ' ' + location_dong

    with open(file_path, 'r', encoding='euc-kr') as code_file:
        # Both searches consume the same file iterator, so the second one
        # resumes where the first one stopped. Only lines that also contain
        # '존재' are accepted as matches.
        gu_line = next(
            (row for row in code_file if location_gu in row and '존재' in row),
            None,
        )
        dong_line = next(
            (row for row in code_file if dong_key in row and '존재' in row),
            None,
        )

    gu_code = not_found if gu_line is None else gu_line[0:10]
    dong_code = not_found if dong_line is None else dong_line[0:10]
    return gu_code, dong_code

def get_complex_info(location_num_dong, house_hold_count_lower_limit=300):
    """Fetch the complexes of a neighbourhood and keep the large ones.

    Args:
        location_num_dong: Neighbourhood code string sent as ``cortarNo``.
        house_hold_count_lower_limit: Minimum ``totalHouseholdCount`` a
            complex needs in order to be kept.

    Returns:
        List of ``(complexName, complexNo, totalHouseholdCount)`` tuples, in
        response order. Each kept complex is also printed, followed by a
        blank line.
    """
    query = 'cortarNo=' + location_num_dong + '&' + 'realEstateType=APT%3APRE&order='
    # Download the complex listing for this neighbourhood.
    response = request_url('https://new.land.naver.com/api/regions/complexes?' + query)

    selected = []
    for entry in response['complexList']:
        households = entry['totalHouseholdCount']
        if not (house_hold_count_lower_limit <= households):
            continue

        name = entry['complexName']
        number = entry['complexNo']
        selected.append((name, number, households))
        print(name, number, households)
        print()

    return selected

def get_complex_details(complex_list, deal_price_lower_limit=0, deal_price_upper_limit=70000, pyeong_low_limit="15", pyeong_upper_limit="40"):
    """Collect price, transport and commute details for each complex.

    For every complex, its area ("pyeong") types are filtered by exclusive
    size, then by average deal price. Complexes that end up with at least
    one entry are enriched with the hot-place list and with transit
    duration/distance to the ``majorWorkplace`` destinations.

    Args:
        complex_list: Iterable of tuples whose first two items are the
            complex name and the complex number.
        deal_price_lower_limit: Inclusive lower bound for the average deal
            price (same unit as the service's ``dealAveragePrice``).
        deal_price_upper_limit: Inclusive upper bound for the average deal
            price.
        pyeong_low_limit: Inclusive lower bound for ``exclusivePyeong``;
            a number or numeric string.
        pyeong_upper_limit: Inclusive upper bound for ``exclusivePyeong``;
            a number or numeric string.

    Returns:
        Dict keyed by complex name. Each value is a dict that may hold:
        ``pyeongNo`` keys mapping to lists of ``(label, value)`` tuples,
        ``"meanPrice"``, ``"hotPlaceList"`` and ``"majorWorkplaces"``.
        A complex with no qualifying data maps to an empty dict.

    Raises:
        ValueError: If a pyeong limit cannot be converted to ``float``.
    """
    # The limits used to be compared as strings, which orders "150" between
    # "15" and "40"; compare numerically instead.
    pyeong_min = float(pyeong_low_limit)
    pyeong_max = float(pyeong_upper_limit)

    api_base = 'https://new.land.naver.com/api/complexes/'
    complex_dict = {}

    for item in complex_list:
        complex_name = item[0]
        complex_num = item[1]

        # Start every complex with an empty detail dict.
        details = {}
        complex_dict[complex_name] = details

        complex_info = request_url(f'{api_base}{complex_num}?complexNo={complex_num}&initial=Y')

        # Walk the area types this complex offers.
        for pyeong in complex_info['areaList']:
            try:
                exclusive_pyeong = float(pyeong['exclusivePyeong'])
            except (TypeError, ValueError):
                # A size that is not numeric cannot satisfy the range filter.
                continue
            if not (pyeong_min <= exclusive_pyeong <= pyeong_max):
                continue

            pyeong_no = pyeong['pyeongNo']

            # Price summary for this area type.
            price_info = request_url(
                f'{api_base}{complex_num}/prices?complexNo={complex_num}'
                f'&tradeType=A1&year=5&priceChartChange=true&areaNo={pyeong_no}&type=summary'
            )

            if 'marketPrice' in price_info:
                market_price = price_info['marketPrice']
                if deal_price_lower_limit <= market_price['dealAveragePrice'] <= deal_price_upper_limit:
                    # One list of (label, value) rows per area type.
                    pyeong_list = [
                        ("공급 면적(m2)", pyeong['supplyArea']),
                        ("전용 면적(m2)", pyeong['exclusiveArea']),
                        ("전용 면적(평)", pyeong['exclusivePyeong']),
                        ("평균 매매가", market_price['dealAveragePrice']),
                        ("평균 전세가", market_price['leaseAveragePrice']),
                        ("전세가율", market_price['leasePerDealRate']),
                    ]
                    details[pyeong_no] = pyeong_list

                    # Most recent actual transaction for this area type.
                    real_info = request_url(
                        f'{api_base}{complex_num}/prices/real?complexNo={complex_num}'
                        f'&tradeType=A1&year=5&priceChartChange=true&areaNo={pyeong_no}&type=table'
                    )
                    if 'realPriceOnMonthList' in real_info and len(real_info['realPriceOnMonthList']) != 0:
                        latest = real_info['realPriceOnMonthList'][0]['realPriceList'][0]
                        pyeong_list.append(("매매 년 월", f"{latest['tradeYear']}/{latest['tradeMonth']}"))
                        pyeong_list.append(("최근 매매가", latest['dealPrice']))
            else:
                # Some area types exist without price data; fall back to the
                # midpoint of the complex-level min/max price.
                overview = request_url(f'{api_base}overview/{complex_num}?complexNo={complex_num}')
                if 'maxPrice' in overview and 'minPrice' in overview:
                    mean_price = (overview['maxPrice'] + overview['minPrice']) / 2
                    if mean_price != 0 and deal_price_lower_limit <= mean_price <= deal_price_upper_limit:
                        details["meanPrice"] = [mean_price]

        if not details:
            continue

        # Nearby transport information.
        transit_info = request_url(
            'https://m.land.naver.com/complex/getFacilTransInfo?lat=0.0&lng=0.0&hscpNo=' + str(complex_num)
        )
        if 'hotPlaceList' in transit_info:
            details["hotPlaceList"] = [
                (place['hotPoint'], place['placeNm'], place['transTpNm'], place['minutes'])
                for place in transit_info['hotPlaceList']
            ]

        # The coordinates are in the complex response fetched above, so it is
        # reused rather than downloading the same URL a second time.
        coordinates = getLongLat(complex_info)
        # Without coordinates there is no start point for a transit query.
        durationAndDistance = getTimeAndDistance(coordinates, majorWorkplace) if coordinates else {}

        if len(durationAndDistance) != 0:
            details["majorWorkplaces"] = []
            for key, value in durationAndDistance.items():
                details["majorWorkplaces"].append((key, value))
                print(key, value)

    return complex_dict

def get_complex_name_list(complex_dict):
    """Print every non-empty complex and return the names of those complexes.

    Args:
        complex_dict: Mapping of complex name to a dict of detail entries,
            each entry being an iterable of items.

    Returns:
        Names whose detail dict is non-empty, in mapping order.
    """
    names = []
    for name, details in complex_dict.items():
        if len(details) == 0:
            continue

        print(f'단지이름: {name}')
        names.append(name)
        for entry_key in details:
            print(entry_key)
            for row in details[entry_key]:
                print(row)
            print()
    return names

def getTime(text: str) -> str:
    """Return the text between the first ``', '`` and the first ``'분거리'``.

    Raises:
        ValueError: If either marker is absent from ``text``.
    """
    minutesStart = text.index(', ') + 2
    minutesEnd = text.index('분거리')
    return text[minutesStart:minutesEnd]

def getNum(text: str) -> str:
    """Return everything in ``text`` that precedes the first ``'개'``.

    Raises:
        ValueError: If ``text`` contains no ``'개'``.
    """
    return text[:text.index('개')]

def getNearInfo(str1: str, str2: str) -> Dict:
    """Parse two newline-separated text blobs into a nearby-facility dict.

    Args:
        str1: Text scanned line by line for the '마트/백화점' and '종합병원'
            sections; scanning stops at the first line containing '공원'.
        str2: Text scanned line by line for the '스타벅스' and '학원' lines.

    Returns:
        Dict that may contain: '마트/백화점' and '종합병원' mapped to
        ``{name: minutes}`` dicts (minutes extracted by ``getTime``), and
        '스타벅스' and '학원' mapped to the count string extracted by
        ``getNum``. A category is present only if its name occurred in the
        text.

    NOTE(review): the slicing below presumably relies on a fixed page layout
    (two leading lines, alternating name/time lines) - confirm against the
    scraped page. If '마트/백화점' never appears but lines were collected,
    the first pairing loop would raise KeyError.
    """
    nearInfoDic = {}
    categoryName = ['마트/백화점', '종합병원', '공원', '스타벅스', '학원']
    nearInfoList = []

    startIndex = 0
    # index1 / index2 count lines so the mart and hospital sections can be
    # sliced out of nearInfoList after the loop.
    index1 = 1
    index2 = 0
    while True:
        try:
            # str.index raises ValueError when no newline is left, which ends
            # the loop through the except branch below.
            endIndex = str1[startIndex:].index('\n')
            text = str1[startIndex:startIndex + endIndex + 1]

            # The park section marks the end of the part that is parsed.
            if categoryName[2] in text:
                break

            if categoryName[0] in text:
                nearInfoDic[categoryName[0]] = {}
            elif categoryName[1] in text:
                nearInfoDic[categoryName[1]] = {}

            # While only the mart header has been seen, index1 advances and
            # index2 follows it; after the hospital header only index2 moves.
            if categoryName[0] in nearInfoDic.keys() and categoryName[1] not in nearInfoDic.keys():
                index1 = index1 + 1
                index2 = index1
            elif categoryName[1] in nearInfoDic.keys():
                index2 = index2 + 1
            # Store the line without its trailing newline.
            nearInfoList.append(text[:len(text) - 1])
            strLen = len(str1[startIndex:startIndex + endIndex + 1])  # unused
            startIndex = startIndex + endIndex + 1
        except Exception as e:
            break

    # Convert the counters into slice bounds: list1 is nearInfoList[2:index02]
    # (mart entries) and list2 is everything after position index02
    # (hospital entries).
    index01 = 2
    index1 = index1 - 1

    index02 = index1 + 1
    index2 = index2 - 1

    list1 = nearInfoList[index01:index02]
    list2 = nearInfoList[index02 + 1:]

    # Even positions hold a name, the following odd position its time text.
    for i in range(len(list1)):
        if i % 2 == 0 and i + 1 < len(list1):
            nearInfoDic[categoryName[0]][list1[i]] = getTime(list1[i + 1])

    for i in range(len(list2)):
        if i % 2 == 0 and i + 1 < len(list2):
            nearInfoDic[categoryName[1]][list2[i]] = getTime(list2[i + 1])

    startIndex = 0
    endIndex = 0
    index = 0
    index3 = 0
    index4 = 0
    nearInfoList1 = []
    while True:
        try:
            # index is 1-based while nearInfoList1 is 0-based, so
            # nearInfoList1[index3] is the line AFTER the matching header.
            index = index + 1
            endIndex = str2[startIndex:].index('\n')
            text = str2[startIndex:startIndex + endIndex + 1]

            if categoryName[3] in text:
                nearInfoDic[categoryName[3]] = {}
                index3 = index
            elif categoryName[4] in text:
                nearInfoDic[categoryName[4]] = {}
                index4 = index

            nearInfoList1.append(text[:len(text) - 1])
            strLen = len(str2[startIndex:startIndex + endIndex + 1])  # unused
            startIndex = startIndex + endIndex + 1
        except Exception as e:
            # No newline left: keep the remaining tail as the last line.
            nearInfoList1.append(str2[startIndex:])
            break

    # Replace the placeholder dicts with the count taken from the line that
    # follows each header.
    if categoryName[3] in nearInfoDic.keys():
        nearInfoDic[categoryName[3]] = getNum(nearInfoList1[index3])

    if categoryName[4] in nearInfoDic.keys():
        nearInfoDic[categoryName[4]] = getNum(nearInfoList1[index4])

    return nearInfoDic

def find_similar_string(target, string_list):
    """Return the candidate whose character set best overlaps ``target``.

    Similarity is ``|A & B| / |A | B|`` over the sets of characters of the
    two strings, so character order and repetition are ignored.

    Args:
        target: String to match.
        string_list: Iterable of candidate strings.

    Returns:
        The first candidate with the highest similarity above zero, or
        ``None`` when no candidate shares a character with ``target``
        (including when ``string_list`` is empty).
    """
    target_chars = set(target)
    max_similarity = 0
    similar_string = None

    for string in string_list:
        candidate_chars = set(string)
        union = target_chars | candidate_chars
        if not union:
            # Both strings are empty: there is nothing to compare, and the
            # ratio would divide by zero.
            continue

        similarity = len(target_chars & candidate_chars) / len(union)
        if similarity > max_similarity:
            max_similarity = similarity
            similar_string = string

    return similar_string

def getSupplyHouseholdRatio(driver, file_path, locationTuple: tuple) -> tuple:
    """Compute the upcoming housing supply relative to the existing households.

    Args:
        driver: Selenium WebDriver used to load the supply API response.
        file_path: Path of the Excel sheet that has a '법정동코드' column;
            the value in the second column of the matching row is used as
            the household baseline.
        locationTuple: ``(region_code, dong_code)`` strings. The first item
            goes into the API query, the second selects the Excel row.

    Returns:
        ``(supply_count_text, percentage)`` where ``percentage`` is the
        supply divided by the baseline, times 100, rounded to 2 decimals.

    Raises:
        ValueError: If the response has no parsable 'offer_household_total'
            value.
        IndexError: If no Excel row matches the dong code.
    """
    url = 'https://hogangnono.com/api/supply?regionCode=' + locationTuple[0] + '&offerHouseholdFrom=2023-07-01&offerHouseholdTo=2025-07-01'
    # The implicit wait is a driver-wide setting for element lookups, so it
    # is set once, before the lookup it is meant to cover.
    driver.implicitly_wait(10)
    driver.get(url)

    # Read the element text once; every .text access is a round trip to the
    # browser.
    responseText = driver.find_element(By.XPATH, "/html/body/pre").text

    marker = 'offer_household_total'
    startNum = responseText.find(marker)
    if startNum == -1:
        raise ValueError("'offer_household_total' not found in supply response")

    # Skip the key plus the two characters after it.
    # NOTE(review): assumes the text looks like "offer_household_total":<n>}
    # - confirm, or parse the body with json instead.
    valueStart = startNum + len(marker) + 2
    valueEnd = responseText.find('}', valueStart)
    if valueEnd == -1:
        raise ValueError("unterminated 'offer_household_total' value in supply response")

    currentHHNum = responseText[valueStart:valueEnd]
    print("currentHHNum = ", currentHHNum)

    # Load the baseline sheet as a DataFrame.
    df = pd.read_excel(file_path)
    matched = df[df['법정동코드'] == int(locationTuple[1])]
    if matched.empty:
        raise IndexError("no row with 법정동코드 == " + str(locationTuple[1]) + " in " + str(file_path))

    defaultHHNum = matched.iloc[0, 1]
    print("defalutHHNum = ", defaultHHNum)

    householdRatio = (currentHHNum, round((float(currentHHNum) / float(defaultHHNum)) * 100, 2))
    return householdRatio

def getComplexCodeDic(locationNum: str) -> Dict[str, str]:
    """Scrape the complex-name to complex-code mapping for one region.

    Opens a Chrome session of its own, loads the region page, reads every
    ``<li>`` of the apartment list and always closes the browser afterwards.

    Args:
        locationNum: Region code appended to the region page URL.

    Returns:
        Dict mapping the displayed complex name to characters 27-31 of the
        item's link URL.

    Raises:
        selenium.common.exceptions.WebDriverException: If the page or an
            expected element cannot be loaded or found.
    """
    service = Service()
    options = webdriver.ChromeOptions()
    driver = webdriver.Chrome(service=service, options=options)

    try:
        driver.get('https://hogangnono.com/region/' + locationNum)
        driver.implicitly_wait(10)

        listRoot = driver.find_element(By.XPATH, '//*[@id="local3-aptlist-scroll"]/div[1]/ul')

        complexCodeDic = {}
        for li in listRoot.find_elements(By.TAG_NAME, 'li'):
            link = li.find_element(By.TAG_NAME, 'a')
            name = link.find_element(By.CLASS_NAME, 'name').text
            # NOTE(review): the fixed slice presumably skips
            # 'https://hogangnono.com/apt/' (27 characters) and assumes a
            # 5-character code - confirm codes never have another length.
            complexCodeDic[name] = link.get_attribute('href')[27:32]
        return complexCodeDic
    finally:
        # Close the browser even when a lookup fails, so no Chrome process
        # is left running.
        driver.quit()

def getNearInfoForComplex(driver: webdriver.Chrome, complexCode: str) -> Dict:
    """Load a complex's store page and parse its two facility panels.

    Args:
        driver: Selenium Chrome driver used to load the page.
        complexCode: Code inserted into the page URL.

    Returns:
        The dict produced by ``getNearInfo`` from the text of the two
        panels, located by their CSS class names.
    """
    storePageUrl = 'https://hogangnono.com/apt/' + complexCode + '/0/0/store'
    driver.get(storePageUrl)
    driver.implicitly_wait(10)

    panelXpaths = (
        "//div[contains(@class, 'css-3wmht0')]",
        "//div[contains(@class, 'css-dohd6y')]",
    )
    panelTexts = []
    for xpath in panelXpaths:
        panel = driver.find_element(By.XPATH, xpath)
        driver.implicitly_wait(10)
        panelTexts.append(panel.text)

    return getNearInfo(panelTexts[0], panelTexts[1])

def save_dict_items_to_excel(file_path, dict_items):
    """Write the per-complex detail tables to a new Excel workbook.

    Each non-empty complex gets its name in column 1. Every integer-keyed
    entry (one per area type) is written as a two-column block of
    ``(label, value)`` rows starting on the same row as the name, with the
    blocks placed side by side. Entries with non-integer keys are printed
    but not written.

    Args:
        file_path: Destination path of the ``.xlsx`` file.
        dict_items: Mapping of complex name to its detail dict.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    centered = Alignment(horizontal='center', vertical='center')

    cnRow = 1  # Row where the current complex starts.
    for complexName, complexInfo in dict_items.items():
        if not complexInfo:
            continue

        sheet.cell(cnRow, 1, complexName)

        # The name alone occupies one row. Starting at 1 keeps a complex that
        # has no integer-keyed entries from being overwritten by the next one.
        maxRow = 1
        inCol = 2
        for infoName, info in complexInfo.items():
            print("infoName = ", infoName)
            print(info)
            if not isinstance(infoName, int):
                continue

            rowsWritten = 0
            for data in info:
                # Rows of a block start level with the complex name.
                sheet.cell(cnRow + rowsWritten, inCol, data[0])
                sheet.cell(cnRow + rowsWritten, inCol + 1, data[1])
                rowsWritten += 1

            maxRow = max(maxRow, rowsWritten)
            inCol += 2  # The next block goes to the right of this one.

        # Centre everything written for this complex.
        for row in sheet.iter_rows(min_row=cnRow, max_row=cnRow + maxRow - 1, min_col=1, max_col=inCol - 1):
            for cell in row:
                cell.alignment = centered

        cnRow += maxRow

    workbook.save(file_path)
    

def main():
    """Run the interactive workflow: ask for a region, scrape, export to Excel.

    Steps: read the district/neighbourhood names from stdin, resolve their
    codes, collect complex details, enrich each matching complex with nearby
    facility data and the region's supply ratio, then save one workbook named
    after the region.
    """
    print("원하는 지역을 입력하세요.")
    location_gu = input('00구 = ')
    location_dong = input('00동 = ')

    location_name = location_gu + '_' + location_dong
    # NOTE(review): input and output paths are hard-coded to one user's
    # Downloads folder.
    file_path = "/Users/rhino/Downloads/DongCode.txt"
    location_num_gu, location_num_dong = get_dong_code(file_path, location_gu, location_dong)
    locationTuple = (location_num_gu, location_num_dong)
    print("locationTuple = ", locationTuple)
    if location_num_gu == '0000000000' or location_num_dong == '0000000000':
        print("입력한 지역 정보가 잘못되었습니다. 다시 입력해주세요.")
        return

    complex_list = get_complex_info(location_num_dong)
    complex_dict = get_complex_details(complex_list)

    # complex_dict holds a key for every complex, even those with no
    # qualifying data, so the list of non-empty names is what shows whether
    # anything matched.
    complex_name_list = get_complex_name_list(complex_dict)
    if not complex_name_list:
        print("해당 지역에 조건을 만족하는 단지가 없습니다.")
        return

    complexCodeDic = getComplexCodeDic(location_num_dong)
    complexCodeDicKeyList = list(complexCodeDic.keys())

    options = webdriver.ChromeOptions()
    options.add_argument("headless")
    service = Service()

    driver = webdriver.Chrome(service=service, options=options)
    try:
        # Workbook with the household baseline per region code.
        file_path = '/Users/rhino/Downloads/HouseHoldingsCode.xlsx'
        householdRatio = getSupplyHouseholdRatio(driver, file_path, locationTuple)

        for targetComplexName in complex_name_list:
            # Names differ slightly between the two sites, so pick the
            # closest one.
            keyName = find_similar_string(targetComplexName, complexCodeDicKeyList)

            try:
                nearInfo = getNearInfoForComplex(driver, complexCodeDic[keyName])

                complex_dict[targetComplexName]['nearInfo'] = nearInfo
                complex_dict[targetComplexName]['공급량(세대, 비율)'] = householdRatio
            except Exception as e:
                # Best effort: one failing complex must not abort the export.
                print(keyName, ":near info error = ", e)
    finally:
        # Always release the headless browser, even if a step above raised.
        driver.quit()

    file_path = '/Users/rhino/Downloads/' + location_name + '.xlsx'  # Output workbook path.

    save_dict_items_to_excel(file_path, complex_dict)

    print("done!!!!")
    
# Module-level dict that a later cell passes to save_dict_items_to_excel;
# nothing in the visible code ever fills it.
dictTmp = {}
# Start the interactive workflow only when this file is run as a script.
if __name__ == "__main__":
    main()



원하는 지역을 입력하세요.
00구 = 분당구
00동 = 정자동
locationTuple =  ('4113500000', '4113510300')
느티마을공무원3,4단지 3014 1776

더샵분당파크리버 121966 506

동양파라곤(주상복합) 12915 344

로얄팰리스(주상복합) 12914 566

미켈란쉐르빌(주상복합) 8409 803

분당더샵스타파크(주상복합) 14532 378

상록라이프 2623 750

상록우성 2645 1762

상록임광보성 2637 568

아이파크분당1(주상복합) 8580 540

아이파크분당3(주상복합) 100722 307

정든동아1단지 111368 300

정든동아2단지 2663 706

정든신화 2690 564

정든우성6단지 2719 706

정든한진7차 2742 382

정든한진8차 2829 512

파크뷰(주상복합) 3621 1829

한솔1단지청구 2797 858

한솔2단지LG 2755 598

한솔3단지한일 2813 416

한솔4단지주공 7966 1651

한솔5단지주공 2767 1156

한솔6단지주공 2784 1039

gangnam (37, 19092)
gwanghwamun (67, 30064)
yeouido (57, 30066)
단지이름: 한솔5단지주공
10
('공급 면적(m2)', '74.39')
('전용 면적(m2)', '51.66')
('전용 면적(평)', '15.62')
('평균 매매가', 68000)
('평균 전세가', 21750)
('전세가율', '31~32%')
('매매 년 월', '2023/7')
('최근 매매가', 80000)

hotPlaceList
('100111', '강남역', '지하철+버스', '42')
('96123', '을지로입구역', '버스', '48')
('27914', '잠실역', '지하철+버스', '47')
('19716', '판교테크노벨리', '버스', '28')
('32983', '백현동카페거리', '버스', '23')
('58391', '코엑스', '지하철

In [None]:
def save_dict_items_to_excel(file_path, dict_items):
    """Write the per-complex detail tables to a new Excel workbook.

    Each non-empty complex gets its name in column 1. Every entry of its
    detail dict reserves a two-column block to the right of the previous
    one; only integer-keyed entries (one per area type) are filled, as
    ``(label, value)`` rows starting on the same row as the name.

    Args:
        file_path: Destination path of the ``.xlsx`` file.
        dict_items: Mapping of complex name to its detail dict.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    centered = Alignment(horizontal='center', vertical='center')

    cnRow = 1  # Row where the current complex starts.
    for complexName, complexInfo in dict_items.items():
        if not complexInfo:
            continue

        sheet.cell(cnRow, 1, complexName)

        # The name alone occupies one row. Starting at 1 keeps a complex that
        # has no integer-keyed entries from being overwritten by the next one.
        maxRow = 1
        inCol = 2
        for infoName, info in complexInfo.items():
            if isinstance(infoName, int):
                rowsWritten = 0
                for data in info:
                    # Rows of a block start level with the complex name.
                    sheet.cell(cnRow + rowsWritten, inCol, data[0])
                    sheet.cell(cnRow + rowsWritten, inCol + 1, data[1])
                    rowsWritten += 1
                maxRow = max(maxRow, rowsWritten)
            # TODO: 'hotPlaceList', 'nearInfo' and '공급량(세대, 비율)' are
            # not written yet; their column pair is left blank.

            inCol += 2  # Every entry takes the next two columns.

        # Centre everything reserved for this complex.
        for row in sheet.iter_rows(min_row=cnRow, max_row=cnRow + maxRow - 1, min_col=1, max_col=inCol - 1):
            for cell in row:
                cell.alignment = centered

        cnRow += maxRow

    workbook.save(file_path)

    
# Sample of the extra per-complex entries, kept as a bare string literal
# (it has no effect at runtime).
'''
'마트/백화점': {'하나로마트(김해점)': '2', '홈플러스(분당오리점)': '3', '이마트(죽전점)': '5', '신세계백화점(경기점)': '5', '이랜드리테일(2001)(2001 분당점)': '5', '하나로마트(수지농협 토월점)': '8', '이마트(분당점)': '9'}, 
'종합병원': {'대진의료재단 분당제생병원': '20'}, 
'스타벅스': '3', 
'학원': '90'}, '

공급량(세대, 비율)': ('1267', 6.55)
'''
    
# Example usage
# dict_items_data = [('까치건영', {}), ('까치대우,롯데,선경', {}), ('까치롯데,선경', {}), ...]  # dict_items data
file_path = '/Users/rhino/Downloads/output.xlsx'  # Path of the Excel file to write

# dictTmp is the module-level dict defined in the earlier cell.
save_dict_items_to_excel(file_path, dictTmp)