## 네이버랜드 크롤링

https://m.land.naver.com/search/result/양천구신월동

In [1]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import math
import re

In [2]:
#임의의 btm, lft, top, rgt값 계산

def calculate_bounding_box(lat, lon, zoom):
    # Earth's radius in meters
    R = 6378137
    # Approximate bounding box in meters for zoom level 14 (usually covers around 1km)
    bounding_box_half_side_length = 1000  # 1km / 2

    # Convert latitude and longitude from degrees to radians
    lat_rad = math.radians(lat)
    lon_rad = math.radians(lon)
    
    # Offset calculations
    lat_offset = (bounding_box_half_side_length / R) * (180 / math.pi)
    lon_offset = (bounding_box_half_side_length / (R * math.cos(lat_rad))) * (180 / math.pi)
    
    # Calculate bounding box coordinates
    btm = lat - lat_offset
    lft = lon - lon_offset
    top = lat + lat_offset
    rgt = lon + lon_offset
    
    return btm, lft, top, rgt

In [12]:
keyword = "종로구 무악동"

In [13]:


url = "https://m.land.naver.com/search/result/{}".format(keyword)

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Referer': 'https://m.land.naver.com/'
}

res = requests.get(url, headers=headers)
res.raise_for_status()

soup = str(BeautifulSoup(res.text, 'lxml'))

# filter 값 추출
value = soup.split("filter: {")[1].split("}")[0].replace(" ","").replace("'","")

print(value)

lat = (float)(value.split("lat:")[1].split(",")[0])
lon = (float)(value.split("lon:")[1].split(",")[0])
z = (int)(value.split("z:")[1].split(",")[0])
cortarNo = value.split("cortarNo:")[1].split(",")[0]
rletTpCds = value.split("rletTpCds:")[1].split(",")[0]
tradTpCds = value.split("tradTpCds:")[1].split()[0]


#임의의 btm, lft, top, rgt값 계산
btm, lft, top, rgt = calculate_bounding_box(lat, lon, z)
print(f"btm: {btm}, lft: {lft}, top: {top}, rgt: {rgt}")


# clusterList URL
clusterList_URL = "https://m.land.naver.com/cluster/clusterList?view=atcl&cortarNo={}&rletTpCd={}&tradTpCd={}&z={}&lat={}&lon={}&btm={}&lft={}&top={}&rgt={}"\
    .format(cortarNo, rletTpCds, tradTpCds, z, lat, lon, btm, lft, top, rgt)
print(clusterList_URL)

# clusterList URL로 GET 요청
res2 = requests.get(clusterList_URL, headers=headers)
res2.raise_for_status()
# print(res2.text)

# 데이터 파싱
try:
    data = res2.json()
    # print(json.dumps(data, indent=2, ensure_ascii=False))
except json.JSONDecodeError as e:
    print("JSON 데이터를 파싱할 수 없습니다:", e)
    print("응답 내용:", res2.text)
    
values = data['data']['ARTICLE']
# print(values)

result_list = []
# 큰 원으로 구성되어 있는 전체 매물그룹(values)을 load 하여 한 그룹씩 세부 쿼리 진행
for v in values:
    lgeo = v['lgeo']
    count = v['count']
    z2 = v['z']
    lat2 = v['lat']
    lon2 = v['lon']

    len_pages = math.ceil(count / 20) + 1
    for idx in range(1, len_pages):

        # articleList_URL    
        articleList_URL = "https://m.land.naver.com/cluster/ajax/articleList?""itemId={}&mapKey=&lgeo={}&showR0=&" \
               "rletTpCd={}&tradTpCd={}&z={}&lat={}&""lon={}&totCnt={}&cortarNo={}&page={}"\
            .format(lgeo, lgeo, rletTpCds, tradTpCds, z2, lat2, lon2, count,cortarNo, idx)
        # print(articleList_URL)
        res3 = requests.get(articleList_URL, headers=headers)
        res3.raise_for_status()
                
        try:
            data2 = res3.json()
            articles = data2['body']
            for article in articles:
                result_list.append(article)
        except json.JSONDecodeError as e:
            print("JSON 데이터를 파싱할 수 없습니다:", e)
            print("응답 내용:", res3.text)


df = pd.DataFrame(result_list)

selected_columns = ["atclNo", "atclNm", "rletTpNm","tradTpNm", "flrInfo", "prc","rentPrc","hanPrc","spc1","spc2", "direction","atclCfmYmd" ,"lat", "lng", "atclFetrDesc", "tagList","bildNm","town"]
df["town"] = keyword
df = df[selected_columns].reset_index(drop=True)

print(len(df))

df.to_csv(f'./dataNop/{keyword}_crawling.csv', index=False, encoding='utf-8-sig')
print("CSV 파일로 저장되었습니다.")
            
        


lat:37.576008,
lon:126.958098,
z:14,
cortarNo:1111018700,
cortarNm:무악동,
rletTpCds:*,
tradTpCds:A1:B1:B2

btm: 37.567024847158805, lft: 126.9467634359103, top: 37.5849911528412, rgt: 126.96943256408971
https://m.land.naver.com/cluster/clusterList?view=atcl&cortarNo=1111018700&rletTpCd=*&tradTpCd=A1:B1:B2&z=14&lat=37.576008&lon=126.958098&btm=37.567024847158805&lft=126.9467634359103&top=37.5849911528412&rgt=126.96943256408971
141
CSV 파일로 저장되었습니다.


### 데이터 전처리 

In [41]:
#csv 합치기
import os

folder_path = './dataNop'

# 폴더 내 모든 CSV 파일을 불러와 하나의 데이터 프레임으로 합치는 함수
def merge_csv_files(folder_path):
    # 폴더 내 모든 CSV 파일 리스트 가져오기
    csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
    print(csv_files)

    # 빈 데이터 프레임 리스트 생성
    dataframes = []

    # 각 CSV 파일을 읽어서 데이터 프레임 리스트에 추가
    for file in csv_files:
        file_path = os.path.join(folder_path, file)
        df = pd.read_csv(file_path)
        dataframes.append(df)

    # 모든 데이터 프레임을 하나로 합치기
    merged_df = pd.concat(dataframes, ignore_index=True)

    return merged_df

# 함수 호출 및 결과 저장
merged_df = merge_csv_files(folder_path)

# 결과 출력
merged_df.head()

['강남구개포1동_crawling.csv', '강남구개포2동_crawling.csv', '강남구개포4동_crawling.csv', '강남구논현1동_crawling.csv', '강남구논현2동_crawling.csv', '강남구대치1동_crawling.csv', '강남구대치2동_crawling.csv', '강남구대치4동_crawling.csv', '강남구도곡1동_crawling.csv', '강남구도곡2동_crawling.csv', '강남구삼성1동_crawling.csv', '강남구삼성2동_crawling.csv', '강남구세곡동_crawling.csv', '강남구수서동_crawling.csv', '강남구신사동_crawling.csv', '강남구압구정동_crawling.csv', '강남구역삼1동_crawling.csv', '강남구역삼2동_crawling.csv', '강남구일원1동_crawling.csv', '강남구일원본동_crawling.csv', '강남구청담동_crawling.csv']


Unnamed: 0,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,atclCfmYmd,lat,lng,atclFetrDesc,tagList,bildNm,town
0,2430493738,디에이치퍼스티어아이파크,아파트,전세,8/26,175000,0,"17억 5,000",144,112.85,남동향,24.06.21.,37.482462,127.056707,44 1획지 주인직접 초역세권 식세 초중품아 채광.전망굿 고급커뮤니티,"['2년이내', '대단지', '대형평수', '방네개이상']",118동,강남구개포1동
1,2430483040,디에이치퍼스티어아이파크,아파트,월세,17/35,50000,440,5억,125,96.82,남서향,24.06.21.,37.47976,127.058636,"38. 커뮤니티 최상. 조중석식.풀에,이태리산 아트월, 식세기 옵션","['2년이내', '융자금없는', '대단지', '방네개이상']",174동,강남구개포1동
2,2430483852,디에이치퍼스티어아이파크,아파트,전세,22/33,290000,0,29억,201,156.96,남동향,24.06.21.,37.481182,127.057478,"61.대형 평형 다량보유.중앙공원뷰.이태리아트월,붙박이장,오븐,식세기","['2년이내', '융자금없는', '대단지', '대형평수']",103동,강남구개포1동
3,2430476863,디에이치퍼스티어아이파크,아파트,전세,16/18,68000,0,"6억 8,000",50,34.99,남서향,24.06.21.,37.478622,127.056892,"15 주인의뢰,귀한소형, 탁트인뷰, 럭셔리커뮤니티","['2년이내', '대단지', '소형평수', '방한개']",140동,강남구개포1동
4,2430461819,디에이치퍼스티어아이파크,아파트,월세,30/33,100000,600,10억,201,156.96,남동향,24.06.21.,37.481182,127.057478,"61 빠른입주,보증금10억에서 부터가능","['2년이내', '융자금없는', '대단지', '대형평수']",103동,강남구개포1동


In [42]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34270 entries, 0 to 34269
Data columns (total 18 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   atclNo        34270 non-null  int64  
 1   atclNm        34270 non-null  object 
 2   rletTpNm      34270 non-null  object 
 3   tradTpNm      34270 non-null  object 
 4   flrInfo       34270 non-null  object 
 5   prc           34270 non-null  int64  
 6   rentPrc       34270 non-null  int64  
 7   hanPrc        34264 non-null  object 
 8   spc1          34270 non-null  object 
 9   spc2          34270 non-null  float64
 10  direction     32542 non-null  object 
 11  atclCfmYmd    34270 non-null  object 
 12  lat           34270 non-null  float64
 13  lng           34270 non-null  float64
 14  atclFetrDesc  32199 non-null  object 
 15  tagList       34270 non-null  object 
 16  bildNm        9106 non-null   object 
 17  town          34270 non-null  object 
dtypes: float64(3), int64(3), o

In [43]:
#중복 제거

merged_df.drop_duplicates()
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34270 entries, 0 to 34269
Data columns (total 18 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   atclNo        34270 non-null  int64  
 1   atclNm        34270 non-null  object 
 2   rletTpNm      34270 non-null  object 
 3   tradTpNm      34270 non-null  object 
 4   flrInfo       34270 non-null  object 
 5   prc           34270 non-null  int64  
 6   rentPrc       34270 non-null  int64  
 7   hanPrc        34264 non-null  object 
 8   spc1          34270 non-null  object 
 9   spc2          34270 non-null  float64
 10  direction     32542 non-null  object 
 11  atclCfmYmd    34270 non-null  object 
 12  lat           34270 non-null  float64
 13  lng           34270 non-null  float64
 14  atclFetrDesc  32199 non-null  object 
 15  tagList       34270 non-null  object 
 16  bildNm        9106 non-null   object 
 17  town          34270 non-null  object 
dtypes: float64(3), int64(3), o

#### 1. 날짜 형식 변경

In [44]:
from datetime import datetime


In [None]:
def convert_to_mysql_format(date_str):
    date_obj = datetime.strptime(date_str, "%y.%m.%d.")
    return date_obj.strftime("%Y-%m-%d")

# 'atclCfmYmd' 열의 값을 MySQL 형식으로 변환
df['atclCfmYmd_mysql'] = df['atclCfmYmd'].apply(convert_to_mysql_format)
print("MySQL 형식으로 변환된 데이터 프레임:\n", df)

In [45]:
def convert_to_mongodb_format(date_str):
    date_obj = datetime.strptime(date_str, "%y.%m.%d.")
    return date_obj.isoformat()

# 'atclCfmYmd' 열의 값을 MongoDB 형식으로 변환
merged_df['atclCfmYmd'] = merged_df['atclCfmYmd'].apply(convert_to_mongodb_format)
merged_df.head()

Unnamed: 0,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,atclCfmYmd,lat,lng,atclFetrDesc,tagList,bildNm,town
0,2430493738,디에이치퍼스티어아이파크,아파트,전세,8/26,175000,0,"17억 5,000",144,112.85,남동향,2024-06-21T00:00:00,37.482462,127.056707,44 1획지 주인직접 초역세권 식세 초중품아 채광.전망굿 고급커뮤니티,"['2년이내', '대단지', '대형평수', '방네개이상']",118동,강남구개포1동
1,2430483040,디에이치퍼스티어아이파크,아파트,월세,17/35,50000,440,5억,125,96.82,남서향,2024-06-21T00:00:00,37.47976,127.058636,"38. 커뮤니티 최상. 조중석식.풀에,이태리산 아트월, 식세기 옵션","['2년이내', '융자금없는', '대단지', '방네개이상']",174동,강남구개포1동
2,2430483852,디에이치퍼스티어아이파크,아파트,전세,22/33,290000,0,29억,201,156.96,남동향,2024-06-21T00:00:00,37.481182,127.057478,"61.대형 평형 다량보유.중앙공원뷰.이태리아트월,붙박이장,오븐,식세기","['2년이내', '융자금없는', '대단지', '대형평수']",103동,강남구개포1동
3,2430476863,디에이치퍼스티어아이파크,아파트,전세,16/18,68000,0,"6억 8,000",50,34.99,남서향,2024-06-21T00:00:00,37.478622,127.056892,"15 주인의뢰,귀한소형, 탁트인뷰, 럭셔리커뮤니티","['2년이내', '대단지', '소형평수', '방한개']",140동,강남구개포1동
4,2430461819,디에이치퍼스티어아이파크,아파트,월세,30/33,100000,600,10억,201,156.96,남동향,2024-06-21T00:00:00,37.481182,127.057478,"61 빠른입주,보증금10억에서 부터가능","['2년이내', '융자금없는', '대단지', '대형평수']",103동,강남구개포1동


#### 2. 아파트, 오피스텔 나누기
경도, 위도를 활용해 도로명주소, 지번주소 추가

In [46]:
apartment_df = merged_df[merged_df['rletTpNm'] == '아파트']
officetel_df = merged_df[merged_df['rletTpNm'] == '오피스텔']

In [65]:
officetel_df.to_csv('./dataNop/officetel.csv', index=False, encoding='utf-8-sig')

In [62]:
officetel_df

Unnamed: 0,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,atclCfmYmd,lat,lng,atclFetrDesc,tagList,bildNm,town,road_address,address
458,2429962181,렉스힐,오피스텔,매매,2/7,41000,0,"4억 1,000",45,29.25,남동향,2024-06-18T00:00:00,37.483014,127.062559,임대안고 매매 구룹과외등 공부방으로 최적,"['15년이내', '필로티', '소형평수', '방한개']",1동,강남구개포1동,,
1175,2429962181,렉스힐,오피스텔,매매,2/7,41000,0,"4억 1,000",45,29.25,남동향,2024-06-18T00:00:00,37.483014,127.062559,임대안고 매매 구룹과외등 공부방으로 최적,"['15년이내', '필로티', '소형평수', '방한개']",1동,강남구개포2동,,
2466,2430511404,강남역서희스타힐스,오피스텔,매매,9/12,27000,0,"2억 7,000",65,29.88,동향,2024-06-21T00:00:00,37.491413,127.032589,,"['15년이내', '화장실한개', '방한개', '고층']",1동,강남구개포4동,,
2468,2430373834,강남웅진베어스빌,오피스텔,월세,6/16,10000,110,1억,48,26.26,북향,2024-06-20T00:00:00,37.490213,127.032788,"전입대출,보증보험가능,층고높은복층","['10년이내', '복층', '소형평수']",1동,강남구개포4동,,
2470,2430327129,강남웅진베어스빌,오피스텔,전세,6/16,32000,0,"3억 2,000",48,26.26,북향,2024-06-20T00:00:00,37.490213,127.032788,"전입대출,보증보험가능,층고높은복층","['10년이내', '복층', '소형평수']",1동,강남구개포4동,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34187,2427092490,리버뷰,오피스텔,월세,14/15,1000,80,1000,52,30.36,남서향,2024-06-03T00:00:00,37.522278,127.055325,초역세권 청담공원전망 개별냉난방 업무시설,"['25년이상', '융자금없는', '역세권', '화장실한개']",1동,강남구청담동,,
34203,2429060192,더오키드청담,오피스텔,월세,13/17,20000,700,2억,94,69.17,동향,2024-06-13T00:00:00,37.522677,127.055081,소유주직접의뢰 한강뷰 공원뷰 청담역초역세권 건조기 스타일러 식세기,"['2년이내', '역세권', '방한개']",1동,강남구청담동,,
34214,2428816002,더오키드청담,오피스텔,매매,6/17,245000,0,"24억 5,000",94,69.17,북동향,2024-06-12T00:00:00,37.522677,127.055081,청담역 영동대교 남단 신축 하이엔드 오피스텔,"['2년이내', '역세권', '방두개']",1동,강남구청담동,,
34244,2426348898,리버뷰,오피스텔,월세,14/15,1000,100,1000,68,39.16,서향,2024-05-29T00:00:00,37.522278,127.055325,대로변 업무용 오피스텔,"['25년이상', '융자금없는', '역세권', '방한개']",1동,강남구청담동,,


In [52]:
apartment_df['road_address'] = None
apartment_df['address'] = None

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apartment_df['road_address'] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  apartment_df['address'] = None


In [53]:
officetel_df['road_address'] = None
officetel_df['address'] = None

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  officetel_df['road_address'] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  officetel_df['address'] = None


In [54]:
officetel_df.head()

Unnamed: 0,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,atclCfmYmd,lat,lng,atclFetrDesc,tagList,bildNm,town,road_address,address
458,2429962181,렉스힐,오피스텔,매매,2/7,41000,0,"4억 1,000",45,29.25,남동향,2024-06-18T00:00:00,37.483014,127.062559,임대안고 매매 구룹과외등 공부방으로 최적,"['15년이내', '필로티', '소형평수', '방한개']",1동,강남구개포1동,,
1175,2429962181,렉스힐,오피스텔,매매,2/7,41000,0,"4억 1,000",45,29.25,남동향,2024-06-18T00:00:00,37.483014,127.062559,임대안고 매매 구룹과외등 공부방으로 최적,"['15년이내', '필로티', '소형평수', '방한개']",1동,강남구개포2동,,
2466,2430511404,강남역서희스타힐스,오피스텔,매매,9/12,27000,0,"2억 7,000",65,29.88,동향,2024-06-21T00:00:00,37.491413,127.032589,,"['15년이내', '화장실한개', '방한개', '고층']",1동,강남구개포4동,,
2468,2430373834,강남웅진베어스빌,오피스텔,월세,6/16,10000,110,1억,48,26.26,북향,2024-06-20T00:00:00,37.490213,127.032788,"전입대출,보증보험가능,층고높은복층","['10년이내', '복층', '소형평수']",1동,강남구개포4동,,
2470,2430327129,강남웅진베어스빌,오피스텔,전세,6/16,32000,0,"3억 2,000",48,26.26,북향,2024-06-20T00:00:00,37.490213,127.032788,"전입대출,보증보험가능,층고높은복층","['10년이내', '복층', '소형평수']",1동,강남구개포4동,,


In [48]:
import requests
import pandas as pd
import json
import xml.etree.ElementTree as ET
from dotenv import load_dotenv
import os

In [58]:
# Kakao API를 사용하여 좌표를 주소로 변환하는 함수
def reverse_geocode(lat, lon):
    url = "https://dapi.kakao.com/v2/local/geo/coord2address.json"
    headers = {"Authorization": f"KakaoAK 2c032c6e9e2fe1461d1df5cd1ebc6fbe"}
    params = {"x": lon, "y": lat, "input_coord": "WGS84"}
    
    response = requests.get(url, headers=headers, params=params)
    
    if response.status_code == 200:
        result = response.json()
        if result['meta']['total_count'] > 0:
            address = result['documents'][0]
            road_address = address.get('road_address')
            if road_address:
                return road_address['address_name'], address['address']['address_name']
            else:
                return None, address['address']['address_name']
        else:
            return None, "No address found"
    else:
        return None, f"Error: {response.status_code}"

In [59]:
#주소기반산업서비스 API
def get_road_address_from_juso(address):
    api_url = f"http://business.juso.go.kr/addrlink/addrLinkApi.do?currentPage=1&countPerPage=10&keyword={address}&confmKey=devU01TX0FVVEgyMDI0MDUyOTIwMTYwODExNDgwNTM="
    response = requests.get(api_url)
    
    if response.status_code == 200:
        tree = ET.ElementTree(ET.fromstring(response.content))
        root = tree.getroot()
        road_addr = root.find(".//roadAddrPart1")
        if road_addr is not None:
            return road_addr.text
        else:
            return None
    else:
        return None

In [60]:
# 각 좌표에 대해 API 호출 및 주소 추가
for idx, row in apartment_df.iterrows():
    lat = row['lat']
    lon = row['lng']
    road_addr, addr = reverse_geocode(lat, lon)
    if road_addr is None and addr is not None:
        road_addr = get_road_address_from_juso(addr)
    apartment_df.at[idx, 'road_address'] = road_addr
    apartment_df.at[idx, 'address'] = addr

# 결과를 새로운 CSV 파일로 저장
apartment_df.to_csv('Apartment_with_address.csv', index=False)

ConnectTimeout: HTTPConnectionPool(host='business.juso.go.kr', port=80): Max retries exceeded with url: /addrlink/addrLinkApi.do?currentPage=1&countPerPage=10&keyword=Error:%20401&confmKey=devU01TX0FVVEgyMDI0MDUyOTIwMTYwODExNDgwNTM= (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x000001C529DBBC10>, 'Connection to business.juso.go.kr timed out. (connect timeout=None)'))

In [61]:
apartment_df

Unnamed: 0,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,atclCfmYmd,lat,lng,atclFetrDesc,tagList,bildNm,town,road_address,address
0,2430493738,디에이치퍼스티어아이파크,아파트,전세,8/26,175000,0,"17억 5,000",144,112.85,남동향,2024-06-21T00:00:00,37.482462,127.056707,44 1획지 주인직접 초역세권 식세 초중품아 채광.전망굿 고급커뮤니티,"['2년이내', '대단지', '대형평수', '방네개이상']",118동,강남구개포1동,,Error: 401
1,2430483040,디에이치퍼스티어아이파크,아파트,월세,17/35,50000,440,5억,125,96.82,남서향,2024-06-21T00:00:00,37.479760,127.058636,"38. 커뮤니티 최상. 조중석식.풀에,이태리산 아트월, 식세기 옵션","['2년이내', '융자금없는', '대단지', '방네개이상']",174동,강남구개포1동,,Error: 401
2,2430483852,디에이치퍼스티어아이파크,아파트,전세,22/33,290000,0,29억,201,156.96,남동향,2024-06-21T00:00:00,37.481182,127.057478,"61.대형 평형 다량보유.중앙공원뷰.이태리아트월,붙박이장,오븐,식세기","['2년이내', '융자금없는', '대단지', '대형평수']",103동,강남구개포1동,,Error: 401
3,2430476863,디에이치퍼스티어아이파크,아파트,전세,16/18,68000,0,"6억 8,000",50,34.99,남서향,2024-06-21T00:00:00,37.478622,127.056892,"15 주인의뢰,귀한소형, 탁트인뷰, 럭셔리커뮤니티","['2년이내', '대단지', '소형평수', '방한개']",140동,강남구개포1동,,Error: 401
4,2430461819,디에이치퍼스티어아이파크,아파트,월세,30/33,100000,600,10억,201,156.96,남동향,2024-06-21T00:00:00,37.481182,127.057478,"61 빠른입주,보증금10억에서 부터가능","['2년이내', '융자금없는', '대단지', '대형평수']",103동,강남구개포1동,,Error: 401
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34253,2428643976,삼성청담공원,아파트,월세,13/15,20000,400,2억,132,107.22,남서향,2024-06-11T00:00:00,37.522400,127.050597,"올수리 깨끗 ,공원인접 ,보증금조정가능, 청담역세권,빠른입주가능","['25년이내', '올수리', '역세권', '대형평수']",105동,강남구청담동,,
34260,2428619763,삼성청담공원,아파트,월세,13/15,10000,450,1억,132,107.22,남서향,2024-06-11T00:00:00,37.522400,127.050597,채광좋은 남향 내부깨끗 보증금낮은 월세 청담공원인근,"['25년이내', '역세권', '대형평수', '방네개이상']",105동,강남구청담동,,
34262,2428669451,청담어퍼하우스,아파트,매매,4/7,800000,0,80억,225,197.73,남향,2024-06-11T00:00:00,37.520901,127.050220,청담동 신축급 고급빌라 어퍼하우스 즉시입주가능,"['10년이내', '역세권', '대형평수']",1동,강남구청담동,,
34264,2427714773,청담대우유로카운티,아파트,매매,고/17,320000,0,32억,141,120.89,남동향,2024-06-06T00:00:00,37.523330,127.049406,전체수리 스카이뷰시원한 조망권 협의입주,"['25년이내', '대형평수', '방네개이상', '화장실두개']",103동,강남구청담동,,


In [7]:
df.head()

Unnamed: 0,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,atclCfmYmd,lat,lng,atclFetrDesc,tagList,bildNm,town
0,2430161062,중소형사무실,사무실,월세,3/5,3000,210,3000,100,89.5,서향,24.06.20.,37.510138,127.085401,"전용27, 잠실새내역세, 주차2대","[25년이상, 융자금적은, 역세권]",,송파구 잠실7동
1,2429217025,중소형사무실,사무실,월세,3/5,3000,210,3000,92,92.0,남동향,24.06.18.,37.510138,127.085401,"실사진O 잠실새내역 2분, 깔끔한 사무실, 전용27","[25년이상, 역세권, 중층]",,송파구 잠실7동
2,2429335888,일반상가,상가,월세,1/4,10000,450,1억,119,117.0,북향,24.06.17.,37.509996,127.085023,잠실새내역 먹자골목 야장있는 예쁜 호프집무권리,"[25년이상, 역세권, 1층]",,송파구 잠실7동
3,2430435227,쌍용더플래티넘잠실,오피스텔,월세,6/16,1000,110,1000,50,27.07,북향,24.06.21.,37.510994,127.084369,ON I 제일넓은타입 융자없이깔끔한집,"[2년이내, 융자금없는, 역세권]",101동,송파구 잠실7동
4,2430450003,다가구,단독/다가구,매매,3/B1,340000,0,34억,265,615.43,동향,24.06.21.,37.50793,127.083041,잠실동 초급매 다가구,"[25년이내, 급매, 방네개이상, 화장실네개이상]",,송파구 잠실7동


In [7]:
selected_columns = ["atclNo", "atclNm", "rletTpNm","tradTpNm", "flrInfo", "prc","rentPrc","hanPrc","spc1","spc2", "direction", "lat", "lng", "atclFetrDesc", "tagList","bildNm","town"]
df["town"] = keyword
df = df[selected_columns].reset_index(drop=True)

In [11]:
df.head(1)

Unnamed: 0,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,lat,lng,atclFetrDesc,tagList,bildNm,town
0,2427003676,빌딩,건물,월세,7/B2,134000,9500,"13억 4,000",666,2513.61,,37.511249,127.113144,MIDAS 신축 건물 통임대 임대조건 협의가능,"[2년이내, 역세권]",,송파구 잠실4동


In [14]:
df.to_csv(f'./dataNop/{keyword}_crawling.csv', index=False, encoding='utf-8-sig')

In [9]:
df

Unnamed: 0,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,lat,lng,atclFetrDesc,tagList,bildNm,town
0,2427408730,신구,오피스텔,월세,10/13,2000,65,2000,54,36.48,서향,37.538545,127.135674,강동역 7분거리,"[25년이상, 역세권, 소형평수, 방한개]",1동,송파구 풍납1동
1,2427477455,강동역신동아파밀리에(주상복합),아파트,매매,29/41,130000,0,13억,144,107.34,남향,37.536616,127.133071,확트인 월드타워뷰. 남향. 강동역바로연결. 편리한생활보장.,"[10년이내, 역세권, 대형평수, 방네개이상]",1101동,송파구 풍납1동
2,2427401843,진넥스빌Ⅲ(주상복합),아파트,매매,3/15,70000,0,7억,88,75.43,남향,37.536136,127.132805,특올수리. 베란다. 수납넉넉. 층간소음걱정NO. 강동역바로,"[25년이내, 올수리, 역세권]",1동,송파구 풍납1동
3,2427331089,래미안강동팰리스(주상복합),아파트,매매,3/45,133000,0,"13억 3,000",121,84.97,남서향,37.537341,127.132464,사계절 예쁜뷰. 호텔급 커뮤니티시설. 헬스사우나무료 .갭투.보안철저,"[10년이내, 역세권, 필로티, 방세개]",103동,송파구 풍납1동
4,2427354478,강동역신동아파밀리에(주상복합),아파트,전세,8/41,82000,0,"8억 2,000",144,107.34,남향,37.536763,127.133567,올수리. 풀시스템에어컨. 남향. 강동역 바로연결.,"[10년이내, 올수리, 역세권, 대형평수]",1102동,송파구 풍납1동
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1815,2424009993,일반원룸,원룸,월세,9/15,9000,20,9000,24,15.12,북동향,37.536635,127.136189,5호선 강동 초역세 강동성심병원 앞 수납공간 많은 풀옵션,"[15년이내, 융자금없는, 역세권, 소형평수]",,송파구 풍납1동
1816,2424022895,래미안강동팰리스(주상복합),아파트,매매,중/45,136000,0,"13억 6,000",121,84.97,남서향,37.537341,127.132464,월드타워뷰. 편리한교통. 골프.헬스.사우나. 게스트룸.,"[10년이내, 역세권, 방세개, 화장실두개]",103동,송파구 풍납1동
1817,2424022263,래미안강동팰리스(주상복합),아파트,매매,중/45,135000,0,"13억 5,000",121,84.97,북서향,37.537341,127.132464,상태최상.트인뷰. 편리한교통. 다양한커뮤니티,"[10년이내, 역세권, 방세개, 화장실두개]",103동,송파구 풍납1동
1818,2423995897,빌라,빌라,전세,중/6,36000,0,"3억 6,000",55,49.52,남향,37.531958,127.139537,내부깨끗 지정주차 지분넓은 안전한전세,"[15년이내, 융자금없는, 소형평수, 방세개]",,송파구 풍납1동


Unnamed: 0,atclNo,cortarNo,atclNm,atclStatCd,rletTpCd,uprRletTpCd,rletTpNm,tradTpCd,tradTpNm,vrfcTpCd,...,dtlAddr,sameAddrPremMin,sameAddrPremMax,repImgUrl,repImgTpCd,repImgThumb,sameAddrMaxPrc2,sameAddrMinPrc2,town,sellrNm
0,2426329376,1168010300,디에이치퍼스티어아이파크,R0,A01,A01,아파트,B1,전세,NDOC1,...,,,,,,,,,강남구 개포동,
1,2426319116,1168010300,디에이치퍼스티어아이파크,R0,A01,A01,아파트,A1,매매,NDOC1,...,,,,,,,,,강남구 개포동,
2,2426330636,1168010300,디에이치퍼스티어아이파크,R0,A01,A01,아파트,B1,전세,NDOC1,...,,,,,,,,,강남구 개포동,
3,2426292674,1168010300,래미안블레스티지,R0,A01,A01,아파트,B1,전세,OWNER,...,,,,/20240529_150/land_naver_1716954134244cK1vF_JP...,10,f130_98,,,강남구 개포동,
4,2426303207,1168010300,디에이치퍼스티어아이파크,R0,A01,A01,아파트,B2,월세,NDOC1,...,,,,,,,400,500,강남구 개포동,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24762,2422527668,1168010100,대명벨리온(도시형),R0,A01,A01,아파트,B2,월세,OWNER,...,,,,/20240516_58/land_naver_1715846838107XrkpK_JPE...,10,f130_98,,,강남구 역삼동,
24763,2425514914,1168010100,뜨라네(828-76),R0,A01,A01,아파트,B2,월세,DOC,...,,,,,,,260,300,강남구 역삼동,
24764,2422566461,1168010100,중소형사무실,R0,D01,D01,사무실,B2,월세,OWNER,...,,,,,,,,,강남구 역삼동,
24765,2422537172,1168010100,네스빌,R0,A02,A02,오피스텔,B1,전세,NDOC1,...,,,,/20240508_101/land_naver_1715153902606uqoUF_JP...,10,f130_98,,,강남구 역삼동,


In [154]:
df['id'] = range(1, len(df) + 1)

In [122]:
apartment_df = df[df['rletTpNm'] == '아파트']
officetel_df = df[df['rletTpNm'] == '오피스텔']

In [136]:
apartment_df

Unnamed: 0,atclNo,atclNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,lat,lng,atclFetrDesc,tagList,bildNm,town,area,id
0,2426329376,디에이치퍼스티어아이파크,전세,11/32,320000,0,32억,220,171.65,남서향,37.481919,127.057858,66 O로얄동로얄층O조중석식O최대규모커뮤니티O근린공원O구룡역근접,"['2년이내', '대단지', '대형평수', '방네개이상']",104동,강남구 개포동,1,1
1,2426319116,디에이치퍼스티어아이파크,매매,21/26,430000,0,43억,144,112.85,남서향,37.478823,127.058843,44 뷰가 확 트여야 가치가 더 높습니다 입주가능한 매매 물건,"['2년이내', '대단지', '대형평수', '방네개이상']",146동,강남구 개포동,2,2
2,2426330636,디에이치퍼스티어아이파크,전세,12/35,250000,0,25억,169,132.81,남동향,37.480453,127.058339,51O뷰좋은집O커뮤니티바로연결O근린공원O생활편리동,"['2년이내', '대단지', '대형평수', '방네개이상']",101동,강남구 개포동,3,3
3,2426292674,래미안블레스티지,전세,4/8,105000,0,"10억 5,000",80,59.88,남서향,37.480127,127.063991,24 V풀옵션 V장기거주 V호텔식 조중식 V수영장사우나,"['10년이내', '융자금적은', '대단지']",223동,강남구 개포동,4,4
4,2426303207,디에이치퍼스티어아이파크,월세,22/33,100000,600,10억,201,156.96,남향,37.481182,127.057478,61O시그니처동O커뮤니티연결동O근린공원O뷰좋음,"['2년이내', '대단지', '대형평수', '방네개이상']",103동,강남구 개포동,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24759,2423623584,강남서해그랑블(주상복합),월세,중/10,40000,80,4억,94,70.57,남서향,37.493134,127.039216,공동불가 입점업소 2룸2욕실 뷰 컨디션 좋은호실 빠른계약예상,"['15년이내', '방두개', '화장실두개']",1동,강남구 역삼동,24760,5760
24761,2425505199,금호어울림,월세,중/15,40000,300,4억,155,133.77,남향,37.492233,127.034715,채광좋은남향 넓은평수 내부깨끗 주차편리 입주협의가능해요,"['25년이내', '융자금없는', '대형평수', '방네개이상']",101동,강남구 역삼동,24762,5761
24762,2422527668,대명벨리온(도시형),월세,3/7,1000,95,1000,27,17.68,북서향,37.497964,127.039165,"BEST역삼역도보10분이내,보안굿","['15년이내', '역세권', '소형평수']",1동,강남구 역삼동,24763,5762
24763,2425514914,뜨라네(828-76),월세,저/6,10000,260,1억,97,84.42,동향,37.497454,127.033053,쓰리룸 리모델링호실 빠른입주가능 내부깨끗 주차편리,"['25년이내', '융자금없는', '방세개', '화장실두개']",A동,강남구 역삼동,24764,5763


In [147]:
officetel_df.to_csv('GangnamOfficetel.csv', index=False, encoding='utf-8-sig')

In [111]:
filtered_df = df[~df['rletTpNm'].isin(['아파트', '오피스텔'])]

In [142]:
dfforadd = df

In [149]:
selected_columns = ["lat", "lng", "area"]
dfforadd = dfforadd[selected_columns]

In [191]:
df = df.drop(columns=['town'])

In [192]:
df.to_csv('GangnamApartment.csv', index=False, encoding='utf-8-sig')

In [173]:
df.isnull().sum()

id              0
area            0
atclNo          0
atclNm          0
rletTpNm        0
tradTpNm        0
flrInfo         0
prc             0
rentPrc         0
hanPrc          0
spc1            0
spc2            0
direction       0
atclFetrDesc    0
tagList         0
bildNm          0
dtype: int64

In [171]:
df = df.fillna(0)

In [174]:
df.to_csv(f'GangnamALL.csv', index=False, encoding='utf-8-sig')

In [88]:
df.info() # 데이터프레임의 정보 출력

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   atclNo    100 non-null    int64  
 1   atclNm    100 non-null    object 
 2   rletTpNm  100 non-null    object 
 3   prc       100 non-null    int64  
 4   lat       100 non-null    float64
 5   lng       100 non-null    float64
dtypes: float64(2), int64(2), object(2)
memory usage: 4.8+ KB


In [90]:
df.isnull().sum() # 데이터프레임의 결측치 정보 출력

atclNo      0
atclNm      0
rletTpNm    0
prc         0
lat         0
lng         0
dtype: int64

In [7]:
df = pd.read_csv('양천구_신월동_100.csv',encoding='utf-8')
df

Unnamed: 0,atclNo,atclNm,rletTpNm,prc,lat,lng
0,2426130767,목동센트럴아이파크위브,아파트,60000,37.517669,126.844850
1,2425438083,상가주택,상가주택,250000,37.517523,126.842308
2,2423609940,스카이캐슬,아파트,97000,37.520542,126.844645
3,2425722784,단독,단독/다가구,15000,37.518080,126.842841
4,2422558122,일반상가,상가,108000,37.517721,126.842002
...,...,...,...,...,...,...
95,2423233490,빌라,빌라,25500,37.521391,126.844122
96,2426060226,신성미소지움3차,아파트,63000,37.522890,126.841696
97,2425566480,금용,아파트,83000,37.520768,126.842091
98,2426059502,신정뉴타운롯데캐슬,아파트,81000,37.518923,126.847877


## 데이터 밸런스 맞추기

In [24]:
df = pd.read_csv('GangnamAdd.csv',encoding='utf-8')
df1 = pd.read_csv('GangnamALL.csv',encoding='utf-8')

In [21]:
df

Unnamed: 0,area,lat,lng,road_address,address
0,1,37.481919,127.057858,서울특별시 강남구 개포로 310,서울 강남구 개포동 660-1
1,2,37.478823,127.058843,서울특별시 강남구 개포로 310,서울 강남구 개포동 660-1
2,3,37.480453,127.058339,서울특별시 강남구 개포로 310,서울 강남구 개포동 660-1
3,4,37.480127,127.063991,서울특별시 강남구 선릉로 8,서울 강남구 개포동 1280
4,5,37.481182,127.057478,서울특별시 강남구 개포로 310,서울 강남구 개포동 660-1
...,...,...,...,...,...
24762,24763,37.497964,127.039165,서울특별시 강남구 역삼로25길 27,서울 강남구 역삼동 727-2
24763,24764,37.497454,127.033053,서울특별시 강남구 개포로 310,서울 강남구 역삼동 828-76
24764,24765,37.493709,127.038101,서울특별시 강남구 논현로67길 23,서울 강남구 역삼동 795-2
24765,24766,37.495225,127.032596,서울특별시 강남구 역삼로7길 17,서울 강남구 역삼동 830-41


In [25]:
df = df1.sample(frac=1).reset_index(drop=True)
df

Unnamed: 0,id,area,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,atclFetrDesc,tagList,bildNm
0,20374,20374,2423542341,대형사무실,사무실,월세,1/5,25000,2300,"2억 5,000",561,561.31,남서향,0,"['10년이내', '1층', '주차가능', '천장에어컨']",0
1,20938,20938,2425941602,빌라,빌라,매매,2/2,85000,0,"8억 5,000",48,44.28,남향,0,"['25년이상', '역세권', '소형평수']",0
2,11749,11749,2425945043,중소형사무실,사무실,월세,B1/5,2000,160,2000,89,89.00,남향,스튜디오 추천,"['25년이내', '융자금없는', '지하층', '관리비10만원이하']",0
3,17651,17651,2424576763,푸른마을,아파트,전세,3/15,75000,0,"7억 5,000",103,84.93,남향,"주인직매물,조용일조권좋은남향,권리깨끗","['25년이상', '역세권', '방세개', '화장실두개']",102동
4,16198,16198,2422550993,미성2차,아파트,매매,14/17,300000,0,30억,95,74.40,동향,실매물 투자가치 유망한곳으로 매수적기 입니다,"['25년이상', '방세개', '화장실한개', '고층']",28동
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24762,14031,14031,2422035727,기타,원룸,월세,중/3,1000,90,1000,30,30.00,동향,신사역 가로수길 주차 가능한 원룸 월세,"['25년이상', '소형평수', '방한개']",0
24763,304,304,2424823564,디에이치퍼스티어아이파크,아파트,월세,8/21,30000,420,3억,110,84.82,남동향,33A 생활인프라 모두 근접한 로얄세대 보증금조정가능,"['2년이내', '융자금없는', '역세권', '대단지']",113동
24764,20152,20152,2424016436,청담아이파크,아파트,월세,중/19,120000,330,12억,137,110.13,동향,내부 에이급 리모델링 최상 한강뷰 멋진집입니다.,"['15년이내', '대형평수', '방네개이상', '화장실두개']",1동
24765,1023,1023,2426280490,디에이치퍼스티어아이파크,아파트,월세,중/26,60000,300,6억,110,84.82,남향,33 전망좋은 동 학세권 최고 커뮤니티 신축,"['2년이내', '대단지', '방세개', '화장실두개']",159동


In [27]:
df.to_csv('GangnamALL.csv', index=False, encoding='utf-8-sig')

In [None]:
df

In [6]:
df

Unnamed: 0,area,lat,lng,road_address,address
0,1,37.481919,127.057858,서울특별시 강남구 개포로 310,서울 강남구 개포동 660-1
1,2,37.478823,127.058843,서울특별시 강남구 개포로 310,서울 강남구 개포동 660-1
2,3,37.480453,127.058339,서울특별시 강남구 개포로 310,서울 강남구 개포동 660-1
3,4,37.480127,127.063991,서울특별시 강남구 선릉로 8,서울 강남구 개포동 1280
4,5,37.481182,127.057478,서울특별시 강남구 개포로 310,서울 강남구 개포동 660-1
...,...,...,...,...,...
24762,24763,37.497964,127.039165,서울특별시 강남구 역삼로25길 27,서울 강남구 역삼동 727-2
24763,24764,37.497454,127.033053,서울특별시 강남구 개포로 310,서울 강남구 역삼동 828-76
24764,24765,37.493709,127.038101,서울특별시 강남구 논현로67길 23,서울 강남구 역삼동 795-2
24765,24766,37.495225,127.032596,서울특별시 강남구 역삼로7길 17,서울 강남구 역삼동 830-41


In [7]:
df1

Unnamed: 0,id,area,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,atclFetrDesc,tagList,bildNm
0,1,1,2426329376,디에이치퍼스티어아이파크,아파트,전세,11/32,320000,0,32억,220,171.65,남서향,66 O로얄동로얄층O조중석식O최대규모커뮤니티O근린공원O구룡역근접,"['2년이내', '대단지', '대형평수', '방네개이상']",104동
1,2,2,2426319116,디에이치퍼스티어아이파크,아파트,매매,21/26,430000,0,43억,144,112.85,남서향,44 뷰가 확 트여야 가치가 더 높습니다 입주가능한 매매 물건,"['2년이내', '대단지', '대형평수', '방네개이상']",146동
2,3,3,2426330636,디에이치퍼스티어아이파크,아파트,전세,12/35,250000,0,25억,169,132.81,남동향,51O뷰좋은집O커뮤니티바로연결O근린공원O생활편리동,"['2년이내', '대단지', '대형평수', '방네개이상']",101동
3,4,4,2426292674,래미안블레스티지,아파트,전세,4/8,105000,0,"10억 5,000",80,59.88,남서향,24 V풀옵션 V장기거주 V호텔식 조중식 V수영장사우나,"['10년이내', '융자금적은', '대단지']",223동
4,5,5,2426303207,디에이치퍼스티어아이파크,아파트,월세,22/33,100000,600,10억,201,156.96,남향,61O시그니처동O커뮤니티연결동O근린공원O뷰좋음,"['2년이내', '대단지', '대형평수', '방네개이상']",103동
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24762,24763,24763,2422527668,대명벨리온(도시형),아파트,월세,3/7,1000,95,1000,27,17.68,북서향,"BEST역삼역도보10분이내,보안굿","['15년이내', '역세권', '소형평수']",1동
24763,24764,24764,2425514914,뜨라네(828-76),아파트,월세,저/6,10000,260,1억,97,84.42,동향,쓰리룸 리모델링호실 빠른입주가능 내부깨끗 주차편리,"['25년이내', '융자금없는', '방세개', '화장실두개']",A동
24764,24765,24765,2422566461,중소형사무실,사무실,월세,2/6,2000,200,2000,129,129.60,남서향,사진O 층전체사용 주차가능 가성비 좋은 사무실,"['25년이상', '지상층(1층제외)', '관리비10만원이하', '주차가능']",0
24765,24766,24766,2422537172,네스빌,오피스텔,전세,2/9,19000,0,"1억 9,000",44,23.68,남동향,실물 더 넓은 테라스 굿 시세대비 저렴 빠른계약예상,"['25년이내', '테라스', '역세권']",1동


In [12]:
df = df.sample(frac=1).reset_index(drop=True)
df

Unnamed: 0,area,lat,lng,road_address,address,id,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,direction,atclFetrDesc,tagList,bildNm
0,23791,37.497110,127.037435,서울특별시 강남구 역삼로19길 29,서울 강남구 역삼동 745-12,23791,2425751350,일반상가,상가,월세,1/5,1000,220,1000,76,76.00,동향,좋은 매물만 엑기스로 추려드리겠습니다.,"['25년이상', '역세권', '1층', '관리비20만원이하']",0
1,22531,37.485625,127.035919,서울특별시 강남구 강남대로42길 23,서울 강남구 도곡동 954-18,22531,2420975917,일반상가,상가,월세,1/6,11000,980,"1억 1,000",250,132.00,남향,원하시는 매장 반드시 찾아 드리겠습니다.,"['2년이내', '융자금없는', '역세권', '1층']",0
2,17726,37.481545,127.078599,서울특별시 강남구 광평로10길 6,서울 강남구 일원동 731,17726,2421964775,한솔마을,아파트,매매,4/5,230000,0,23억,110,84.73,남향,확장 샤시교체 특올수리 입주가능 강력추천드립니다,"['25년이상', '올수리', '방세개', '화장실두개']",303동
3,893,37.479332,127.058767,서울특별시 강남구 개포로 310,서울 강남구 개포동 660-1,893,2426321631,디에이치퍼스티어아이파크,아파트,월세,중/35,90000,540,9억,169,132.81,남동향,0,"['2년이내', '대단지', '대형평수', '방네개이상']",173동
4,17234,37.490251,127.082356,서울특별시 강남구 양재대로31길 13,서울 강남구 일원동 686-3,17234,2421849895,단독,단독/다가구,월세,B1/2,2000,60,2000,40,40.00,남동향,"2룸 단독주택 반지하 월세, 보증금 조절 가능, 미닫이문 있음","['25년이상', '융자금없는', '소형평수', '방두개']",0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24762,23969,37.494933,127.035423,서울특별시 강남구 역삼로 149,서울 강남구 역삼동 751-7,23969,2423595052,중소형사무실,사무실,월세,1/5,23000,2300,"2억 3,000",732,732.20,남향,대로변 소형빌딩 임대 현병원운영중 원복협의,"['25년이상', '1층', '주차가능', '엘리베이터']",0
24763,22567,37.491811,127.040029,서울특별시 강남구 도곡로 188,서울 강남구 도곡동 544-2,22567,2420911223,일반상가,상가,월세,3/4,5000,300,5000,198,181.80,북서향,"강남BEST 도곡로대로변,가시성,노출도 좋은상가,시세대비 저렴한 월세","['25년이상', '고층', '주차가능', '총4층']",0
24764,24238,37.498924,127.033882,서울특별시 강남구 테헤란로16길 9,서울 강남구 역삼동 735-29,24238,2425134432,중소형사무실,사무실,월세,1/5,4000,600,4000,133,133.80,서향,실사진O 역삼역 S급 컨디션 인테리어 손 볼 곳 없는 사무실,"['25년이상', '역세권', '1층', '주차가능']",0
24765,14223,37.519372,127.019631,서울특별시 강남구 도산대로1길 40,서울 강남구 신사동 507-11,14223,2421547169,대형사무실,사무실,월세,3/6,4000,400,4000,200,160.00,서향,D실사진. 더블역세권 인테리어 냉난방기 완비. 자주식주차,"['25년이상', '역세권', '중층', '주차가능']",0


In [8]:
merged_df = pd.merge(df, df1, on='area')

In [10]:
rletTpNm_distribution = merged_df['rletTpNm'].value_counts()

In [None]:
rletTpNm_distribution = df1['rletTpNm'].value_counts()

In [11]:
rletTpNm_distribution

rletTpNm
사무실        6911
아파트        5764
상가         4919
빌라         2138
건물         1372
단독/다가구     1272
오피스텔        925
재건축         855
원룸          412
상가주택        124
토지           52
오피스텔분양권      13
아파트분양권        8
공장/창고         2
Name: count, dtype: int64

In [2]:
import pandas as pd

In [2]:
df = pd.read_csv('./dataNop/apartment.csv', encoding='utf-8')
df2 = pd.read_csv('./dataNop/officetel.csv', encoding='utf-8')

In [4]:
df.head()

Unnamed: 0,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,...,atclCfmYmd,lat,lng,atclFetrDesc,tagList,bildNm,town,road_address,address,image
0,2430493738,디에이치퍼스티어아이파크,아파트,전세,8/26,175000,0,"17억 5,000",144,112.85,...,2024-06-21T00:00:00,37.482462,127.056707,44 1획지 주인직접 초역세권 식세 초중품아 채광.전망굿 고급커뮤니티,"['2년이내', '대단지', '대형평수', '방네개이상']",118동,강남구개포1동,,Error: 401,apartment.jpeg
1,2430483040,디에이치퍼스티어아이파크,아파트,월세,17/35,50000,440,5억,125,96.82,...,2024-06-21T00:00:00,37.47976,127.058636,"38. 커뮤니티 최상. 조중석식.풀에,이태리산 아트월, 식세기 옵션","['2년이내', '융자금없는', '대단지', '방네개이상']",174동,강남구개포1동,,Error: 401,apartment.jpeg
2,2430483852,디에이치퍼스티어아이파크,아파트,전세,22/33,290000,0,29억,201,156.96,...,2024-06-21T00:00:00,37.481182,127.057478,"61.대형 평형 다량보유.중앙공원뷰.이태리아트월,붙박이장,오븐,식세기","['2년이내', '융자금없는', '대단지', '대형평수']",103동,강남구개포1동,,Error: 401,apartment.jpeg
3,2430476863,디에이치퍼스티어아이파크,아파트,전세,16/18,68000,0,"6억 8,000",50,34.99,...,2024-06-21T00:00:00,37.478622,127.056892,"15 주인의뢰,귀한소형, 탁트인뷰, 럭셔리커뮤니티","['2년이내', '대단지', '소형평수', '방한개']",140동,강남구개포1동,,Error: 401,apartment.jpeg
4,2430461819,디에이치퍼스티어아이파크,아파트,월세,30/33,100000,600,10억,201,156.96,...,2024-06-21T00:00:00,37.481182,127.057478,"61 빠른입주,보증금10억에서 부터가능","['2년이내', '융자금없는', '대단지', '대형평수']",103동,강남구개포1동,,Error: 401,apartment.jpeg


In [8]:
import ast

# tagList 열의 각 문자열을 리스트로 변환
df['tagList'] = df['tagList'].apply(ast.literal_eval)

# tagList 열의 값을 모두 풀어 하나의 리스트로 만듦
all_tags = [tag for sublist in df['tagList'] for tag in sublist]

# 고유한 태그를 추출하여 리스트로 변환
unique_tags = list(set(all_tags))

# 결과 출력
print(unique_tags)

['4년이내', '탑층', '필로티', '세대당1대', '급매', '세대분리', '대형평수', '대단지', '확장형', '방두개', '역세권', '융자금적은', '중층', '올수리', '복층', '화장실세개', '마당', '테라스', '주차가능', '세대당 1.5대이상', '융자금없는', '2년이내', '관리비20만원이하', '화장실네개이상', '15년이내', '방네개이상', '25년이내', '1층', '소형평수', '저층', '10년이내', '주방교체', '욕실수리', '방한개', '화장실한개', '고층', '방세개', '25년이상', '화장실두개']


In [8]:
df2['image'] = df2['rletTpNm'].apply(lambda x: 'apartment.jpeg' if x == '아파트' else 'officetel.jpeg')

In [9]:
df2

Unnamed: 0,atclNo,atclNm,rletTpNm,tradTpNm,flrInfo,prc,rentPrc,hanPrc,spc1,spc2,...,atclCfmYmd,lat,lng,atclFetrDesc,tagList,bildNm,town,road_address,address,image
0,2429962181,렉스힐,오피스텔,매매,2/7,41000,0,"4억 1,000",45,29.25,...,2024-06-18T00:00:00,37.483014,127.062559,임대안고 매매 구룹과외등 공부방으로 최적,"['15년이내', '필로티', '소형평수', '방한개']",1동,강남구개포1동,,,officetel.jpeg
1,2429962181,렉스힐,오피스텔,매매,2/7,41000,0,"4억 1,000",45,29.25,...,2024-06-18T00:00:00,37.483014,127.062559,임대안고 매매 구룹과외등 공부방으로 최적,"['15년이내', '필로티', '소형평수', '방한개']",1동,강남구개포2동,,,officetel.jpeg
2,2430511404,강남역서희스타힐스,오피스텔,매매,9/12,27000,0,"2억 7,000",65,29.88,...,2024-06-21T00:00:00,37.491413,127.032589,,"['15년이내', '화장실한개', '방한개', '고층']",1동,강남구개포4동,,,officetel.jpeg
3,2430373834,강남웅진베어스빌,오피스텔,월세,6/16,10000,110,1억,48,26.26,...,2024-06-20T00:00:00,37.490213,127.032788,"전입대출,보증보험가능,층고높은복층","['10년이내', '복층', '소형평수']",1동,강남구개포4동,,,officetel.jpeg
4,2430327129,강남웅진베어스빌,오피스텔,전세,6/16,32000,0,"3억 2,000",48,26.26,...,2024-06-20T00:00:00,37.490213,127.032788,"전입대출,보증보험가능,층고높은복층","['10년이내', '복층', '소형평수']",1동,강남구개포4동,,,officetel.jpeg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1559,2427092490,리버뷰,오피스텔,월세,14/15,1000,80,1000,52,30.36,...,2024-06-03T00:00:00,37.522278,127.055325,초역세권 청담공원전망 개별냉난방 업무시설,"['25년이상', '융자금없는', '역세권', '화장실한개']",1동,강남구청담동,,,officetel.jpeg
1560,2429060192,더오키드청담,오피스텔,월세,13/17,20000,700,2억,94,69.17,...,2024-06-13T00:00:00,37.522677,127.055081,소유주직접의뢰 한강뷰 공원뷰 청담역초역세권 건조기 스타일러 식세기,"['2년이내', '역세권', '방한개']",1동,강남구청담동,,,officetel.jpeg
1561,2428816002,더오키드청담,오피스텔,매매,6/17,245000,0,"24억 5,000",94,69.17,...,2024-06-12T00:00:00,37.522677,127.055081,청담역 영동대교 남단 신축 하이엔드 오피스텔,"['2년이내', '역세권', '방두개']",1동,강남구청담동,,,officetel.jpeg
1562,2426348898,리버뷰,오피스텔,월세,14/15,1000,100,1000,68,39.16,...,2024-05-29T00:00:00,37.522278,127.055325,대로변 업무용 오피스텔,"['25년이상', '융자금없는', '역세권', '방한개']",1동,강남구청담동,,,officetel.jpeg


In [10]:
df.to_csv('./dataNop/apartment.csv', index=False, encoding='utf-8-sig')

In [11]:
df2.to_csv('./dataNop/officetel.csv', index=False, encoding='utf-8-sig')