# 불법주정차 데이터 위도/경도 수집 (Kakao API)
- https://developers.kakao.com
- 카카오맵 API 허용 설정 필수

### Present working directory

In [None]:
!pwd #Linux

/Users/hoyun/Documents/GitHub/Data_projects/dacon_sri_suwon_data_analysis


In [22]:
!cd #windows

### 패키지 설치

In [2]:
!pip install pandas numpy tqdm requests



### 패키지 선언

In [3]:
import json
import os
import time

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

### 카카오 API키 및 API 동작확인

In [None]:
# Kakao REST API key; it is sent as "KakaoAK <key>" in the Authorization header.
# Replace the placeholder text with a real key before running the cells below.
api_key = "API KEY 입력해주세요"

In [5]:
# Smoke test: geocode one known address to confirm the key and endpoint work.
addr = '서울시 송파구 위례광장로 185'
url = 'https://dapi.kakao.com/v2/local/search/address.json'
headers = {
    "Authorization": f"KakaoAK {api_key}"
}
# Pass the query through `params` so requests URL-encodes it; interpolating it
# into the URL by hand breaks on addresses containing '&', '#' or '+'.
params = {"query": addr}

# The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, params=params, timeout=10)
result = response.json()
display(result)

{'documents': [{'address': {'address_name': '서울 송파구 장지동 878',
    'b_code': '1171010900',
    'h_code': '1171064700',
    'main_address_no': '878',
    'mountain_yn': 'N',
    'region_1depth_name': '서울',
    'region_2depth_name': '송파구',
    'region_3depth_h_name': '위례동',
    'region_3depth_name': '장지동',
    'sub_address_no': '',
    'x': '127.14004016241',
    'y': '37.4804774688545'},
   'address_name': '서울 송파구 위례광장로 185',
   'address_type': 'ROAD_ADDR',
   'road_address': {'address_name': '서울 송파구 위례광장로 185',
    'building_name': '위례신도시 송파푸르지오',
    'main_building_no': '185',
    'region_1depth_name': '서울',
    'region_2depth_name': '송파구',
    'region_3depth_name': '장지동',
    'road_name': '위례광장로',
    'sub_building_no': '',
    'underground_yn': 'N',
    'x': '127.14004016241',
    'y': '37.4804774688545',
    'zone_no': '05848'},
   'x': '127.14004016241',
   'y': '37.4804774688545'}],
 'meta': {'is_end': True, 'pageable_count': 1, 'total_count': 1}}

### 데이터 불러오기

In [6]:
# The source data is split into files of 100k rows each (about 400k rows in total).
data1, data2, data3, data4, data5 = map(
    pd.read_csv,
    (
        'parking_notcctv_1.csv',
        'parking_notcctv_2.csv',
        'parking_notcctv_3.csv',
        'parking_notcctv_4.csv',
        'parking_notcctv_5.csv',
    ),
)

### 데이터 수집 및 적용 함수

In [16]:
def get_lat_lng(addr, timeout=10):
    """Geocode one address with the Kakao Local address-search API.

    Uses the module-level ``api_key`` for the Authorization header and sleeps
    0.15 seconds after every request to throttle the call rate.

    Args:
        addr: Address text to look up. ``None``, NaN and blank strings are
            treated as "no address" and are not sent to the API.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``(latitude, longitude)`` as floats, read from the ``y`` and ``x``
        fields of the first returned document, or ``(None, None)`` when the
        address is missing, the response status is not 200, no document is
        returned, or any error occurs during the lookup.
    """
    # Empty cells arrive from pandas as NaN (a float that is unequal to
    # itself); sending them would query the API for the literal text "nan".
    is_nan = isinstance(addr, float) and addr != addr
    if addr is None or is_nan or not str(addr).strip():
        return None, None

    try:
        headers = {"Authorization": f"KakaoAK {api_key}"}
        params = {"query": addr}
        url = "https://dapi.kakao.com/v2/local/search/address.json"

        # Without a timeout a single stalled connection blocks the whole batch.
        response = requests.get(
            url, headers=headers, params=params, timeout=timeout
        )
        time.sleep(0.15)
        if response.status_code == 200:
            documents = response.json().get("documents")
            if documents:
                return float(documents[0]["y"]), float(documents[0]["x"])  # latitude, longitude
        else:
            # Surface auth/quota failures instead of silently returning blanks.
            print(f'get_lat_lng 함수 실행 중.. HTTP {response.status_code} : {addr}')
        return None, None

    except Exception as e:
        # Deliberately best-effort: one bad row must not stop a long batch run.
        print(f'get_lat_lng 함수 실행 중.. 에러 : {e}')
        return None, None


def add_to_pos(data, api_key, chunk_index=None):
    """Add ``latitude``/``longitude`` columns by geocoding each row's ``단속장소``.

    Rows are processed in order under a tqdm progress bar, with a 5 second
    pause after every 1000 rows. If processing a row raises, the rows
    completed so far are written to a backup CSV in the working directory and
    the exception is re-raised.

    Args:
        data: DataFrame with a ``단속장소`` column. The two new columns are
            assigned on this object.
        api_key: Accepted for interface compatibility; it is not used here
            because ``get_lat_lng`` is called with the address only.
        chunk_index: Label embedded in the backup file name.

    Returns:
        ``data`` with the ``latitude`` and ``longitude`` columns added.

    Raises:
        Exception: Whatever the failing row raised, after the backup is saved.
    """
    # Kept from the original; progress_apply is not used inside this function.
    tqdm.pandas()

    # One (lat, lng) pair per finished row, so both columns stay in step.
    coords = []

    for i, (_, row) in enumerate(tqdm(data.iterrows(), total=len(data))):
        try:
            if i > 0 and i % 1000 == 0:
                print(f'{i} rows. 중간 딜레이 5se\n')
                time.sleep(5)

            lat, lng = get_lat_lng(row['단속장소'])
            coords.append((lat, lng))

        except Exception as e:
            print(f"add_to_pos 함수 실행 중.. {i}번째 줄 에러=>{e}")
            # Slice by the number of finished rows so lengths always match.
            temp_data = data.iloc[:len(coords)].copy()
            temp_data['latitude'] = [pair[0] for pair in coords]
            temp_data['longitude'] = [pair[1] for pair in coords]
            backup_file = f"중단에러_백업파일_{chunk_index}_{i}_줄.csv"
            # Write the DataFrame; calling to_csv on the file-name string
            # raised AttributeError and no backup was ever saved.
            temp_data.to_csv(backup_file, index=False)
            print("중단시점까지 저장. 파일명>>>", backup_file)
            raise  # bare raise keeps the original traceback

    data['latitude'] = [pair[0] for pair in coords]
    data['longitude'] = [pair[1] for pair in coords]
    return data

In [19]:
# Sample test: try the pipeline on 10 randomly drawn rows of data1 first.
partial_data = sample = data1.sample(n=10)

### 적용

In [None]:
n_chunks = 10

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('./backup_chunk', exist_ok=True)

# Split row positions rather than the DataFrame itself: np.array_split on a
# DataFrame goes through the deprecated DataFrame.swapaxes and emits a
# FutureWarning. Chunk sizes are the same as before. The copy gives each chunk
# its own data, so adding columns to it does not trigger SettingWithCopyWarning.
chunks = [
    partial_data.iloc[positions].copy()
    for positions in np.array_split(np.arange(len(partial_data)), n_chunks)
]
processed_chunks = []

for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i+1}/{n_chunks}")

    try:
        result = add_to_pos(chunk, api_key=api_key, chunk_index=i+1)

        result.to_csv(f'./backup_chunk/processed_chunk_data_[sample] {i+1}.csv', index=False)
        processed_chunks.append(result)
    except Exception as e:
        print(f"Loop 중단된 청크 : {i+1}, 발생 에러 : {e}")
        break
else:
    # Reached only when no chunk failed; the banner used to print after a break too.
    print('='*20)
    print("데이터 처리 완료")
    print('='*20)
    

  return bound(*args, **kwds)


Processing chunk 1/10


100%|██████████| 1/1 [00:00<00:00,  4.80it/s]


Processing chunk 2/10


100%|██████████| 1/1 [00:00<00:00,  4.64it/s]


Processing chunk 3/10


100%|██████████| 1/1 [00:00<00:00,  4.84it/s]


Processing chunk 4/10


100%|██████████| 1/1 [00:00<00:00,  3.45it/s]


Processing chunk 5/10


100%|██████████| 1/1 [00:00<00:00,  4.75it/s]


Processing chunk 6/10


100%|██████████| 1/1 [00:00<00:00,  3.56it/s]


Processing chunk 7/10


100%|██████████| 1/1 [00:00<00:00,  4.73it/s]


Processing chunk 8/10


100%|██████████| 1/1 [00:00<00:00,  4.78it/s]


Processing chunk 9/10


100%|██████████| 1/1 [00:00<00:00,  4.70it/s]


Processing chunk 10/10


100%|██████████| 1/1 [00:00<00:00,  4.53it/s]

데이터 처리 완료





### 실제 데이터 주소 -> 위/경도 변환
시간이 오래 걸릴 수 있음. 요청당 0.15초 딜레이에 1,000건마다 5초씩 추가로 대기하므로, 10만 건 기준 대기 시간만 4시간 이상 소요됨.

In [None]:
# Dataset to geocode. The original assignment had no right-hand side, which is
# a syntax error; data1 is a runnable default.
# TODO: switch to whichever of data1..data5 this run should process.
partial_data = data1

In [None]:
n_chunks = 10

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('./backup_chunk', exist_ok=True)

# Split row positions rather than the DataFrame itself: np.array_split on a
# DataFrame goes through the deprecated DataFrame.swapaxes and emits a
# FutureWarning. Chunk sizes are the same as before. The copy gives each chunk
# its own data, so adding columns to it does not trigger SettingWithCopyWarning.
chunks = [
    partial_data.iloc[positions].copy()
    for positions in np.array_split(np.arange(len(partial_data)), n_chunks)
]
processed_chunks = []

for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i+1}/{n_chunks}")

    try:
        result = add_to_pos(chunk, api_key=api_key, chunk_index=i+1)

        result.to_csv(f'./backup_chunk/processed_chunk_data3 {i+1}.csv', index=False)
        processed_chunks.append(result)
    except Exception as e:
        # Stop at the first failed chunk; chunks finished earlier are already saved.
        print(f"중단된 청크 : {i+1}, 발생 에러 : {e}")
        break
    

In [None]:
# Stitch the processed chunks back together under a fresh 0..n-1 index,
# then write the combined table to a single CSV without the index column.
final_data = pd.concat(objs=processed_chunks, ignore_index=True)
final_data.to_csv(path_or_buf='data_processed_data_by_who.csv', index=False)