In [1]:
import chardet
import pandas as pd
import numpy as np
from scipy.spatial import distance

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans

from sklearn.metrics import silhouette_score

def open_with_auto_sensing_encoding_types(file_path):
    with open(file_path, 'rb') as f:
        rawdata = f.read()
        result = chardet.detect(rawdata)
        encoding_type = result['encoding']

    print(f"파일의 인코딩: {encoding_type}")
    # 감지된 인코딩으로 파일 읽기
    df = pd.read_csv(file_path, encoding=encoding_type)
    print(len(df))
    return df

In [4]:
grid_path = "data/grid_parking_summary.csv"
grid_df = open_with_auto_sensing_encoding_types(grid_path)
grid_df

파일의 인코딩: UTF-8-SIG
11322


Unnamed: 0,center_lat,center_lon,주차장수,주차장정보
0,37.523725,127.056881,0.0,[]
1,37.524175,127.056879,0.0,[]
2,37.524626,127.056876,0.0,[]
3,37.525077,127.056873,0.0,[]
4,37.525527,127.056871,0.0,[]
...,...,...,...,...
11317,37.571696,127.113785,0.0,[]
11318,37.572147,127.113782,0.0,[]
11319,37.572598,127.113780,0.0,[]
11320,37.573048,127.113778,0.0,[]


In [22]:
non_na_grid_df = grid_df[grid_df["주차장정보"] != "[]"]
print(non_na_grid_df.head(2))

     center_lat  center_lon  주차장수  \
691   37.535004  127.060210   2.0   
799   37.533654  127.060784   1.0   

                                                 주차장정보  
691  [{'경도': 127.06046, '위도': 37.535041, '법정동코드': 1...  
799  [{'경도': 127.060665, '위도': 37.533727, '법정동코드': ...  


In [11]:
sample_data = non_na_grid_df["주차장정보"].head(1)
sample_data

691    [{'경도': 127.06046, '위도': 37.535041, '법정동코드': 1...
Name: 주차장정보, dtype: object

In [None]:
sample_data_list = sample_data.to_list()
sample_data_list

["[{'경도': 127.06046, '위도': 37.535041, '법정동코드': 10500, '주소': '서울특별시 광진구 자양동 210-23', '새주소-도로명': '동일로2길', '법정동명': '자양동'}, {'경도': 127.06045, '위도': 37.53513, '법정동코드': 10500, '주소': '서울특별시 광진구 자양동 210-22 대일카쎈타', '새주소-도로명': '동일로2길', '법정동명': '자양동'}]"]

In [23]:
import ast  # 문자열 형태의 리스트를 실제 리스트로 변환하기 위해 사용

# 주차장정보 리스트를 반복하면서 각 dict를 새로운 행으로 풀어냄
expanded_rows = []

for idx, row in non_na_grid_df.iterrows():
    try:
        # 문자열을 파이썬 객체로 변환 (리스트 of 딕셔너리)
        parking_list = ast.literal_eval(row["주차장정보"])
        
        for parking in parking_list:
            # center_lat, center_lon 등 기존 정보도 함께 포함
            combined = {
                "center_lat": row["center_lat"],
                "center_lon": row["center_lon"],
                "주차장수": row["주차장수"]
            }
            combined.update(parking)  # 주차장 dict 내용 추가
            expanded_rows.append(combined)
    except Exception as e:
        print(f"Error parsing row {idx}: {e}")

# 새 DataFrame 생성
expanded_df = pd.DataFrame(expanded_rows)

# 결과 확인
expanded_df.head(3)


Error parsing row 4847: malformed node or string on line 1: <ast.Name object at 0x12f233d30>
Error parsing row 6075: malformed node or string on line 1: <ast.Name object at 0x12f233e80>
Error parsing row 7739: malformed node or string on line 1: <ast.Name object at 0x130696a70>


Unnamed: 0,center_lat,center_lon,주차장수,경도,위도,법정동코드,주소,새주소-도로명,법정동명
0,37.535004,127.06021,2.0,127.06046,37.535041,10500,서울특별시 광진구 자양동 210-23,동일로2길,자양동
1,37.535004,127.06021,2.0,127.06045,37.53513,10500,서울특별시 광진구 자양동 210-22 대일카쎈타,동일로2길,자양동
2,37.533654,127.060784,1.0,127.060665,37.533727,10500,서울특별시 광진구 자양동 160-1,동일로,자양동


In [24]:
expanded_df.to_csv("grid_data_set.csv")

In [26]:
expanded_df["법정동명"].value_counts()

중곡동    5516
자양동    3533
구의동    3376
화양동    1139
군자동    1073
능동      822
광장동     336
Name: 법정동명, dtype: int64

In [28]:
구의동_df = expanded_df[expanded_df["법정동명"] == "구의동"]
구의동_df

Unnamed: 0,center_lat,center_lon,주차장수,경도,위도,법정동코드,주소,새주소-도로명,법정동명
6497,37.543192,127.081104,1.0,127.081245,37.543255,10300,서울특별시 광진구 구의동 649-3 그레이스,자양로23나길,구의동
6958,37.541842,127.081677,1.0,127.081912,37.541978,10300,서울특별시 광진구 구의동 652-6,자양로23가길,구의동
6959,37.542293,127.081675,10.0,127.081519,37.542187,10300,서울특별시 광진구 구의동 650-11 주함해븐힐,자양로23가길,구의동
6960,37.542293,127.081675,10.0,127.081529,37.542081,10300,서울특별시 광진구 자양동 650-12,자양로23가길,구의동
6961,37.542293,127.081675,10.0,127.081763,37.542267,10300,서울특별시 광진구 구의동 651-5,자양로23나길,구의동
...,...,...,...,...,...,...,...,...,...
15473,37.554065,127.097459,2.0,127.097702,37.554040,10300,서울특별시 광진구 구의동 18 영화유치원,영화사로,구의동
15474,37.554065,127.097459,2.0,127.097269,37.554064,10300,서울특별시 광진구 구의동 662 구의동 아차산 한라아파트,영화사로16길,구의동
15475,37.554516,127.097457,1.0,127.097270,37.554653,10300,서울특별시 광진구 구의동 17-3 진보아트,영화사로,구의동
15490,37.553618,127.098594,1.0,127.098798,37.553420,10300,서울특별시 광진구 구의동 4-5 서울동의초등학교,영화사로,구의동


In [29]:
import folium
import random
from folium.plugins import MarkerCluster

def visualization_girdmap(expanded_df, save_name):
    # folium은 html 기반 지도, 평균 좌표를 기준으로 지도 초기화
    center_lat = expanded_df["center_lat"].mean()
    center_lon = expanded_df["center_lon"].mean()
    m = folium.Map(location=[center_lat, center_lon], zoom_start=14)

    # 동 이름별 색상 지정
    dong_names = expanded_df["법정동명"].unique()
    color_map = {dong: f'#{random.randint(0, 0xFFFFFF):06x}' for dong in dong_names}

    # 마커 클러스터로 그룹화 (동적 줌/정리)
    marker_cluster = MarkerCluster().add_to(m)

    # 각 행마다 마커 추가
    for _, row in expanded_df.iterrows():
        lat, lon = row["위도"], row["경도"]
        dong = row["법정동명"]
        address = row["주소"]
        popup_text = f"{dong}<br>{address}"

        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color=color_map[dong],
            fill=True,
            fill_opacity=0.7,
            popup=folium.Popup(popup_text, max_width=250)
        ).add_to(marker_cluster)

    # 저장 및 표시
    return m.save(save_name)


In [30]:
구의동_map = visualization_girdmap(구의동_df, "구의동_주차장.html")

In [48]:
유흥업소_path = "/Users/yujin/Desktop/파일/3-1/데이터분석 공모전/codes/키즈카페입지분석2/data/공공장소/유흥주점.json"
버스정류장_path = "/Users/yujin/Desktop/파일/3-1/데이터분석 공모전/codes/키즈카페입지분석2/data/공공장소/광진구_버스정류장_좌표평균처리.csv"
음식점_path = "/Users/yujin/Desktop/파일/3-1/데이터분석 공모전/codes/키즈카페입지분석2/data/공공장소/서울시광진구일반음식점인허가정보.xls"

In [49]:
유흥업소_df = pd.read_json(유흥업소_path)
유흥업소_df

ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.

In [None]:
음식점_df = pd.read_excel(음식점_path)

버스정류장_df = open_with_auto_sensing_encoding_types(버스정류장_path)

  warn("Workbook contains no default style, apply openpyxl's default")


파일의 인코딩: utf-8
26
파일의 인코딩: UTF-8-SIG
156


In [38]:
음식점_df

Unnamed: 0,개방자치단체코드,관리번호,인허가일자,인허가취소일자,영업상태코드,영업상태명,상세영업상태코드,상세영업상태명,폐업일자,휴업시작일자,...,공장판매직종업원수,공장생산직종업원수,건물소유구분명,보증액,월세액,다중이용업소여부,시설총규모,전통업소지정번호,전통업소주된음식,홈페이지
0,3040000,3040000-101-1930-00394,1930-04-17,,3,폐업,2,폐업,1995-08-02,,...,,,,,,N,21.00,,,
1,3040000,3040000-101-1974-00270,1974-09-21,,3,폐업,2,폐업,2000-12-30,,...,,,,,,N,35.96,,,
2,3040000,3040000-101-1975-00414,1975-05-27,,3,폐업,2,폐업,1995-08-09,,...,,,,,,N,128.72,,,
3,3040000,3040000-101-1976-00121,1976-10-08,,3,폐업,2,폐업,2004-06-16,,...,,,,,,N,63.06,,,
4,3040000,3040000-101-1976-00215,1976-11-26,,3,폐업,2,폐업,2012-06-29,,...,,,,,,N,50.60,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17580,3040000,3040000-101-2025-00087,2025-03-25,,1,영업/정상,1,영업,,,...,,,,,,,,,,
17581,3040000,3040000-101-2025-00088,2025-03-25,,1,영업/정상,1,영업,,,...,,,,,,,,,,
17582,3040000,3040000-101-2025-00089,2025-03-27,,1,영업/정상,1,영업,,,...,,,,,,,,,,
17583,3040000,3040000-101-2025-00090,2025-03-28,,1,영업/정상,1,영업,,,...,,,,,,,,,,


In [44]:
유흥업소_df.head(4)

Unnamed: 0,������ġ��ü�ڵ�,������ȣ,���㰡����,���㰡�������,���������ڵ�,�������¸�,�󼼿��������ڵ�,�󼼿������¸�,�������,�޾���������,...,�����Ǹ�����������,�����������������,�ǹ��������и�,������.1,������.2,�����̿���ҿ���,�ü��ѱԸ�,�������������ȣ,��������ֵ�����,Ȩ������
0,3040000,3040000-102-1972-06844,1972-08-04,,3,���,2,���,2024-09-11,,...,0.0,0.0,,0.0,0.0,Y,75.33,,,
1,3040000,3040000-102-1974-06848,1974-11-05,,3,���,2,���,2021-05-11,,...,,,,,,Y,114.1,,,
2,3040000,3040000-102-1974-06852,1974-10-19,,1,����/����,1,����,,,...,,,,,,Y,89.48,,,
3,3040000,3040000-102-1975-06843,1975-10-10,,3,���,2,���,2010-06-21,,...,,,,,,N,112.55,,,


In [33]:
버스정류장_df.head(3)

Unnamed: 0,역명,X좌표,Y좌표,00시승차총승객수,00시하차총승객수,1시승차총승객수,1시하차총승객수,2시승차총승객수,2시하차총승객수,3시승차총승객수,...,19시승차총승객수,19시하차총승객수,20시승차총승객수,20시하차총승객수,21시승차총승객수,21시하차총승객수,22시승차총승객수,22시하차총승객수,23시승차총승객수,23시하차총승객수
0,CU중곡긴고랑점앞,127.093017,37.558838,0,0,0,0,0,0,0,...,47,155,34,167,28,108,11,103,0,0
1,강변역.테크노마트앞,127.094824,37.536367,0,0,0,0,0,0,0,...,3570,1677,2615,1055,2444,732,2362,567,862,192
2,강변역A,127.093775,37.536051,0,0,0,0,0,0,0,...,3749,1354,2453,1113,2415,931,2258,687,1202,322
