In [4]:
import csv
import pandas as pd
import scipy.stats as stats


def read_data(data_dir='.'):
    """Load the four facility tables and the police-precinct safety table.

    Parameters
    ----------
    data_dir : str, optional
        Directory that contains the CSV files. Defaults to the current
        working directory, which matches the original hard-coded paths.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(culture_center, rest_restaurant, restaurant, park, rate)``.
        ``rate`` has every row with a missing value removed and carries a
        fresh 0..n-1 index.
    """
    def _path(name):
        # With the default data_dir this yields './<name>', as before.
        return '{}/{}'.format(data_dir, name)

    # The facility files are read as EUC-KR; the precinct file uses the
    # pandas default encoding.
    culture_center = pd.read_csv(
        _path('df_culture_center.csv'), encoding='euc-kr', dtype={"새주소": str, "지번주소": str, "X": float, "Y": float, "콘텐츠 명": str, "구명": str, "상세정보 값1": str})
    rest_restaurant = pd.read_csv(
        _path('df_rest_restaurant_l.csv'), encoding='euc-kr')
    restaurant = pd.read_csv(
        _path('df_seoul_restaurant_l.csv'), encoding='euc-kr')
    park = pd.read_csv(_path('df_seoul_park.csv'), encoding='euc-kr')
    rate = pd.read_csv(_path('low_secure.csv'), dtype={'전체': int,'살인': int,'강도': int,'절도': int,
                                                    '폭력': int, '성폭력': int, '주소': str, '위도': float, '경도': float, '지구대': str, '관할구역': str})

    # dropna() keeps the surviving rows' original labels; renumber them so
    # that label-based access with 0..len-1 keeps working after rows are
    # removed.
    rate = rate.dropna().reset_index(drop=True)
    return culture_center, rest_restaurant, restaurant, park, rate


def make_dict(rate):
    """Build the per-precinct lookup dictionaries from the safety table.

    Parameters
    ----------
    rate : pandas.DataFrame
        Must contain the columns '지구대' (precinct name) and '관할구역'
        (jurisdiction names joined by ', '). Its first six columns are
        taken as the safety-grade columns.

    Returns
    -------
    tuple
        ``(boundary_dict, rate_dict, green_num)`` where
        ``boundary_dict`` maps precinct -> list of jurisdiction names,
        ``rate_dict`` maps precinct -> {grade column: value}, and
        ``green_num`` maps precinct -> 0 (facility counter to be filled in).
    """
    # Iterate by position rather than with .loc[0..n-1], so the function
    # also works on a frame whose index is not a default RangeIndex.
    stations = rate['지구대'].tolist()

    # Jurisdiction names of each precinct / police substation.
    boundary_dict = {
        station: areas.split(', ')
        for station, areas in zip(stations, rate['관할구역'])
    }

    # Safety grades of each precinct (the first six columns of the table).
    col_list = list(rate)[:6]
    grade_rows = rate[col_list].values.tolist()
    rate_dict = {
        station: dict(zip(col_list, grades))
        for station, grades in zip(stations, grade_rows)
    }

    # Facility counter per precinct, initialised to zero.
    green_num = dict.fromkeys(stations, 0)

    return boundary_dict, rate_dict, green_num

# Count neighborhood facilities per police precinct.


def _count_facilities(boundary_dict, green_num, facilities, columns):
    """Add the number of ``facilities`` rows located in each precinct.

    A row is attributed to a precinct when any of the precinct's
    jurisdiction names occurs as a substring of any of the row's
    ``columns`` values. Each row is counted at most once per precinct.
    ``green_num`` is updated in place and returned.
    """
    # Stringify each address column once, outside the precinct loop.
    # Missing values become their str() form (e.g. 'nan').
    texts_per_row = list(
        zip(*[[str(value) for value in facilities[col]] for col in columns]))

    for key, boundaries in boundary_dict.items():
        # An empty name is a substring of every string; ignore it.
        names = [bd for bd in boundaries if bd]
        if not names:
            continue
        hits = sum(
            1 for texts in texts_per_row
            if any(bd in text for text in texts for bd in names))
        if hits:
            green_num[key] += hits
    return green_num


# Cultural facilities


def count_culture_center(boundary_dict, green_num, rate):
    """Count cultural facilities per precinct.

    Parameters
    ----------
    boundary_dict : dict
        Precinct name -> list of jurisdiction names.
    green_num : dict
        Precinct name -> running facility count; updated in place.
    rate : pandas.DataFrame
        The cultural-facility table (the parameter name is kept for
        backward compatibility). Needs the columns '지번주소' and '새주소'.

    Returns
    -------
    dict
        The same ``green_num`` object.
    """
    return _count_facilities(
        boundary_dict, green_num, rate, ['지번주소', '새주소'])


# Rest restaurants (cafes, snack bars)


def count_rest_restaurant(boundary_dict, green_num, rate):
    """Count rest restaurants per precinct.

    ``rate`` is the rest-restaurant table (parameter name kept for
    backward compatibility) with the columns '행정동명', '소재지지번' and
    '소재지도로명'. ``green_num`` is updated in place and returned.
    """
    return _count_facilities(
        boundary_dict, green_num, rate,
        ['행정동명', '소재지지번', '소재지도로명'])


# General restaurants


def count_restaurant(boundary_dict, green_num, rate):
    """Count general restaurants per precinct.

    ``rate`` is the restaurant table (parameter name kept for backward
    compatibility) with the columns '행정동명', '소재지지번' and
    '소재지도로명'. ``green_num`` is updated in place and returned.
    """
    return _count_facilities(
        boundary_dict, green_num, rate,
        ['행정동명', '소재지지번', '소재지도로명'])


# Parks


def count_park(boundary_dict, green_num, rate):
    """Count parks per precinct.

    ``rate`` is the park table (parameter name kept for backward
    compatibility) with the columns '행정동' and '공원주소'.
    ``green_num`` is updated in place and returned.
    """
    return _count_facilities(
        boundary_dict, green_num, rate, ['행정동', '공원주소'])



In [5]:
# Load every table, then derive the per-precinct lookup dictionaries.
culture_center, rest_restaurant, restaurant, park, rate = read_data()
boundary_dict, rate_dict, green_num = make_dict(rate)

# Accumulate all four facility types into the same per-precinct counter.
for count_facilities, facilities in (
    (count_culture_center, culture_center),
    (count_rest_restaurant, rest_restaurant),
    (count_restaurant, restaurant),
    (count_park, park),
):
    green_num = count_facilities(boundary_dict, green_num, facilities)

print(green_num)


{'화곡': 4855, '곰달래': 5158, '공항': 3811, '까치산': 4822, '가양': 4582, '발산': 1822, '염창': 2592, '화곡3': 4160, '등촌2': 1734, '방환3': 1771, '목1': 5990, '목2': 5119, '신정1': 3546, '신정2': 1166, '신월2': 2659, '신월1': 2436, '신월5': 280, '신정3': 2646, '당현': 2374, '월계': 1959, '화랑': 2919, '노원역': 4474, '불암': 2614, '상계4': 3241, '마들': 3894, '상계1': 3814, '석관': 544, '장위': 690, '월곡': 2285, '종암': 486, '돈암': 1573, '안암': 2826, '길음': 3235, '정릉': 1297, '정릉2': 1101, '성북': 421, '창동': 3175, '신창': 2627, '방학': 1976, '쌍문': 1391, '신방학': 1615, '도봉1파출소': 1297, '도봉2': 1118, '승미': 1456, '녹번': 482, '신사': 942, '응암': 1343, '응암2': 185, '불광': 1687, '연신내': 2837, '진관': 675, '역촌파출소': 686, '대조파출소': 909}


In [None]:
# Debug check: show the Python type of the facility counter.
print(type(green_num))

In [6]:
def correlation(green_num, rate):
    """Correlate per-precinct facility counts with the safety columns.

    ``green_num`` (precinct -> facility count) is turned into a table and
    joined with ``rate`` on '지구대'. The joined facility/grade columns are
    written to ./corr_df.csv, their Pearson correlation matrix is written
    to ./corr.csv (both without the index), and the matrix is returned.
    The facility total and the matrix are also printed.
    """
    grade_columns = ['전체', '살인', '강도', '절도', '폭력', '성폭력']

    facility_counts = pd.DataFrame(
        list(green_num.items()), columns=['지구대', '근린시설'])
    joined = facility_counts.merge(rate, on='지구대')
    corr_df = joined[['근린시설'] + grade_columns]

    facility_total = corr_df['근린시설'].sum()
    print('sum: ', facility_total)
    corr_df.to_csv("./corr_df.csv", header=True, index=False)

    corr = corr_df.corr(method='pearson')
    print('corr :', corr)
    corr.to_csv("./corr.csv", header=True, index=False)
    return corr

# 각 구별 상관분석


# def corr_ad():
#     ad = ['종로구', '중구', '마포구', '영등포구']
#     corr_df = pd.read_csv('./corr_df.csv')

#     for i in range(len(ad)):
#         is_ad = corr_df['구'] == ad[i]
#         corr = is_ad.corr(method='pearson')
#         print('corr :', corr)
#     return

# green_df = pd.DataFrame(green_num.items(), columns=['지구대', '근린시설'])
# print(green_df)

In [7]:
# Correlate the facility counts with the safety columns; as the cell's last
# expression, the returned correlation matrix is also displayed.
correlation(green_num, rate)

sum:  123297
corr :           근린시설        전체        살인        강도        절도        폭력       성폭력
근린시설  1.000000  0.377855  0.327874  0.207676  0.482331  0.592672  0.441800
전체    0.377855  1.000000  0.328855  0.415574  0.352987  0.425978  0.911864
살인    0.327874  0.328855  1.000000  0.062242  0.283845  0.364570  0.249467
강도    0.207676  0.415574  0.062242  1.000000  0.159182  0.083949  0.362564
절도    0.482331  0.352987  0.283845  0.159182  1.000000  0.559665  0.344237
폭력    0.592672  0.425978  0.364570  0.083949  0.559665  1.000000  0.413290
성폭력   0.441800  0.911864  0.249467  0.362564  0.344237  0.413290  1.000000


Unnamed: 0,근린시설,전체,살인,강도,절도,폭력,성폭력
근린시설,1.0,0.377855,0.327874,0.207676,0.482331,0.592672,0.4418
전체,0.377855,1.0,0.328855,0.415574,0.352987,0.425978,0.911864
살인,0.327874,0.328855,1.0,0.062242,0.283845,0.36457,0.249467
강도,0.207676,0.415574,0.062242,1.0,0.159182,0.083949,0.362564
절도,0.482331,0.352987,0.283845,0.159182,1.0,0.559665,0.344237
폭력,0.592672,0.425978,0.36457,0.083949,0.559665,1.0,0.41329
성폭력,0.4418,0.911864,0.249467,0.362564,0.344237,0.41329,1.0
