In [3]:
import csv
import pandas as pd
import scipy.stats as stats


def read_data():
    culture_center = pd.read_csv(
        './df_culture_center.csv', encoding='euc-kr', dtype={"새주소": str, "지번주소": str, "X": float, "Y": float, "콘텐츠 명": str, "구명": str, "상세정보 값1": str})
    rest_restaurant = pd.read_csv(
        './df_rest_restaurant_h.csv', encoding='euc-kr')
    restaurant = pd.read_csv(
        './df_seoul_restaurant_h.csv', encoding='euc-kr')
    park = pd.read_csv('./df_seoul_park.csv', encoding='euc-kr')
    rate = pd.read_csv('./secure7.csv', dtype={'전체': int,'살인': int,'강도': int,'절도': int,
                                                    '폭력': int, '성폭력': int, '주소': str, '위도': float, '경도': float, '지구대': str, '관할구역': str})

    rate = rate.dropna()
    return culture_center, rest_restaurant, restaurant, park, rate


def make_dict(rate):
    # 각 지구대, 피출소 별 관할 구역
    boundary_dict = dict()
    # str(rate['관할구역'])
    for idx in range(len(rate)):
        boundary_dict[rate.loc[idx]['지구대']] = rate.loc[idx]['관할구역'].split(', ')
    # print(boundary_dict)

    # 각 지구대, 파출소 별 치안 등급 (전체, 살인, 강도, 절도, 폭력, 성폭력)
    rate_dict = dict()
    col_list = list(rate)[:6]  # 컬럼명
    for idx in range(len(rate)):
        rate_dict[rate.loc[idx]['지구대']] = dict()
        for c in col_list:
            rate_dict[rate.loc[idx]['지구대']][c] = rate.loc[idx][c]
#     print(rate_dict)

    # 각 지구대, 파출소 별 근린시설 갯수 초기화
    green_num = dict()
    for idx in range(len(rate)):
        green_num[rate.loc[idx]['지구대']] = 0

    return boundary_dict, rate_dict, green_num

# 각 지구대별 근린공원개수 세기

# 문화시설


def count_culture_center(boundary_dict, green_num, rate):
    keys = boundary_dict.keys()
    no_cnt = 0
    for key in keys:
        boundaries = boundary_dict[key]
        for idx in range(len(culture_center)):
            street = str(culture_center.loc[idx]['새주소'])
            addr = str(culture_center.loc[idx]['지번주소'])
            for bd in boundaries:
                if bd in addr:
                    green_num[key] += 1
                    continue
                elif bd in street:
                    green_num[key] += 1
                    continue
        # culture_center_num[key] /= len(boundaries)
        # print(key)
    # print(green_num)
    return green_num

# 휴게음식점


def count_rest_restaurant(boundary_dict, green_num, rate):
    keys = boundary_dict.keys()
    no_cnt = 0
    for key in keys:
        boundaries = boundary_dict[key]
        for idx in range(len(rest_restaurant)):
            dong = str(rest_restaurant.loc[idx]['행정동명'])
            addr = str(rest_restaurant.loc[idx]['소재지지번'])
            addr2 = str(rest_restaurant.loc[idx]['소재지도로명'])
            for bd in boundaries:
                if bd in dong:
                    green_num[key] += 1
                    continue
                elif bd in addr:
                    green_num[key] += 1
                    continue
                elif bd in addr2:
                    green_num[key] += 1
                    continue

        # culture_center_num[key] /= len(boundaries)
        # print(key)
    # print(green_num)
    return green_num

# 일반음식점


def count_restaurant(boundary_dict, green_num, rate):
    keys = boundary_dict.keys()
    no_cnt = 0
    for key in keys:
        boundaries = boundary_dict[key]
        for idx in range(len(restaurant)):
            dong = str(restaurant.loc[idx]['행정동명'])
            addr = str(restaurant.loc[idx]['소재지지번'])
            addr2 = str(restaurant.loc[idx]['소재지도로명'])
            for bd in boundaries:
                if bd in dong:
                    green_num[key] += 1
                    continue
                elif bd in addr:
                    green_num[key] += 1
                    continue
                elif bd in addr2:
                    green_num[key] += 1
                    continue
        # culture_center_num[key] /= len(boundaries)
        # print(key)
    # print(green_num)
    return green_num

# 공원


def count_park(boundary_dict, green_num, rate):
    keys = boundary_dict.keys()
    no_cnt = 0
    for key in keys:
        boundaries = boundary_dict[key]
        for idx in range(len(park)):
            dong = str(park.loc[idx]['행정동'])
            addr = str(park.loc[idx]['공원주소'])
            for bd in boundaries:
                if bd in dong:
                    green_num[key] += 1
                    continue
                elif bd in addr:
                    green_num[key] += 1
                    continue
        # culture_center_num[key] /= len(boundaries)
        # print(key)
#     print(green_num)
    return green_num



In [4]:
culture_center, rest_restaurant, restaurant, park, rate = read_data()
boundary_dict, rate_dict, green_num = make_dict(rate)

count_culture_center(boundary_dict, green_num, culture_center)
count_rest_restaurant(boundary_dict, green_num, rest_restaurant)
count_restaurant(boundary_dict, green_num, restaurant)
green_num = count_park(boundary_dict, green_num, park)

print(green_num)
# correlation(cctv_num, rate)
# corr_ad()


{'교남': 2324, '사직': 2257, '옥인': 336, '청운': 80, '통의': 779, '신문로': 3508, '삼청': 5128, '청진': 1092, '관수': 1432, '종로2가': 2378, '종로5가': 2167, '효제': 1644, '대학로': 2126, '덕산': 149, '명륜': 1054, '혜화': 1244, '창신': 1058, '동묘': 940, '세검정': 887, '평창': 3403, '상암': 2185, '월드컵': 6723, '연남': 3482, '공덕': 194, '용강': 2908, '서강': 6944, '홍익': 6315, '망원': 1108, '문래': 3234, '영등포역': 5586, '당산': 2837, '대림3': 401, '대림': 1461, '신풍': 1233, '양평': 2306, '신길': 1602, '여의도': 4838, '중앙': 5691, '세종로': 1014, '태평로': 2738, '신당': 277, '광희': 105162, '약수': 376, '서울역': 1003, '중림': 705, '서소문': 3985, '명동': 2940, '을지로3가': 1615, '을지': 3392, '충무': 1053, '회현': 1147, '남대문': 1571, '장충': 1020, '원효': 1600, '용중': 2462, '이태원': 1383, '한남': 0, '보광': 375, '용산역': 813, '한강로': 1266, '역삼': 11741, '논현1': 6083, '삼성1': 3216, '삼성2': 647, '청담': 1574, '신사': 0, '압구정': 5471, '대치': 3241, '도곡': 8350, '일원': 1094, '개포': 769, '수서': 422, '대왕': 420, '논현2': 6284, '문성': 776, '백산': 829, '금천': 1100, '독산': 865, '가산 ': 1973, '신구로': 3134, '오류': 402, '천왕': 213, '구일': 330, 

In [None]:
print(type(green_num))

In [5]:
def correlation(green_num, rate):
    green_df = pd.DataFrame(green_num.items(), columns=['지구대', '근린시설'])
    merge_rate_green = pd.merge(green_df, rate, on='지구대')
    corr_df = merge_rate_green[['근린시설','전체', '살인', '강도', '절도', '폭력', '성폭력']]
    # print('corr_df :', corr_df)
    print('sum: ', corr_df['근린시설'].sum())
    corr_df.to_csv("./corr_df.csv", header=True, index=False)

    corr = corr_df.corr(method='pearson')
    print('corr :', corr)
    corr.to_csv("./corr.csv", header=True, index=False)
    return corr

# 각 구별 상관분석


# def corr_ad():
#     ad = ['종로구', '중구', '마포구', '영등포구']
#     corr_df = pd.read_csv('./corr_df.csv')

#     for i in range(len(ad)):
#         is_ad = corr_df['구'] == ad[i]
#         corr = is_ad.corr(method='pearson')
#         print('corr :', corr)
#     return

# green_df = pd.DataFrame(green_num.items(), columns=['지구대', '근린시설'])
# print(green_df)

In [6]:
correlation(green_num, rate)

sum:  398238
corr :           근린시설        전체        살인        강도        절도        폭력       성폭력
근린시설  1.000000  0.072947  0.084635  0.034499 -0.083350 -0.076655  0.012008
전체    0.072947  1.000000  0.466231  0.594783  0.574088  0.477147  0.755298
살인    0.084635  0.466231  1.000000  0.524719  0.229141  0.314529  0.325826
강도    0.034499  0.594783  0.524719  1.000000  0.326970  0.209699  0.412056
절도   -0.083350  0.574088  0.229141  0.326970  1.000000  0.692077  0.408250
폭력   -0.076655  0.477147  0.314529  0.209699  0.692077  1.000000  0.366570
성폭력   0.012008  0.755298  0.325826  0.412056  0.408250  0.366570  1.000000


Unnamed: 0,근린시설,전체,살인,강도,절도,폭력,성폭력
근린시설,1.0,0.072947,0.084635,0.034499,-0.08335,-0.076655,0.012008
전체,0.072947,1.0,0.466231,0.594783,0.574088,0.477147,0.755298
살인,0.084635,0.466231,1.0,0.524719,0.229141,0.314529,0.325826
강도,0.034499,0.594783,0.524719,1.0,0.32697,0.209699,0.412056
절도,-0.08335,0.574088,0.229141,0.32697,1.0,0.692077,0.40825
폭력,-0.076655,0.477147,0.314529,0.209699,0.692077,1.0,0.36657
성폭력,0.012008,0.755298,0.325826,0.412056,0.40825,0.36657,1.0
