In [1]:
import csv
import pandas as pd
import scipy.stats as stats


def read_data():
    culture_center = pd.read_csv(
        './df_culture_center.csv', encoding='euc-kr', dtype={"새주소": str, "지번주소": str, "X": float, "Y": float, "콘텐츠 명": str, "구명": str, "상세정보 값1": str})
    rest_restaurant = pd.read_csv(
        './df_rest_restaurant.csv', encoding='euc-kr')
    restaurant = pd.read_csv(
        './df_seoul_restaurant.csv', encoding='euc-kr')
    park = pd.read_csv('./df_seoul_park.csv', encoding='euc-kr')
    rate = pd.read_csv('./secure7.csv', dtype={'전체': int,'살인': int,'강도': int,'절도': int,
                                                    '폭력': int, '성폭력': int, '주소': str, '위도': float, '경도': float, '지구대': str, '관할구역': str})

    rate = rate.dropna()
    return culture_center, rest_restaurant, restaurant, park, rate


def make_dict(rate):
    # 각 지구대, 피출소 별 관할 구역
    boundary_dict = dict()
    # str(rate['관할구역'])
    for idx in range(len(rate)):
        boundary_dict[rate.loc[idx]['지구대']] = rate.loc[idx]['관할구역'].split(', ')
    # print(boundary_dict)

    # 각 지구대, 파출소 별 치안 등급 (전체, 살인, 강도, 절도, 폭력, 성폭력)
    rate_dict = dict()
    col_list = list(rate)[:6]  # 컬럼명
    for idx in range(len(rate)):
        rate_dict[rate.loc[idx]['지구대']] = dict()
        for c in col_list:
            rate_dict[rate.loc[idx]['지구대']][c] = rate.loc[idx][c]
#     print(rate_dict)

    # 각 지구대, 파출소 별 근린시설 갯수 초기화
    green_num = dict()
    for idx in range(len(rate)):
        green_num[rate.loc[idx]['지구대']] = 0

    return boundary_dict, rate_dict, green_num

# 각 지구대별 근린공원개수 세기

# 문화시설


def count_culture_center(boundary_dict, green_num, rate):
    keys = boundary_dict.keys()
    no_cnt = 0
    for key in keys:
        boundaries = boundary_dict[key]
        for idx in range(len(culture_center)):
            street = str(culture_center.loc[idx]['새주소'])
            addr = str(culture_center.loc[idx]['지번주소'])
            for bd in boundaries:
                if bd in addr:
                    green_num[key] += 1
                    continue
                elif bd in street:
                    green_num[key] += 1
                    continue
        # culture_center_num[key] /= len(boundaries)
        # print(key)
    # print(green_num)
    return green_num

# 휴게음식점


def count_rest_restaurant(boundary_dict, green_num, rate):
    keys = boundary_dict.keys()
    no_cnt = 0
    for key in keys:
        boundaries = boundary_dict[key]
        for idx in range(len(rest_restaurant)):
            dong = str(rest_restaurant.loc[idx]['행정동명'])
            addr = str(rest_restaurant.loc[idx]['소재지지번'])
            addr2 = str(rest_restaurant.loc[idx]['소재지도로명'])
            for bd in boundaries:
                if bd in dong:
                    green_num[key] += 1
                    continue
                elif bd in addr:
                    green_num[key] += 1
                    continue
                elif bd in addr2:
                    green_num[key] += 1
                    continue

        # culture_center_num[key] /= len(boundaries)
        # print(key)
    # print(green_num)
    return green_num

# 일반음식점


def count_restaurant(boundary_dict, green_num, rate):
    keys = boundary_dict.keys()
    no_cnt = 0
    for key in keys:
        boundaries = boundary_dict[key]
        for idx in range(len(restaurant)):
            dong = str(restaurant.loc[idx]['행정동명'])
            addr = str(restaurant.loc[idx]['소재지지번'])
            addr2 = str(restaurant.loc[idx]['소재지도로명'])
            for bd in boundaries:
                if bd in dong:
                    green_num[key] += 1
                    continue
                elif bd in addr:
                    green_num[key] += 1
                    continue
                elif bd in addr2:
                    green_num[key] += 1
                    continue
        # culture_center_num[key] /= len(boundaries)
        # print(key)
    # print(green_num)
    return green_num

# 공원


def count_park(boundary_dict, green_num, rate):
    keys = boundary_dict.keys()
    no_cnt = 0
    for key in keys:
        boundaries = boundary_dict[key]
        for idx in range(len(park)):
            dong = str(park.loc[idx]['행정동'])
            addr = str(park.loc[idx]['공원주소'])
            for bd in boundaries:
                if bd in dong:
                    green_num[key] += 1
                    continue
                elif bd in addr:
                    green_num[key] += 1
                    continue
        # culture_center_num[key] /= len(boundaries)
        # print(key)
#     print(green_num)
    return green_num



In [2]:
culture_center, rest_restaurant, restaurant, park, rate = read_data()
boundary_dict, rate_dict, green_num = make_dict(rate)

count_culture_center(boundary_dict, green_num, culture_center)
count_rest_restaurant(boundary_dict, green_num, rest_restaurant)
count_restaurant(boundary_dict, green_num, restaurant)
green_num = count_park(boundary_dict, green_num, park)

print(green_num)
# correlation(cctv_num, rate)
# corr_ad()


{'교남': 3058, '사직': 2992, '옥인': 383, '청운': 71, '통의': 878, '신문로': 4595, '삼청': 12766, '청진': 1267, '관수': 1560, '종로2가': 3023, '종로5가': 2715, '효제': 1902, '대학로': 2277, '덕산': 196, '명륜': 1249, '혜화': 1444, '창신': 1292, '동묘': 1215, '세검정': 821, '평창': 4636, '상암': 2121, '월드컵': 7630, '연남': 4438, '공덕': 165, '용강': 3225, '서강': 10337, '홍익': 7673, '망원': 1260, '문래': 3494, '영등포역': 6060, '당산': 3797, '대림3': 585, '대림': 2332, '신풍': 1772, '양평': 3299, '신길': 2216, '여의도': 6159, '중앙': 7129, '세종로': 1340, '태평로': 2805, '신당': 337, '광희': 155421, '약수': 413, '서울역': 753, '중림': 508, '서소문': 5379, '명동': 2136, '을지로3가': 2068, '을지': 3693, '충무': 1242, '회현': 1346, '남대문': 1122, '장충': 1196, '원효': 1693, '용중': 2541, '이태원': 2082, '한남': 0, '보광': 462, '용산역': 915, '한강로': 1186, '역삼': 12003, '논현1': 6871, '삼성1': 2072, '삼성2': 492, '청담': 1851, '신사': 0, '압구정': 5270, '대치': 2764, '도곡': 8645, '일원': 953, '개포': 606, '수서': 381, '대왕': 393, '논현2': 7841, '문성': 962, '백산': 852, '금천': 1190, '독산': 946, '가산 ': 1976, '신구로': 53, '오류': 1, '천왕': 677, '구일': 0, '구로3'

In [None]:
print(type(green_num))

In [3]:
def correlation(green_num, rate):
    green_df = pd.DataFrame(green_num.items(), columns=['지구대', '근린시설'])
    merge_rate_green = pd.merge(green_df, rate, on='지구대')
    corr_df = merge_rate_green[['근린시설','전체', '살인', '강도', '절도', '폭력', '성폭력']]
    # print('corr_df :', corr_df)
    print('sum: ', corr_df['근린시설'].sum())
    corr_df.to_csv("./corr_df.csv", header=True, index=False)

    corr = corr_df.corr(method='pearson')
    print('corr :', corr)
    corr.to_csv("./corr.csv", header=True, index=False)
    return corr

# 각 구별 상관분석


# def corr_ad():
#     ad = ['종로구', '중구', '마포구', '영등포구']
#     corr_df = pd.read_csv('./corr_df.csv')

#     for i in range(len(ad)):
#         is_ad = corr_df['구'] == ad[i]
#         corr = is_ad.corr(method='pearson')
#         print('corr :', corr)
#     return

# green_df = pd.DataFrame(green_num.items(), columns=['지구대', '근린시설'])
# print(green_df)

In [4]:
correlation(green_num, rate)

sum:  525494
corr :           근린시설        전체        살인        강도        절도        폭력       성폭력
근린시설  1.000000  0.062606  0.071215  0.019157 -0.087116 -0.077524 -0.005960
전체    0.062606  1.000000  0.466231  0.594783  0.574088  0.477147  0.755298
살인    0.071215  0.466231  1.000000  0.524719  0.229141  0.314529  0.325826
강도    0.019157  0.594783  0.524719  1.000000  0.326970  0.209699  0.412056
절도   -0.087116  0.574088  0.229141  0.326970  1.000000  0.692077  0.408250
폭력   -0.077524  0.477147  0.314529  0.209699  0.692077  1.000000  0.366570
성폭력  -0.005960  0.755298  0.325826  0.412056  0.408250  0.366570  1.000000


Unnamed: 0,근린시설,전체,살인,강도,절도,폭력,성폭력
근린시설,1.0,0.062606,0.071215,0.019157,-0.087116,-0.077524,-0.00596
전체,0.062606,1.0,0.466231,0.594783,0.574088,0.477147,0.755298
살인,0.071215,0.466231,1.0,0.524719,0.229141,0.314529,0.325826
강도,0.019157,0.594783,0.524719,1.0,0.32697,0.209699,0.412056
절도,-0.087116,0.574088,0.229141,0.32697,1.0,0.692077,0.40825
폭력,-0.077524,0.477147,0.314529,0.209699,0.692077,1.0,0.36657
성폭력,-0.00596,0.755298,0.325826,0.412056,0.40825,0.36657,1.0
