# 범죄 데이터 정규화

> 범죄별 발생 건수를 0과 1 사이 값으로 정규화한다.

In [3]:
import pandas as pd

# Load the per-district crime table; the first CSV column is used as the index.
crime_anal_gu = pd.read_csv(
    "./result_data/04_crime_anal_gu.csv",
    index_col=0,
)
crime_anal_gu.head(n=5)

Unnamed: 0_level_0,강간,강도,살인,절도,폭력,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
강남구,516.0,39.0,5.0,3587.0,4002.0,80.03876,100.0,100.0,53.470867,88.130935
강동구,160.0,14.0,4.0,1754.0,2530.0,95.0,92.857143,100.0,51.425314,86.996047
강북구,217.0,5.0,7.0,1222.0,2778.0,73.271889,80.0,85.714286,54.991817,89.344852
강서구,275.0,10.0,9.0,1952.0,3204.0,86.909091,100.0,100.0,54.815574,86.39201
관악구,322.0,12.0,6.0,2103.0,3235.0,81.987578,83.333333,100.0,44.555397,83.678516


#### 🔰 구별 데이터에서 발생건수 정규화 데이터 생성

- 범죄의 경중에 따라 발생 건수의 차이가 크다.
- 살인은 한 자리 수 발생, 절도나 폭력은 네 자리 수 발생

- 발생 건수를 0과 1 사이로 정규화하여 최대값이 1이 되면 상호 비교가 용이해진다.

- 정규화 : 각 값을 해당 범죄의 최대값으로 나누어 최고값이 1이 되도록 한다. (최대값으로만 나누므로 최소값이 반드시 0이 되지는 않는다.)

In [16]:
# Demo on a single column: divide robbery counts by the column maximum,
# so the district with the most robberies becomes 1.0.
crime_anal_gu["강도"].div(crime_anal_gu["강도"].max())

구별
강남구     1.000000
강동구     0.358974
강북구     0.128205
강서구     0.256410
관악구     0.307692
광진구     0.282051
구로구     0.256410
금천구     0.179487
노원구     0.153846
도봉구     0.128205
동대문구    0.256410
동작구     0.179487
마포구     0.102564
서대문구    0.128205
서초구     0.333333
성동구     0.076923
성북구     0.205128
송파구     0.384615
양천구     0.179487
영등포구    0.487179
용산구     0.230769
은평구     0.230769
종로구     0.307692
중구      0.205128
중랑구     0.358974
Name: 강도, dtype: float64

In [5]:
# The five crime-count columns to be scaled.
col1 = ["살인", "강도", "강간", "절도", "폭력"]

# Divide each column by its own maximum, so the district with the most cases
# of a given crime gets 1.0 and every other district gets a fraction of that.
crime_anal_norm = crime_anal_gu[col1].div(crime_anal_gu[col1].max(), axis="columns")
crime_anal_norm.head(n=5)

Unnamed: 0_level_0,살인,강도,강간,절도,폭력
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
강남구,0.384615,1.0,1.0,1.0,1.0
강동구,0.307692,0.358974,0.310078,0.488988,0.632184
강북구,0.538462,0.128205,0.420543,0.340675,0.694153
강서구,0.692308,0.25641,0.532946,0.544187,0.8006
관악구,0.461538,0.307692,0.624031,0.586284,0.808346


- 새로 만든 DataFrame에 검거율 컬럼도 추가

In [6]:
# Arrest-rate columns, copied unscaled from the source table.
col2 = ["강간검거율", "강도검거율", "살인검거율", "절도검거율", "폭력검거율"]

# Append the arrest rates next to the scaled crime counts.
crime_anal_norm[col2] = crime_anal_gu.loc[:, col2]
crime_anal_norm.head(n=5)

Unnamed: 0_level_0,살인,강도,강간,절도,폭력,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
강남구,0.384615,1.0,1.0,1.0,1.0,80.03876,100.0,100.0,53.470867,88.130935
강동구,0.307692,0.358974,0.310078,0.488988,0.632184,95.0,92.857143,100.0,51.425314,86.996047
강북구,0.538462,0.128205,0.420543,0.340675,0.694153,73.271889,80.0,85.714286,54.991817,89.344852
강서구,0.692308,0.25641,0.532946,0.544187,0.8006,86.909091,100.0,100.0,54.815574,86.39201
관악구,0.461538,0.307692,0.624031,0.586284,0.808346,81.987578,83.333333,100.0,44.555397,83.678516


#### 🔰 서울시 CCTV 데이터 추가

> 서울시 CCTV 자료에서 구별 인구수와 CCTV수를 가져와 추가한다.

In [10]:
# Peek at the first row of the merged CCTV result to see which columns it has.
pd.read_csv(
    "../01_Seoul_CCTV/result_data/04_merge_data_result.csv"
).head(n=1)

Unnamed: 0,구별,소계,최근증가율,인구수,한국인,외국인,고령자,외국인비율,고령자비율,CCTV비율,오차
0,강남구,3238,150.619195,561052,556164,4888,65060,0.87122,11.596073,0.57713,1549.200326


In [11]:
# Load the merged CCTV result and index it by district ("구별") so it lines up
# with the crime table's index.
result_CCTV = pd.read_csv(
    "../01_Seoul_CCTV/result_data/04_merge_data_result.csv",
    encoding="utf-8",
).set_index("구별")
result_CCTV.head(n=5)


Unnamed: 0_level_0,소계,최근증가율,인구수,한국인,외국인,고령자,외국인비율,고령자비율,CCTV비율,오차
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
강남구,3238,150.619195,561052,556164,4888,65060,0.87122,11.596073,0.57713,1549.200326
강동구,1010,166.490765,440359,436223,4136,56161,0.939234,12.753458,0.229358,-544.642322
강북구,831,125.203252,328002,324479,3523,56530,1.074079,17.234651,0.253352,-598.750923
강서구,911,134.793814,608255,601691,6564,76032,1.079153,12.500021,0.149773,-830.268578
관악구,2109,149.29078,520929,503297,17632,70046,3.384722,13.446362,0.404854,464.799395


In [12]:
# Bring in population ("인구수") and the CCTV count ("소계", stored here as "CCTV")
# from the CCTV result table; rows are matched on the shared district index.
crime_anal_norm["인구수"] = result_CCTV["인구수"]
crime_anal_norm["CCTV"] = result_CCTV["소계"]
crime_anal_norm.head(n=5)

Unnamed: 0_level_0,살인,강도,강간,절도,폭력,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율,인구수,CCTV
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
강남구,0.384615,1.0,1.0,1.0,1.0,80.03876,100.0,100.0,53.470867,88.130935,561052,3238
강동구,0.307692,0.358974,0.310078,0.488988,0.632184,95.0,92.857143,100.0,51.425314,86.996047,440359,1010
강북구,0.538462,0.128205,0.420543,0.340675,0.694153,73.271889,80.0,85.714286,54.991817,89.344852,328002,831
강서구,0.692308,0.25641,0.532946,0.544187,0.8006,86.909091,100.0,100.0,54.815574,86.39201,608255,911
관악구,0.461538,0.307692,0.624031,0.586284,0.808346,81.987578,83.333333,100.0,44.555397,83.678516,520929,2109


#### 🔰 지표 데이터 추가

- np.mean()

	- axis=0은 열(column)별 평균(행 방향으로 계산), axis=1은 행(row)별 평균(열 방향으로 계산)을 구한다.

In [27]:
import numpy as np

# Five sample values in a 1-D array, used below to demonstrate np.mean.
arr1D = np.array((0.357143, 1.0, 1.0, 0.977118, 0.733773))
arr1D

array([0.357143, 1.      , 1.      , 0.977118, 0.733773])

In [25]:
# Mean over all elements of the 1-D array.
arr1D.mean()

0.8136068

In [28]:
# Two rows of five sample values each, used below to compare axis=0 and axis=1.
arr2D = np.array(
    (
        (0.357143, 1.0, 1.0, 0.977118, 0.733773),
        (0.285714, 0.358974, 0.310078, 0.477799, 0.463880),
    )
)
arr2D

array([[0.357143, 1.      , 1.      , 0.977118, 0.733773],
       [0.285714, 0.358974, 0.310078, 0.477799, 0.46388 ]])

In [29]:
# axis=0 averages down the rows (one value per column);
# axis=1 averages across the columns (one value per row).
arr2D.mean(axis=0), arr2D.mean(axis=1)

(array([0.3214285, 0.679487 , 0.655039 , 0.7274585, 0.5988265]),
 array([0.8136068, 0.379289 ]))

> 정규화된 범죄발생 건수 전체의 평균을 구해서 '범죄'의 대표값으로 사용한다.

- "범죄" 컬럼을 만들고 5대 범죄 발생건수의 평균값을 할당한다.

In [13]:
# Scaled crime-count columns that feed the summary "범죄" (crime) score.
col = ["살인", "강도", "강간", "절도", "폭력"]

# axis=1: average across the selected columns, giving one value per district row.
crime_anal_norm["범죄"] = crime_anal_norm[col].mean(axis=1)
crime_anal_norm.head(n=5)

Unnamed: 0_level_0,살인,강도,강간,절도,폭력,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율,인구수,CCTV,범죄
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
강남구,0.384615,1.0,1.0,1.0,1.0,80.03876,100.0,100.0,53.470867,88.130935,561052,3238,0.876923
강동구,0.307692,0.358974,0.310078,0.488988,0.632184,95.0,92.857143,100.0,51.425314,86.996047,440359,1010,0.419583
강북구,0.538462,0.128205,0.420543,0.340675,0.694153,73.271889,80.0,85.714286,54.991817,89.344852,328002,831,0.424407
강서구,0.692308,0.25641,0.532946,0.544187,0.8006,86.909091,100.0,100.0,54.815574,86.39201,608255,911,0.56529
관악구,0.461538,0.307692,0.624031,0.586284,0.808346,81.987578,83.333333,100.0,44.555397,83.678516,520929,2109,0.557578


> 5대 범죄 검거율의 평균을 구해 '검거'의 대표값으로 사용한다.

- "검거" 컬럼을 만들고 5대 범죄 검거율의 평균값을 할당한다.

In [34]:
# Arrest-rate columns that feed the summary "검거" (arrest) score.
col = ["강간검거율", "강도검거율", "살인검거율", "절도검거율", "폭력검거율"]

# Row-wise mean of the five arrest rates, giving one value per district.
crime_anal_norm["검거"] = crime_anal_norm[col].mean(axis=1)
crime_anal_norm

Unnamed: 0_level_0,살인,강도,강간,절도,폭력,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율,인구수,CCTV,범죄,검거
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
강남구,0.384615,1.0,1.0,1.0,1.0,80.03876,100.0,100.0,53.470867,88.130935,561052,3238,0.876923,84.328112
강동구,0.307692,0.358974,0.310078,0.488988,0.632184,95.0,92.857143,100.0,51.425314,86.996047,440359,1010,0.419583,85.255701
강북구,0.538462,0.128205,0.420543,0.340675,0.694153,73.271889,80.0,85.714286,54.991817,89.344852,328002,831,0.424407,76.664569
강서구,0.692308,0.25641,0.532946,0.544187,0.8006,86.909091,100.0,100.0,54.815574,86.39201,608255,911,0.56529,85.623335
관악구,0.461538,0.307692,0.624031,0.586284,0.808346,81.987578,83.333333,100.0,44.555397,83.678516,520929,2109,0.557578,78.710965
광진구,0.307692,0.282051,0.540698,0.734876,0.597701,83.870968,54.545455,100.0,40.098634,84.071906,372298,878,0.492604,72.517393
구로구,0.692308,0.25641,0.52907,0.532478,0.790605,66.300366,100.0,100.0,45.078534,84.702908,441559,1884,0.560174,79.216362
금천구,0.461538,0.179487,0.339147,0.352384,0.547976,81.714286,100.0,100.0,51.740506,88.73689,253491,1348,0.376107,84.438336
노원구,0.384615,0.153846,0.30814,0.517703,0.628686,89.308176,100.0,100.0,39.849219,84.419714,558075,1566,0.398598,82.715422
도봉구,0.230769,0.128205,0.238372,0.241427,0.36007,98.373984,100.0,100.0,56.812933,90.839695,346234,825,0.239769,89.205322


In [32]:
# Write the combined table (scaled counts, arrest rates, population, CCTV,
# and the two summary columns) to CSV, keeping the district index.
crime_anal_norm.to_csv(
    "./result_data/05_crime_anal_norm.csv",
    encoding="utf-8",
)