## CCTV 개수 등 정량적 기준과 서울시 예산 비교하기

2024년 8월 10일 데이터 분석 자습/실습 

* CCTV가 많은 구와 적은 구 출력
* 인구 내 외국인, 고령자 비율 대비 CCTV 비율이 높은 구와 낮은 구 간의 예산 비교
* 구별로 범죄율의 높낮음이 예산에 미치는 영향이 있는지 점검
* 나아가: CCTV가 많이 설치된 구는 예산도 많이 배정되는지를 확인, 이유를 분석한다 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import platform

from matplotlib import font_manager, rc
plt.rcParams['axes.unicode_minus'] = False

if platform.system() == 'Darwin':
    rc('font', family='AppleGothic')
elif platform.system() == 'Windows':
    path = "c:/Windows/Fonts/malgun.ttf"
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
else:
    print('Unable to acknowldege system. Maybe Linux?')

In [2]:
SeoulCCTV = pd.read_csv('../data/01. CCTV_in_Seoul.csv', encoding='utf-8')
SeoulCCTV.rename(columns={SeoulCCTV.columns[0] : '구별'}, inplace=True)

In [3]:
SeoulPops = pd.read_excel('../data/01. population_in_Seoul.xls', 
                          header = 2,
                          usecols = 'B, D, G, J, N')

SeoulPops.rename(columns={SeoulPops.columns[0] : '구별', 
                          SeoulPops.columns[1] : '인구수', 
                          SeoulPops.columns[2] : '한국인', 
                          SeoulPops.columns[3] : '외국인', 
                          SeoulPops.columns[4] : '고령자'}, inplace=True)

SeoulPops.drop([0], inplace=True)
SeoulPops = SeoulPops.drop(SeoulPops.index[-1])

In [4]:
SeoulPops['외국인비율'] = SeoulPops['외국인'] / SeoulPops['인구수'] * 100
SeoulPops['고령자비율'] = SeoulPops['고령자'] / SeoulPops['인구수'] * 100
SeoulPops.head()

Unnamed: 0,구별,인구수,한국인,외국인,고령자,외국인비율,고령자비율
1,종로구,162820.0,153589.0,9231.0,25425.0,5.669451,15.615404
2,중구,133240.0,124312.0,8928.0,20764.0,6.70069,15.583909
3,용산구,244203.0,229456.0,14747.0,36231.0,6.038828,14.836427
4,성동구,311244.0,303380.0,7864.0,39997.0,2.526635,12.850689
5,광진구,372164.0,357211.0,14953.0,42214.0,4.017852,11.342849


In [5]:
SeoulBudget = pd.read_csv('../data/00. Seoul_Annual_Budget.csv', header = 2, usecols = ['B', 'C', 'F', 'I'], encoding='utf-8')
SeoulBudget.columns = ['구별', '총예산', '세출', '세입']
SeoulBudget.head()

Unnamed: 0,구별,총예산,세출,세입
0,자치구별(2),예산현액,세입,세출
1,자치구별(2),계,계,계
2,소계,80797281,84378232,73233977
3,본청,53468799,55571683,50276587
4,자치구,27328481,28806549,22957390


In [6]:
if(len(SeoulCCTV['구별'].unique()) == len(SeoulPops['구별'].unique())):
    print('구별 컬럼이 일치합니다.')
else:
    print('구별 컬럼이 일치하지 않습니다.')
    
SeoulPrep = pd.merge(SeoulCCTV, SeoulPops, on='구별')
SeoulPrep.head()

구별 컬럼이 일치합니다.


Unnamed: 0,구별,소계,2013년도 이전,2014년,2015년,2016년,인구수,한국인,외국인,고령자,외국인비율,고령자비율
0,강남구,2780,1292,430,584,932,570500.0,565550.0,4950.0,63167.0,0.86766,11.072217
1,강동구,773,379,99,155,377,453233.0,449019.0,4214.0,54622.0,0.929765,12.051638
2,강북구,748,369,120,138,204,330192.0,326686.0,3506.0,54813.0,1.061806,16.600342
3,강서구,884,388,258,184,81,603772.0,597248.0,6524.0,72548.0,1.08054,12.015794
4,관악구,1496,846,260,390,613,525515.0,507203.0,18312.0,68082.0,3.484582,12.955291


In [7]:
del SeoulPrep['2013년도 이전']
del SeoulPrep['2014년']
del SeoulPrep['2015년']
del SeoulPrep['2016년']

In [8]:
SeoulPrep.rename(columns={SeoulPrep.columns[1] : 'CCTV'}, inplace=True)
SeoulPrep.head()

Unnamed: 0,구별,CCTV,인구수,한국인,외국인,고령자,외국인비율,고령자비율
0,강남구,2780,570500.0,565550.0,4950.0,63167.0,0.86766,11.072217
1,강동구,773,453233.0,449019.0,4214.0,54622.0,0.929765,12.051638
2,강북구,748,330192.0,326686.0,3506.0,54813.0,1.061806,16.600342
3,강서구,884,603772.0,597248.0,6524.0,72548.0,1.08054,12.015794
4,관악구,1496,525515.0,507203.0,18312.0,68082.0,3.484582,12.955291


In [9]:
SeoulBudget.sort_values(by='구별', ascending=True, inplace=True)
SeoulBudget.head()

Unnamed: 0,구별,총예산,세출,세입
27,강남구,1499273,1652978,1250825
29,강동구,1237844,1289018,1049127
13,강북구,1059703,1127810,910812
20,강서구,1439528,1476278,1277854
25,관악구,1254073,1307239,1097285


In [10]:
SeoulData = pd.merge(SeoulPrep, SeoulBudget, on='구별')
del SeoulData['세입']
del SeoulData['세출']
SeoulData.head()

Unnamed: 0,구별,CCTV,인구수,한국인,외국인,고령자,외국인비율,고령자비율,총예산
0,강남구,2780,570500.0,565550.0,4950.0,63167.0,0.86766,11.072217,1499273
1,강동구,773,453233.0,449019.0,4214.0,54622.0,0.929765,12.051638,1237844
2,강북구,748,330192.0,326686.0,3506.0,54813.0,1.061806,16.600342,1059703
3,강서구,884,603772.0,597248.0,6524.0,72548.0,1.08054,12.015794,1439528
4,관악구,1496,525515.0,507203.0,18312.0,68082.0,3.484582,12.955291,1254073


In [13]:
# read csv file '02. crime_norm_shorter.csv' and name it SeoulCrime

SeoulCrime = pd.read_csv('../data/02. crime_norm_shorter.csv', encoding='utf-8')
SeoulCrime.head()

Unnamed: 0,구별,범죄,종합검거율
0,강남구,4.472701,73.941477
1,강동구,1.116551,74.016886
2,강북구,1.494746,81.388708
3,관악구,2.613667,73.724452
4,광진구,2.034438,83.383042


In [15]:
# rename row '총예산' to '예산(만원)'
SeoulData.rename(columns={SeoulData.columns[8] : '예산(만원)'}, inplace=True)
SeoulData.head()

Unnamed: 0,구별,CCTV,인구수,한국인,외국인,고령자,외국인비율,고령자비율,예산(만원)
0,강남구,2780,570500.0,565550.0,4950.0,63167.0,0.86766,11.072217,1499273
1,강동구,773,453233.0,449019.0,4214.0,54622.0,0.929765,12.051638,1237844
2,강북구,748,330192.0,326686.0,3506.0,54813.0,1.061806,16.600342,1059703
3,강서구,884,603772.0,597248.0,6524.0,72548.0,1.08054,12.015794,1439528
4,관악구,1496,525515.0,507203.0,18312.0,68082.0,3.484582,12.955291,1254073


In [None]:
# merge SeoulData with SeoulCrime on '구별'
SeoulData = pd.merge(SeoulData, SeoulCrime, on='구별')
SeoulData.head()

In [None]:
SeoulData.sort_values(by='종합검거율', ascending=False, inplace=True)
SeoulData

In [26]:
# make another dataframe Seouldata_sortCri, which contains sorted SeoulData by '범죄' in descending order
SeoulData_sortCri = SeoulData.sort_values(by='범죄', ascending=False)

# SeoulData_sortCri

Unnamed: 0,구별,CCTV,인구수,한국인,외국인,고령자,외국인비율,고령자비율,예산(만원),범죄,종합검거율
21,강남구,2780,570500.0,565550.0,4950.0,63167.0,0.86766,11.072217,1499273,4.472701,73.941477
22,양천구,2034,479978.0,475949.0,4029.0,52975.0,0.839413,11.036964,1137885,4.297113,78.646432
11,영등포구,904,402985.0,368072.0,34913.0,52413.0,8.663598,13.006191,1102728,3.69958,70.909593
23,송파구,618,667483.0,660584.0,6899.0,72506.0,1.033584,10.862599,1307577,2.732611,73.63001
12,관악구,1496,525515.0,507203.0,18312.0,68082.0,3.484582,12.955291,1254073,2.613667,73.724452
17,마포구,574,389649.0,378566.0,11083.0,48765.0,2.844355,12.51511,989846,2.446908,74.341465
15,구로구,1561,447874.0,416487.0,31387.0,56833.0,7.007998,12.689506,1154912,2.398678,65.129416
19,서초구,1930,450310.0,445994.0,4316.0,51733.0,0.958451,11.488308,1067751,2.327368,66.776545
9,중랑구,660,414503.0,409882.0,4621.0,56774.0,1.114829,13.696885,1136415,2.2029,75.328887
20,광진구,707,372164.0,357211.0,14953.0,42214.0,4.017852,11.342849,987742,2.034438,83.383042


In [None]:
SeoulData.sort_values(by='외국인비율', ascending=False, inplace=True)
SeoulData

In [None]:
SeoulData.sort_values(by='고령자비율', ascending=False, inplace=True)
SeoulData

In [None]:
SeoulData_sortBudget = SeoulData.sort_values(by='예산(만원)', ascending=False)
SeoulData_sortBudget

In [None]:
"""
SeoulData_sortCri는 범죄 발생빈도와 검거율 기준으로 정리
SeoulData_sortBudget은 예산 기준으로 정리 
남은 할 일: 
    1. 각각의 데이터프레임을 시각화 (차트, 그래프, 히트맵)
    2. 시각화한 자료 바탕으로 결론 도출 작업 진행
"""