In [1]:
import pandas as pd
import numpy as np
import os

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.style as style
# Apply matplotlib's built-in 'fivethirtyeight' style sheet to figures drawn after this point.
style.use('fivethirtyeight')

In [2]:
# Configure a font that can render Korean (Hangul) text in matplotlib figures.

from matplotlib import font_manager, rc

# Gulim ships with Windows; this path does not exist on other platforms.
font_path = "/Windows/Fonts/gulim.ttc"

if os.path.exists(font_path):
    font_name = font_manager.FontProperties(fname=font_path).get_name()
else:
    # Fall back to the first Korean-capable family matplotlib already knows about
    # instead of failing on a missing font file.
    installed_families = {entry.name for entry in font_manager.fontManager.ttflist}
    fallback_families = ('Malgun Gothic', 'AppleGothic', 'NanumGothic')
    font_name = next(
        (family for family in fallback_families if family in installed_families),
        None,
    )

if font_name is not None:
    rc('font', family=font_name)
else:
    # Keep matplotlib's current font so the rest of the notebook still runs.
    print(f"No Korean font found (looked for {font_path}); Hangul labels may not render correctly.")

# CJK fonts often have no glyph for the Unicode minus sign (U+2212), which would
# show up as an empty box on axes with negative values; use an ASCII hyphen instead.
rc('axes', unicode_minus=False)

# Data Load

## files

- ayeong.csv
- sohyun.csv
- ppeum.csv
- hyein.csv

In [5]:
def _read_district_csv(path, encoding):
    """Read one comma-separated dataset from `path` using the given text encoding."""
    return pd.read_csv(path, sep=',', encoding=encoding)


# The four input files do not share an encoding: two are cp949, two are utf-8.
ayeong = _read_district_csv('./ayeong.csv', 'cp949')
sohyun = _read_district_csv('./sohyun.csv', 'utf-8')
ppeum = _read_district_csv('./ppeum.csv', 'utf-8')
hyein = _read_district_csv('./hyein.csv', 'cp949')

### 치안방범, 소음민원, 의료기관 데이터

In [6]:
# Print column names, non-null counts and dtypes of the safety / noise-complaint / medical table.
ayeong.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 17 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   자치구       25 non-null     object 
 1   방범용cctv   25 non-null     int64  
 2   경찰서       25 non-null     int64  
 3   소음진동 민원   25 non-null     float64
 4   의료기관수     25 non-null     int64  
 5   소음민원_s    25 non-null     float64
 6   의료기관_s    25 non-null     float64
 7   치안방범_s    25 non-null     float64
 8   인구대비소음진동  25 non-null     float64
 9   인구대비cctv  25 non-null     float64
 10  인구대비경찰서   25 non-null     float64
 11  인구대비의료기관  25 non-null     float64
 12  p_소음민원_s  25 non-null     float64
 13  p_의료기관_s  25 non-null     float64
 14  p_치안방범_s  25 non-null     float64
 15  score     25 non-null     float64
 16  p_score   25 non-null     float64
dtypes: float64(13), int64(3), object(1)
memory usage: 3.4+ KB


In [9]:
# Preview the first five rows of the safety / noise-complaint / medical table.
ayeong.head(5)

Unnamed: 0,자치구,방범용cctv,경찰서,소음진동 민원,의료기관수,소음민원_s,의료기관_s,치안방범_s,인구대비소음진동,인구대비cctv,인구대비경찰서,인구대비의료기관,p_소음민원_s,p_의료기관_s,p_치안방범_s,score,p_score
0,강남구,3297,23,-4928.0,2647,0.0,1.0,1.0,-0.008737,0.02154,0.000152,0.017496,0.0,1.0,1.0,2.0,2.0
1,강동구,810,17,-1070.777778,792,0.894509,0.206926,0.326231,-0.002387,0.006443,0.000135,0.006277,0.873035,0.335379,0.559295,1.427666,1.76771
2,강북구,633,15,-1250.222222,463,0.852895,0.066268,0.224038,-0.003819,0.002764,6.6e-05,0.002025,0.676083,0.083462,0.190602,1.143201,0.950147
3,강서구,807,13,-868.666667,840,0.94138,0.227448,0.187737,-0.001463,0.002618,4.3e-05,0.002792,1.0,0.128898,0.096897,1.356564,1.225795
4,관악구,1998,20,-2522.444444,697,0.55786,0.16631,0.652697,-0.004975,0.005619,5.7e-05,0.001984,0.517142,0.081035,0.225881,1.376867,0.824057


In [20]:
# List the column labels so the score columns to keep can be picked by name.
ayeong.columns

Index(['자치구', '방범용cctv', '경찰서', '소음진동 민원', '의료기관수', '소음민원_s', '의료기관_s',
       '치안방범_s', '인구대비소음진동', '인구대비cctv', '인구대비경찰서', '인구대비의료기관', 'p_소음민원_s',
       'p_의료기관_s', 'p_치안방범_s', 'score', 'p_score'],
      dtype='object')

In [22]:
# Keep the district name plus the three `p_`-prefixed scaled score columns.
ayeong_keep = ['자치구', 'p_소음민원_s', 'p_의료기관_s', 'p_치안방범_s']
ayeong_fin = ayeong[ayeong_keep]
ayeong_fin

Unnamed: 0,자치구,p_소음민원_s,p_의료기관_s,p_치안방범_s
0,강남구,0.0,1.0,1.0
1,강동구,0.873035,0.335379,0.559295
2,강북구,0.676083,0.083462,0.190602
3,강서구,1.0,0.128898,0.096897
4,관악구,0.517142,0.081035,0.225881
5,광진구,0.56291,0.058315,0.07292
6,구로구,0.856475,0.047224,0.12859
7,금천구,0.769849,0.011293,0.021401
8,노원구,0.970313,0.100882,0.156479
9,도봉구,0.957394,0.029088,0.005306


### 문화시설, 상업시설, 유흥시설 데이터

In [7]:
# Print column names, non-null counts and dtypes of the culture / commercial / entertainment facility table.
sohyun.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  25 non-null     int64  
 1   자치구         25 non-null     object 
 2   문화시설수       25 non-null     int64  
 3   상업시설수       25 non-null     int64  
 4   유흥시설수       25 non-null     int64  
 5   시설합계        25 non-null     int64  
 6   인구          25 non-null     int64  
 7   인구대비시설      25 non-null     float64
 8   스케일링        25 non-null     float64
dtypes: float64(2), int64(6), object(1)
memory usage: 1.9+ KB


In [8]:
# Preview the first five rows of the facility table.
sohyun.head(5)

Unnamed: 0.1,Unnamed: 0,자치구,문화시설수,상업시설수,유흥시설수,시설합계,인구,인구대비시설,스케일링
0,0,강남구,86,31321,3849,35256,547453,0.0644,0.151608
1,1,강동구,13,13134,1973,15120,431920,0.035006,0.041353
2,2,강북구,11,8215,1609,9835,322915,0.030457,0.024287
3,3,강서구,7,16880,2622,19509,603611,0.03232,0.031277
4,4,관악구,4,11791,2179,13974,520040,0.026871,0.010836


In [14]:
# Facility counts divided by district population.

# Maps each new ratio column to the count column it is derived from.
per_capita_sources = {
    '문화시설/인구': '문화시설수',
    '상업시설/인구': '상업시설수',
    '유흥시설/인구': '유흥시설수',
}
population = sohyun['인구']

for ratio_column, count_column in per_capita_sources.items():
    sohyun[ratio_column] = sohyun[count_column] / population

sohyun

Unnamed: 0.1,Unnamed: 0,자치구,문화시설수,상업시설수,유흥시설수,시설합계,인구,인구대비시설,스케일링,문화시설/인구,상업시설/인구,유흥시설/인구
0,0,강남구,86,31321,3849,35256,547453,0.0644,0.151608,0.000157,0.057212,0.007031
1,1,강동구,13,13134,1973,15120,431920,0.035006,0.041353,3e-05,0.030408,0.004568
2,2,강북구,11,8215,1609,9835,322915,0.030457,0.024287,3.4e-05,0.02544,0.004983
3,3,강서구,7,16880,2622,19509,603611,0.03232,0.031277,1.2e-05,0.027965,0.004344
4,4,관악구,4,11791,2179,13974,520040,0.026871,0.010836,8e-06,0.022673,0.00419
5,5,광진구,29,11872,2153,14054,371063,0.037875,0.052112,7.8e-05,0.031995,0.005802
6,6,구로구,17,18041,1930,19988,438486,0.045584,0.081029,3.9e-05,0.041144,0.004402
7,7,금천구,12,13176,1276,14464,254021,0.05694,0.123626,4.7e-05,0.05187,0.005023
8,8,노원구,15,11209,1922,13146,548160,0.023982,0.0,2.7e-05,0.020448,0.003506
9,9,도봉구,11,7206,1299,8516,341649,0.024926,0.003541,3.2e-05,0.021092,0.003802


In [15]:
# List the column labels, including the three per-population ratio columns added to `sohyun`.
sohyun.columns

Index(['Unnamed: 0', '자치구', '문화시설수', '상업시설수', '유흥시설수', '시설합계', '인구', '인구대비시설',
       '스케일링', '문화시설/인구', '상업시설/인구', '유흥시설/인구'],
      dtype='object')

In [16]:
# Min-max scale the three per-population facility ratios, column by column.

from sklearn.preprocessing import minmax_scale

# Select the ratio columns by name rather than by position (`iloc[:, 9:]`), so the
# result does not depend on how many other columns `sohyun` happens to have.
ratio_columns = ['문화시설/인구', '상업시설/인구', '유흥시설/인구']
scaled_columns = ['s_문화시설', 's_상업시설', 's_유흥시설']

s_sohyun = pd.DataFrame(
    minmax_scale(sohyun[ratio_columns], axis=0),
    columns=scaled_columns,
    index=sohyun.index,  # keep row labels identical to `sohyun` so later index-based joins line up
)
s_sohyun

Unnamed: 0,s_문화시설,s_상업시설,s_유흥시설
0,0.073418,0.141032,0.416547
1,0.011011,0.038208,0.12548
2,0.01296,0.019149,0.174499
3,0.001919,0.028835,0.098992
4,0.0,0.008535,0.080815
5,0.034626,0.044293,0.271355
6,0.015272,0.079391,0.105805
7,0.019435,0.120537,0.179282
8,0.009667,0.0,0.0
9,0.012042,0.002468,0.034968


In [17]:
# Put the district names next to the scaled facility scores; rows are matched on the index.
district_names = sohyun[['자치구']]
sohyun_fin = pd.concat([district_names, s_sohyun], axis='columns')
sohyun_fin

Unnamed: 0,자치구,s_문화시설,s_상업시설,s_유흥시설
0,강남구,0.073418,0.141032,0.416547
1,강동구,0.011011,0.038208,0.12548
2,강북구,0.01296,0.019149,0.174499
3,강서구,0.001919,0.028835,0.098992
4,관악구,0.0,0.008535,0.080815
5,광진구,0.034626,0.044293,0.271355
6,구로구,0.015272,0.079391,0.105805
7,금천구,0.019435,0.120537,0.179282
8,노원구,0.009667,0.0,0.0
9,도봉구,0.012042,0.002468,0.034968


### 공원, 교육, 청결도 데이터

In [10]:
# Print column names, non-null counts and dtypes of the park / education / cleanliness table.
ppeum.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Unnamed: 0         25 non-null     int64  
 1   자치구                25 non-null     object 
 2   1인당_공원면적_scaled    25 non-null     float64
 3   1인당_교육_scaled      25 non-null     float64
 4   1인당_청결심각수준_scaled  25 non-null     float64
dtypes: float64(3), int64(1), object(1)
memory usage: 1.1+ KB


In [11]:
# Preview the first five rows of the park / education / cleanliness table.
ppeum.head(5)

Unnamed: 0.1,Unnamed: 0,자치구,1인당_공원면적_scaled,1인당_교육_scaled,1인당_청결심각수준_scaled
0,0,종로구,1.0,0.207274,-1.0
1,1,중구,0.28866,0.752597,-0.997638
2,2,용산구,0.055965,0.082985,-0.488824
3,3,성동구,0.095729,0.122267,-0.368005
4,4,광진구,0.083947,0.085548,-0.183095


In [27]:
# Remove the 'Unnamed: 0' column (a saved row counter); the district name and
# the three `*_scaled` columns are kept as they are.
ppeum_fin = ppeum.drop(columns=['Unnamed: 0'])
ppeum_fin

Unnamed: 0,자치구,1인당_공원면적_scaled,1인당_교육_scaled,1인당_청결심각수준_scaled
0,종로구,1.0,0.207274,-1.0
1,중구,0.28866,0.752597,-0.997638
2,용산구,0.055965,0.082985,-0.488824
3,성동구,0.095729,0.122267,-0.368005
4,광진구,0.083947,0.085548,-0.183095
5,동대문구,0.0,0.056742,-0.240041
6,중랑구,0.138439,0.090692,-0.210151
7,성북구,0.217968,0.097548,-0.217461
8,강북구,0.612666,0.0,-0.240019
9,도봉구,0.39028,0.087637,-0.174192


### 대중교통접근성, 주차시설, 보행안전 데이터

In [12]:
# Print column names, non-null counts and dtypes of the public-transport / parking / pedestrian-safety table.
hyein.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  25 non-null     int64  
 1   자치구         25 non-null     object 
 2   보행안전        25 non-null     float64
 3   주차시설        25 non-null     float64
 4   대중교통        25 non-null     float64
 5   보행안전_s      25 non-null     float64
 6   주차시설_s      25 non-null     float64
 7   대중교통_s      25 non-null     float64
dtypes: float64(6), int64(1), object(1)
memory usage: 1.7+ KB


In [13]:
# Preview the first five rows of the public-transport / parking / pedestrian-safety table.
hyein.head(5)

Unnamed: 0.1,Unnamed: 0,자치구,보행안전,주차시설,대중교통,보행안전_s,주차시설_s,대중교통_s
0,0,강남구,1.1e-05,0.000309,0.635439,0.04714,0.064837,0.195492
1,1,강동구,1.3e-05,0.000318,0.373064,0.092261,0.071191,0.085684
2,2,강북구,1.8e-05,0.00038,0.482599,0.211426,0.11505,0.131526
3,3,강서구,1.1e-05,0.000216,0.405004,0.026407,0.000433,0.099051
4,4,관악구,1.2e-05,0.000253,0.20296,0.063609,0.02624,0.014492


In [19]:
# Display `hyein` without the 'Unnamed: 0' column. The result is only shown,
# not assigned, so `hyein` itself is left unchanged.
hyein.drop(columns='Unnamed: 0')

Unnamed: 0,자치구,보행안전,주차시설,대중교통,보행안전_s,주차시설_s,대중교통_s
0,강남구,1.1e-05,0.000309,0.635439,0.04714,0.064837,0.195492
1,강동구,1.3e-05,0.000318,0.373064,0.092261,0.071191,0.085684
2,강북구,1.8e-05,0.00038,0.482599,0.211426,0.11505,0.131526
3,강서구,1.1e-05,0.000216,0.405004,0.026407,0.000433,0.099051
4,관악구,1.2e-05,0.000253,0.20296,0.063609,0.02624,0.014492
5,광진구,1.8e-05,0.000398,0.559517,0.208902,0.127151,0.163718
6,구로구,1.5e-05,0.000313,0.554818,0.141708,0.067989,0.161751
7,금천구,2.6e-05,0.000635,0.772134,0.410646,0.293108,0.252702
8,노원구,1.1e-05,0.000218,0.406003,0.036058,0.001414,0.099469
9,도봉구,1.9e-05,0.000341,0.300645,0.242523,0.087811,0.055375


In [23]:
# List the column labels; 'Unnamed: 0' is still present because the drop above was not assigned.
hyein.columns

Index(['Unnamed: 0', '자치구', '보행안전', '주차시설', '대중교통', '보행안전_s', '주차시설_s',
       '대중교통_s'],
      dtype='object')

In [24]:
# Keep the district name plus the three `_s`-suffixed columns.
hyein_keep = ['자치구', '보행안전_s', '주차시설_s', '대중교통_s']
hyein_fin = hyein[hyein_keep]
hyein_fin

Unnamed: 0,자치구,보행안전_s,주차시설_s,대중교통_s
0,강남구,0.04714,0.064837,0.195492
1,강동구,0.092261,0.071191,0.085684
2,강북구,0.211426,0.11505,0.131526
3,강서구,0.026407,0.000433,0.099051
4,관악구,0.063609,0.02624,0.014492
5,광진구,0.208902,0.127151,0.163718
6,구로구,0.141708,0.067989,0.161751
7,금천구,0.410646,0.293108,0.252702
8,노원구,0.036058,0.001414,0.099469
9,도봉구,0.242523,0.087811,0.055375


# Data Merge

In [31]:
# Combine the four per-topic tables into one table keyed by district name.

# The join key is spelled out instead of being inferred from shared column names,
# and every merge is validated as one-to-one so that a duplicated district raises
# a MergeError instead of silently multiplying rows.
merge_options = dict(on='자치구', how='inner', validate='one_to_one')

ayso = pd.merge(ayeong_fin, sohyun_fin, **merge_options)
pphy = pd.merge(ppeum_fin, hyein_fin, **merge_options)
final = pd.merge(ayso, pphy, **merge_options)

# An inner join drops any district that is missing from, or spelled differently in,
# one of the inputs; fail loudly if the row counts do not all agree.
input_row_counts = {len(ayeong_fin), len(sohyun_fin), len(ppeum_fin), len(hyein_fin)}
assert input_row_counts == {len(final)}, "districts were lost or mismatched while merging"

final.head(5)

Unnamed: 0,자치구,p_소음민원_s,p_의료기관_s,p_치안방범_s,s_문화시설,s_상업시설,s_유흥시설,1인당_공원면적_scaled,1인당_교육_scaled,1인당_청결심각수준_scaled,보행안전_s,주차시설_s,대중교통_s
0,강남구,0.0,1.0,1.0,0.073418,0.141032,0.416547,0.125184,1.0,-0.05785,0.04714,0.064837,0.195492
1,강동구,0.873035,0.335379,0.559295,0.011011,0.038208,0.12548,0.061856,0.075278,-0.142303,0.092261,0.071191,0.085684
2,강북구,0.676083,0.083462,0.190602,0.01296,0.019149,0.174499,0.612666,0.0,-0.240019,0.211426,0.11505,0.131526
3,강서구,1.0,0.128898,0.096897,0.001919,0.028835,0.098992,0.055965,0.330862,-0.015912,0.026407,0.000433,0.099051
4,관악구,0.517142,0.081035,0.225881,0.0,0.008535,0.080815,0.260677,0.056967,-0.083275,0.063609,0.02624,0.014492


In [32]:
# Print column names, non-null counts and dtypes of the merged table.
final.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25 entries, 0 to 24
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   자치구                25 non-null     object 
 1   p_소음민원_s           25 non-null     float64
 2   p_의료기관_s           25 non-null     float64
 3   p_치안방범_s           25 non-null     float64
 4   s_문화시설             25 non-null     float64
 5   s_상업시설             25 non-null     float64
 6   s_유흥시설             25 non-null     float64
 7   1인당_공원면적_scaled    25 non-null     float64
 8   1인당_교육_scaled      25 non-null     float64
 9   1인당_청결심각수준_scaled  25 non-null     float64
 10  보행안전_s             25 non-null     float64
 11  주차시설_s             25 non-null     float64
 12  대중교통_s             25 non-null     float64
dtypes: float64(12), object(1)
memory usage: 2.7+ KB


In [33]:
# List the merged column labels before they are renamed.
final.columns

Index(['자치구', 'p_소음민원_s', 'p_의료기관_s', 'p_치안방범_s', 's_문화시설', 's_상업시설', 's_유흥시설',
       '1인당_공원면적_scaled', '1인당_교육_scaled', '1인당_청결심각수준_scaled', '보행안전_s',
       '주차시설_s', '대중교통_s'],
      dtype='object')

In [34]:
# Rename the scaled score columns to short labels.

# A name-to-name mapping is used instead of assigning a positional list of 13
# labels: it does not depend on column order, and the cell can be re-run after
# extra columns have been added to `final` without a length-mismatch error.
short_names = {
    'p_소음민원_s': '소음민원',
    'p_의료기관_s': '의료기관',
    'p_치안방범_s': '치안방범',
    's_문화시설': '문화시설',
    's_상업시설': '상업시설',
    's_유흥시설': '유흥시설',
    '1인당_공원면적_scaled': '공원면적',
    '1인당_교육_scaled': '교육',
    '1인당_청결심각수준_scaled': '청결심각수준',
    '보행안전_s': '보행안전',
    '주차시설_s': '주차시설',
    '대중교통_s': '대중교통',
}
final = final.rename(columns=short_names)
final.head(5)

Unnamed: 0,자치구,소음민원,의료기관,치안방범,문화시설,상업시설,유흥시설,공원면적,교육,청결심각수준,보행안전,주차시설,대중교통
0,강남구,0.0,1.0,1.0,0.073418,0.141032,0.416547,0.125184,1.0,-0.05785,0.04714,0.064837,0.195492
1,강동구,0.873035,0.335379,0.559295,0.011011,0.038208,0.12548,0.061856,0.075278,-0.142303,0.092261,0.071191,0.085684
2,강북구,0.676083,0.083462,0.190602,0.01296,0.019149,0.174499,0.612666,0.0,-0.240019,0.211426,0.11505,0.131526
3,강서구,1.0,0.128898,0.096897,0.001919,0.028835,0.098992,0.055965,0.330862,-0.015912,0.026407,0.000433,0.099051
4,관악구,0.517142,0.081035,0.225881,0.0,0.008535,0.080815,0.260677,0.056967,-0.083275,0.063609,0.02624,0.014492


In [36]:
# Compute a total per district: the row-wise sum of the twelve score columns.

# The columns are listed explicitly so that 'total' itself is never included
# when this cell is re-run.
score_columns = [
    '소음민원', '의료기관', '치안방범',
    '문화시설', '상업시설', '유흥시설',
    '공원면적', '교육', '청결심각수준',
    '보행안전', '주차시설', '대중교통',
]

# Vectorised row sum; equivalent to applying `Series.sum` to each row.
# NOTE(review): the '청결심각수준' values shown in the previews are negative, so that
# column lowers the total - confirm this sign convention is intended.
final['total'] = final[score_columns].sum(axis=1)
final.head(5)

Unnamed: 0,자치구,소음민원,의료기관,치안방범,문화시설,상업시설,유흥시설,공원면적,교육,청결심각수준,보행안전,주차시설,대중교통,total
0,강남구,0.0,1.0,1.0,0.073418,0.141032,0.416547,0.125184,1.0,-0.05785,0.04714,0.064837,0.195492,4.0058
1,강동구,0.873035,0.335379,0.559295,0.011011,0.038208,0.12548,0.061856,0.075278,-0.142303,0.092261,0.071191,0.085684,2.186375
2,강북구,0.676083,0.083462,0.190602,0.01296,0.019149,0.174499,0.612666,0.0,-0.240019,0.211426,0.11505,0.131526,1.987404
3,강서구,1.0,0.128898,0.096897,0.001919,0.028835,0.098992,0.055965,0.330862,-0.015912,0.026407,0.000433,0.099051,1.852347
4,관악구,0.517142,0.081035,0.225881,0.0,0.008535,0.080815,0.260677,0.056967,-0.083275,0.063609,0.02624,0.014492,1.252118


# EDA