# 중구 - 명동 데이터 전처리 (업종별)

In [1]:
import warnings
warnings.filterwarnings(action='ignore') 

import numpy as np
import pandas as pd
pd.set_option('display.float_format', lambda x: '%.2f' % x)

# Font setup, method 2: select the 'Haansoft Dotum' font family for figures
# (presumably so that Korean labels render correctly — confirm).
# NOTE(review): `plt` is bound to the top-level `matplotlib` package here, not
# to `matplotlib.pyplot`. `rcParams` is available on it, but pyplot functions
# such as `plot` are not reachable through this alias — confirm before plotting.
import matplotlib as plt
plt.rcParams["font.family"] = 'Haansoft Dotum'
# Use an ASCII hyphen instead of the Unicode minus sign on axis tick labels.
plt.rcParams['axes.unicode_minus'] = False
%matplotlib inline
# Draw figures directly in the browser (inline in the notebook).

from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder

import time
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.metrics import mean_squared_error

## 1. 데이터 불러오기
### (1) 업종별 점포 데이터

In [3]:
# Load one store-data CSV per year (2015-2020) and publish each as a
# module-level DataFrame named service_info_<year>:
#   2020 -> service_info_2020, 2019 -> service_info_2019, ..., 2015 -> service_info_2015
# NOTE(review): `path` is not defined in the visible cells — presumably a string
# prefix set in an earlier cell; confirm.
for year in range(2015, 2021):
    csv_file = path + str(year) + '.csv'
    globals()[f'service_info_{year}'] = pd.read_csv(csv_file, encoding='cp949')

In [4]:
# Check: preview the first rows of the 2020 store table.

service_info_2020.head()

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_구분_코드,상권_구분_코드_명,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,점포_수,유사_업종_점포_수,개업_율,개업_점포_수,폐업_률,폐업_점포_수,프랜차이즈_점포_수
0,2020,2,U,관광특구,1001496,강남 마이스 관광특구,CS300043,전자상거래업,8,8,0,0,0,0,0
1,2020,2,U,관광특구,1001496,강남 마이스 관광특구,CS300042,주유소,1,1,0,0,0,0,0
2,2020,2,U,관광특구,1001496,강남 마이스 관광특구,CS300041,예술품,9,9,0,0,0,0,0
3,2020,2,U,관광특구,1001496,강남 마이스 관광특구,CS300040,재생용품 판매점,0,0,0,0,0,0,0
4,2020,2,U,관광특구,1001496,강남 마이스 관광특구,CS300038,자동차부품,10,10,0,0,0,0,0


### (2) 업종별 추정매출 데이터

In [6]:
# Load one estimated-sales CSV per year (2015-2020) and publish each as a
# module-level DataFrame named service_sales_<year>:
#   2020 -> service_sales_2020, 2019 -> service_sales_2019, ..., 2015 -> service_sales_2015
# NOTE(review): this reads from the same `path` expression as the store-data
# loader; presumably `path` is reassigned in a cell not shown here — confirm.
for year in range(2015, 2021):
    csv_file = path + str(year) + '.csv'
    globals()[f'service_sales_{year}'] = pd.read_csv(csv_file, encoding='cp949')

In [7]:
# Check: preview the first rows of the 2020 estimated-sales table.

service_sales_2020.head()

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_구분_코드,상권_구분_코드_명,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,당월_매출_금액,당월_매출_건수,...,시간대_건수~24_매출_건수,남성_매출_건수,여성_매출_건수,연령대_10_매출_건수,연령대_20_매출_건수,연령대_30_매출_건수,연령대_40_매출_건수,연령대_50_매출_건수,연령대_60_이상_매출_건수,점포수
0,2020,2,U,관광특구,1001496,강남 마이스 관광특구,CS300043,전자상거래업,6415600,169,...,0,63,106,0,10,139,10,5,5,8
1,2020,2,U,관광특구,1001496,강남 마이스 관광특구,CS300032,가전제품,1853749205,8319,...,132,4331,3363,66,2146,2181,1234,1062,1003,8
2,2020,2,U,관광특구,1001496,강남 마이스 관광특구,CS300031,가구,10400909742,5270,...,35,2458,2812,0,0,245,1441,1722,1862,5
3,2020,2,U,관광특구,1001496,강남 마이스 관광특구,CS300028,화초,807257363,28506,...,580,9007,18931,232,10926,10020,3876,2101,781,6
4,2020,2,U,관광특구,1001496,강남 마이스 관광특구,CS300027,섬유제품,1410497003,41063,...,1445,8135,32928,1478,20434,8248,5427,4241,1235,6


In [8]:
# Vertically stack the yearly tables (2015-2020) into one store table and one
# estimated-sales table.
#
# pd.concat is used instead of growing a frame with DataFrame.append in a loop:
# append was removed in pandas 2.0, and it re-copied the accumulated rows on
# every iteration. The yearly frames are looked up through globals() because
# the loading cells publish them as service_info_<year> / service_sales_<year>.

# Combined store data (2015-2020), with a fresh 0..n-1 index
total_service_info = pd.concat(
    [globals()[f'service_info_{year}'] for year in range(2015, 2021)],
    ignore_index=True,
)

# Drop columns that are not needed
total_service_info = total_service_info.drop(['상권_구분_코드', '상권_구분_코드_명'], axis=1)

# Combined estimated-sales data (2015-2020), with a fresh 0..n-1 index
total_service_sales = pd.concat(
    [globals()[f'service_sales_{year}'] for year in range(2015, 2021)],
    ignore_index=True,
)

# Drop columns that are not needed
total_service_sales = total_service_sales.drop(['상권_구분_코드', '상권_구분_코드_명'], axis=1)

In [9]:
# Check the result:
# combined store data

total_service_info.head()

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,점포_수,유사_업종_점포_수,개업_율,개업_점포_수,폐업_률,폐업_점포_수,프랜차이즈_점포_수
0,2015,1,1001490,길동복조리시장,CS300011,일반의류,33,33,0,0,3,1,0
1,2015,1,1001046,서울 관악구 신림역_4,CS300001,슈퍼마켓,18,18,0,0,0,0,0
2,2015,1,1000609,개봉로11길,CS100001,한식음식점,9,9,0,0,0,0,0
3,2015,1,1000910,도산대로81길,CS100004,양식음식점,9,9,22,2,33,3,0
4,2015,1,1000862,방배로18길,CS100002,중식음식점,1,2,0,0,0,0,1


In [10]:
# Count missing values per column in the combined store data.

total_service_info.isnull().sum()

기준_년_코드        0
기준_분기_코드       0
상권_코드          0
상권_코드_명        0
서비스_업종_코드      0
서비스_업종_코드_명    0
점포_수           0
유사_업종_점포_수     0
개업_율           0
개업_점포_수        0
폐업_률           0
폐업_점포_수        0
프랜차이즈_점포_수     0
dtype: int64

In [11]:
# Check the result:
# combined estimated-sales data

total_service_sales.head()

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,당월_매출_금액,당월_매출_건수,주중_매출_비율,주말_매출_비율,...,시간대_건수~24_매출_건수,남성_매출_건수,여성_매출_건수,연령대_10_매출_건수,연령대_20_매출_건수,연령대_30_매출_건수,연령대_40_매출_건수,연령대_50_매출_건수,연령대_60_이상_매출_건수,점포수
0,2015,1,1001018,포이사거리_2,CS300021,문구,3390014653,88106,98,3,...,41,26390,18879,385,8047,13300,11808,8195,3532,19
1,2015,1,1000481,성미산로11길,CS300011,일반의류,477810637,1339,86,14,...,9,781,406,0,64,472,216,328,108,7
2,2015,1,1000440,세검정로1길,CS200008,한의원,29303647,582,85,15,...,0,258,324,0,0,189,18,197,178,1
3,2015,1,1001458,인헌시장,CS100005,제과점,312469941,38207,68,32,...,3475,13485,22883,352,6467,13345,9907,4002,2294,3
4,2015,1,1000756,사당로8길,CS200016,당구장,38474191,2583,61,39,...,854,2317,197,165,827,664,544,237,77,1


In [12]:
# Count missing values per column in the combined estimated-sales data.

total_service_sales.isnull().sum()

기준_년_코드            0
기준_분기_코드           0
상권_코드              0
상권_코드_명            0
서비스_업종_코드          0
                  ..
연령대_30_매출_건수       0
연령대_40_매출_건수       0
연령대_50_매출_건수       0
연령대_60_이상_매출_건수    0
점포수                0
Length: 78, dtype: int64

In [13]:
# Collect the column names that appear in both the store table and the
# estimated-sales table.

c_info, c_sales = (
    set(frame.columns) for frame in (total_service_info, total_service_sales)
)
c_same = list(c_info & c_sales)

c_same

['서비스_업종_코드', '기준_년_코드', '서비스_업종_코드_명', '상권_코드', '기준_분기_코드', '상권_코드_명']

In [14]:
# total_df = combined store data + combined estimated-sales data.
# Left join on the shared columns: every store row is kept, and sales columns
# are NaN where no sales row has the same key values.

total_df = total_service_info.merge(total_service_sales, on=c_same, how='left')

In [15]:
# Check the result:
# fully merged data

total_df.head()

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,점포_수,유사_업종_점포_수,개업_율,개업_점포_수,...,시간대_건수~24_매출_건수,남성_매출_건수,여성_매출_건수,연령대_10_매출_건수,연령대_20_매출_건수,연령대_30_매출_건수,연령대_40_매출_건수,연령대_50_매출_건수,연령대_60_이상_매출_건수,점포수
0,2015,1,1001490,길동복조리시장,CS300011,일반의류,33,33,0,0,...,643.0,4389.0,11072.0,97.0,1205.0,2267.0,4759.0,4584.0,2549.0,33.0
1,2015,1,1001046,서울 관악구 신림역_4,CS300001,슈퍼마켓,18,18,0,0,...,48014.0,79772.0,108601.0,1293.0,68332.0,55144.0,26564.0,23568.0,13473.0,18.0
2,2015,1,1000609,개봉로11길,CS100001,한식음식점,9,9,0,0,...,664.0,1620.0,951.0,7.0,199.0,509.0,785.0,662.0,407.0,9.0
3,2015,1,1000910,도산대로81길,CS100004,양식음식점,9,9,22,2,...,1908.0,4735.0,6426.0,81.0,3347.0,4750.0,1784.0,929.0,276.0,10.0
4,2015,1,1000862,방배로18길,CS100002,중식음식점,1,2,0,0,...,75.0,1726.0,892.0,16.0,509.0,1021.0,731.0,220.0,121.0,2.0


In [16]:
# Count missing values per column in the merged data.

total_df.isnull().sum()

기준_년_코드                  0
기준_분기_코드                 0
상권_코드                    0
상권_코드_명                  0
서비스_업종_코드                0
                    ...   
연령대_30_매출_건수       1115448
연령대_40_매출_건수       1115448
연령대_50_매출_건수       1115448
연령대_60_이상_매출_건수    1115448
점포수                1115448
Length: 85, dtype: int64

In [17]:
# total_df.to_csv('./통합_점포_추정매출.csv', encoding = 'cp949')

## 2. 데이터 전처리
### (1) '명동거리' 상권 추출

In [18]:
# From the merged store / estimated-sales data keep only the rows of the
# '명동거리' (Myeongdong Street) commercial district.
# District code of '명동거리' = 1001203

is_myeongdong = total_df['상권_코드'].eq(1001203)
df_m = total_df[is_myeongdong]

In [19]:
# Check the result: first rows of the Myeongdong Street subset.

df_m.head()

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,점포_수,유사_업종_점포_수,개업_율,개업_점포_수,...,시간대_건수~24_매출_건수,남성_매출_건수,여성_매출_건수,연령대_10_매출_건수,연령대_20_매출_건수,연령대_30_매출_건수,연령대_40_매출_건수,연령대_50_매출_건수,연령대_60_이상_매출_건수,점포수
222,2015,1,1001203,명동거리,CS300017,시계및귀금속,42,42,0,0,...,6399.0,18228.0,40919.0,1734.0,36620.0,12246.0,4906.0,2964.0,681.0,42.0
265,2015,1,1001203,명동거리,CS300009,청과상,3,3,33,1,...,0.0,1519.0,2407.0,0.0,1265.0,1681.0,861.0,121.0,0.0,3.0
2241,2015,1,1001203,명동거리,CS300023,미용재료,9,9,11,1,...,,,,,,,,,,
4935,2015,1,1001203,명동거리,CS200034,여관,8,9,0,0,...,742.0,2854.0,1735.0,0.0,2602.0,1053.0,603.0,253.0,79.0,9.0
6388,2015,1,1001203,명동거리,CS300026,완구,2,2,0,0,...,6751.0,17036.0,27167.0,1812.0,19910.0,8944.0,7990.0,4266.0,1280.0,2.0


In [20]:
# Check the result: (rows, columns) of the Myeongdong Street subset.

df_m.shape

(1881, 85)

### (2) '일반의류', '한식음식점' 업종 선택

In [36]:
# Boolean row masks for the two industries of interest:
# '일반의류' (general clothing) and '한식음식점' (Korean restaurants).
industry_name = df_m['서비스_업종_코드_명']
cloth = industry_name.eq('일반의류')
kfood = industry_name.eq('한식음식점')

# Extract each industry's rows and renumber them from 0.
df_m_cloth = df_m.loc[cloth].reset_index(drop=True)
df_m_kfood = df_m.loc[kfood].reset_index(drop=True)

In [37]:
# Check the result:
# '한식음식점' (Korean restaurants)
# industry code of '한식음식점' = CS100001

df_m_kfood.head()

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,점포_수,유사_업종_점포_수,개업_율,개업_점포_수,...,시간대_건수~24_매출_건수,남성_매출_건수,여성_매출_건수,연령대_10_매출_건수,연령대_20_매출_건수,연령대_30_매출_건수,연령대_40_매출_건수,연령대_50_매출_건수,연령대_60_이상_매출_건수,점포수
0,2015,1,1001203,명동거리,CS100001,한식음식점,175,211,3,6,...,60940.0,258105.0,258474.0,10710.0,172366.0,142452.0,99767.0,59920.0,31382.0,210.0
1,2015,2,1001203,명동거리,CS100001,한식음식점,170,205,2,5,...,59855.0,257359.0,267430.0,11073.0,173027.0,148498.0,99600.0,60127.0,32474.0,206.0
2,2015,3,1001203,명동거리,CS100001,한식음식점,168,202,3,5,...,59725.0,271558.0,283035.0,12321.0,186568.0,150954.0,106829.0,63620.0,34301.0,202.0
3,2015,4,1001203,명동거리,CS100001,한식음식점,170,206,5,10,...,68898.0,286213.0,286531.0,14800.0,188863.0,152950.0,111902.0,68185.0,36048.0,206.0
4,2016,1,1001203,명동거리,CS100001,한식음식점,176,215,8,18,...,74110.0,301029.0,306635.0,13813.0,204240.0,160687.0,117693.0,71955.0,39296.0,210.0


In [38]:
# Check the result: (rows, columns)
# '한식음식점' (Korean restaurants)

df_m_kfood.shape

(22, 85)

In [39]:
# Count missing values per column.
# '한식음식점' (Korean restaurants)

df_m_kfood.isnull().sum()

기준_년_코드            0
기준_분기_코드           0
상권_코드              0
상권_코드_명            0
서비스_업종_코드          0
                  ..
연령대_30_매출_건수       0
연령대_40_매출_건수       0
연령대_50_매출_건수       0
연령대_60_이상_매출_건수    0
점포수                0
Length: 85, dtype: int64

In [40]:
# Total number of missing values across all columns.
# '한식음식점' (Korean restaurants)

# Per-column missing counts first, then their grand total.
n_of_kfood = df_m_kfood.isna().sum()
n_of_kfood.sum()

0

In [41]:
# Check the result:
# '일반의류' (general clothing)
# industry code of '일반의류' = CS300011

df_m_cloth.head()

Unnamed: 0,기준_년_코드,기준_분기_코드,상권_코드,상권_코드_명,서비스_업종_코드,서비스_업종_코드_명,점포_수,유사_업종_점포_수,개업_율,개업_점포_수,...,시간대_건수~24_매출_건수,남성_매출_건수,여성_매출_건수,연령대_10_매출_건수,연령대_20_매출_건수,연령대_30_매출_건수,연령대_40_매출_건수,연령대_50_매출_건수,연령대_60_이상_매출_건수,점포수
0,2015,1,1001203,명동거리,CS300011,일반의류,731,732,1,9,...,102759.0,280187.0,592586.0,50756.0,465099.0,165532.0,107597.0,70146.0,13654.0,733.0
1,2015,2,1001203,명동거리,CS300011,일반의류,725,726,1,6,...,125656.0,322910.0,695472.0,50686.0,534136.0,206561.0,128161.0,82228.0,16612.0,728.0
2,2015,3,1001203,명동거리,CS300011,일반의류,708,709,1,7,...,134950.0,354514.0,760756.0,62647.0,589233.0,211785.0,145112.0,88327.0,18170.0,715.0
3,2015,4,1001203,명동거리,CS300011,일반의류,703,704,1,7,...,105059.0,269793.0,567248.0,55122.0,431018.0,153923.0,115259.0,67505.0,14212.0,706.0
4,2016,1,1001203,명동거리,CS300011,일반의류,688,689,1,7,...,86663.0,227863.0,523951.0,44370.0,398173.0,139536.0,93570.0,62025.0,14140.0,689.0


In [42]:
# Check the result: (rows, columns)
# '일반의류' (general clothing)

df_m_cloth.shape

(22, 85)

In [43]:
# Count missing values per column.
# '일반의류' (general clothing)

df_m_cloth.isnull().sum()

기준_년_코드            0
기준_분기_코드           0
상권_코드              0
상권_코드_명            0
서비스_업종_코드          0
                  ..
연령대_30_매출_건수       0
연령대_40_매출_건수       0
연령대_50_매출_건수       0
연령대_60_이상_매출_건수    0
점포수                0
Length: 85, dtype: int64

In [44]:
# Total number of missing values across all columns.
# '일반의류' (general clothing)

# Per-column missing counts first, then their grand total.
n_of_cloth = df_m_cloth.isna().sum()
n_of_cloth.sum()

0

### (3) 데이터 정규화

In [45]:
# Columns of the Korean-restaurant ('한식음식점') data.

df_m_kfood.columns

Index(['기준_년_코드', '기준_분기_코드', '상권_코드', '상권_코드_명', '서비스_업종_코드', '서비스_업종_코드_명',
       '점포_수', '유사_업종_점포_수', '개업_율', '개업_점포_수', '폐업_률', '폐업_점포_수',
       '프랜차이즈_점포_수', '당월_매출_금액', '당월_매출_건수', '주중_매출_비율', '주말_매출_비율',
       '월요일_매출_비율', '화요일_매출_비율', '수요일_매출_비율', '목요일_매출_비율', '금요일_매출_비율',
       '토요일_매출_비율', '일요일_매출_비율', '시간대_00~06_매출_비율', '시간대_06~11_매출_비율',
       '시간대_11~14_매출_비율', '시간대_14~17_매출_비율', '시간대_17~21_매출_비율',
       '시간대_21~24_매출_비율', '남성_매출_비율', '여성_매출_비율', '연령대_10_매출_비율',
       '연령대_20_매출_비율', '연령대_30_매출_비율', '연령대_40_매출_비율', '연령대_50_매출_비율',
       '연령대_60_이상_매출_비율', '주중_매출_금액', '주말_매출_금액', '월요일_매출_금액', '화요일_매출_금액',
       '수요일_매출_금액', '목요일_매출_금액', '금요일_매출_금액', '토요일_매출_금액', '일요일_매출_금액',
       '시간대_00~06_매출_금액', '시간대_06~11_매출_금액', '시간대_11~14_매출_금액',
       '시간대_14~17_매출_금액', '시간대_17~21_매출_금액', '시간대_21~24_매출_금액', '남성_매출_금액',
       '여성_매출_금액', '연령대_10_매출_금액', '연령대_20_매출_금액', '연령대_30_매출_금액',
       '연령대_40_매출_금액', '연령대_50_매출_금액', '연령대_60_이상_매출_금액', '주중_매출_건수',
       '주말_매출_건수'

In [46]:
# Remove columns that are not needed from the Korean-restaurant data,
# then show how many columns remain.

useless = ['상권_코드', '상권_코드_명', '서비스_업종_코드_명', '점포수']

df_m_kfood = df_m_kfood.drop(columns=useless)

df_m_kfood.shape[1]

81

In [47]:
# Min-Max normalisation of the Korean-restaurant data.
#
# Every column from position 3 onward is rescaled to [0, 1]; the first three
# columns are skipped and the closure rate ('폐업_률') is left unscaled.
# The original cell ended with a stray 'ㅡ' character (a SyntaxError) and
# hard-coded the column count; both are fixed here.

from sklearn import preprocessing

# Columns to scale: everything after the first three, except the closure rate.
scale_cols = [col for col in df_m_kfood.columns[3:] if col != '폐업_률']

# MinMaxScaler scales each column independently, so a single fit over the
# selected columns gives the same values as fitting a new scaler per column.
min_max_scaler = preprocessing.MinMaxScaler()
df_m_kfood[scale_cols] = min_max_scaler.fit_transform(df_m_kfood[scale_cols])

In [48]:
# Columns of the general-clothing ('일반의류') data.

df_m_cloth.columns

Index(['기준_년_코드', '기준_분기_코드', '상권_코드', '상권_코드_명', '서비스_업종_코드', '서비스_업종_코드_명',
       '점포_수', '유사_업종_점포_수', '개업_율', '개업_점포_수', '폐업_률', '폐업_점포_수',
       '프랜차이즈_점포_수', '당월_매출_금액', '당월_매출_건수', '주중_매출_비율', '주말_매출_비율',
       '월요일_매출_비율', '화요일_매출_비율', '수요일_매출_비율', '목요일_매출_비율', '금요일_매출_비율',
       '토요일_매출_비율', '일요일_매출_비율', '시간대_00~06_매출_비율', '시간대_06~11_매출_비율',
       '시간대_11~14_매출_비율', '시간대_14~17_매출_비율', '시간대_17~21_매출_비율',
       '시간대_21~24_매출_비율', '남성_매출_비율', '여성_매출_비율', '연령대_10_매출_비율',
       '연령대_20_매출_비율', '연령대_30_매출_비율', '연령대_40_매출_비율', '연령대_50_매출_비율',
       '연령대_60_이상_매출_비율', '주중_매출_금액', '주말_매출_금액', '월요일_매출_금액', '화요일_매출_금액',
       '수요일_매출_금액', '목요일_매출_금액', '금요일_매출_금액', '토요일_매출_금액', '일요일_매출_금액',
       '시간대_00~06_매출_금액', '시간대_06~11_매출_금액', '시간대_11~14_매출_금액',
       '시간대_14~17_매출_금액', '시간대_17~21_매출_금액', '시간대_21~24_매출_금액', '남성_매출_금액',
       '여성_매출_금액', '연령대_10_매출_금액', '연령대_20_매출_금액', '연령대_30_매출_금액',
       '연령대_40_매출_금액', '연령대_50_매출_금액', '연령대_60_이상_매출_금액', '주중_매출_건수',
       '주말_매출_건수'

In [49]:
# Remove columns that are not needed from the general-clothing data,
# then show how many columns remain.

useless = ['상권_코드', '상권_코드_명', '서비스_업종_코드_명', '점포수']

df_m_cloth = df_m_cloth.drop(columns=useless)

df_m_cloth.shape[1]

81

In [50]:
# Min-Max normalisation of the general-clothing data.
#
# Every column from position 3 onward is rescaled to [0, 1]; the first three
# columns are skipped and the closure rate ('폐업_률') is left unscaled.
# The column range is derived from the frame instead of a hard-coded count,
# so the cell keeps working if columns are added or removed upstream.

# Columns to scale: everything after the first three, except the closure rate.
scale_cols = [col for col in df_m_cloth.columns[3:] if col != '폐업_률']

# MinMaxScaler scales each column independently, so a single fit over the
# selected columns gives the same values as fitting a new scaler per column.
min_max_scaler = preprocessing.MinMaxScaler()
df_m_cloth[scale_cols] = min_max_scaler.fit_transform(df_m_cloth[scale_cols])

In [51]:
# Check the result:
# Korean restaurants ('한식음식점') after normalisation

df_m_kfood

Unnamed: 0,기준_년_코드,기준_분기_코드,서비스_업종_코드,점포_수,유사_업종_점포_수,개업_율,개업_점포_수,폐업_률,폐업_점포_수,프랜차이즈_점포_수,...,시간대_건수~21_매출_건수,시간대_건수~24_매출_건수,남성_매출_건수,여성_매출_건수,연령대_10_매출_건수,연령대_20_매출_건수,연령대_30_매출_건수,연령대_40_매출_건수,연령대_50_매출_건수,연령대_60_이상_매출_건수
0,2015,1,CS100001,0.94,0.85,0.29,0.25,3,0.46,0.67,...,0.82,0.76,0.66,0.67,0.68,0.76,0.78,0.68,0.45,0.21
1,2015,2,CS100001,0.67,0.62,0.14,0.19,5,0.85,0.56,...,0.77,0.74,0.65,0.71,0.71,0.77,0.85,0.68,0.46,0.23
2,2015,3,CS100001,0.56,0.5,0.29,0.19,4,0.62,0.44,...,0.85,0.74,0.73,0.79,0.8,0.87,0.88,0.8,0.53,0.27
3,2015,4,CS100001,0.67,0.65,0.57,0.5,3,0.46,0.67,...,0.91,0.9,0.82,0.81,0.99,0.89,0.91,0.88,0.62,0.31
4,2016,1,CS100001,1.0,1.0,1.0,1.0,4,0.69,1.0,...,1.0,1.0,0.91,0.9,0.92,1.0,1.0,0.98,0.7,0.38
5,2016,2,CS100001,0.94,0.88,0.43,0.38,5,0.85,0.78,...,0.89,0.92,0.8,0.88,0.92,0.89,0.93,0.93,0.66,0.43
6,2016,3,CS100001,0.72,0.58,0.29,0.19,6,1.0,0.33,...,0.84,0.8,0.75,0.81,0.84,0.85,0.82,0.85,0.63,0.4
7,2016,4,CS100001,0.89,0.73,0.43,0.38,2,0.31,0.44,...,0.94,0.99,0.9,0.88,1.0,0.94,0.95,0.97,0.71,0.48
8,2017,1,CS100001,0.83,0.69,0.14,0.12,2,0.38,0.44,...,0.79,0.79,0.77,0.73,0.76,0.78,0.82,0.82,0.59,0.46
9,2017,2,CS100001,0.67,0.62,0.29,0.25,4,0.62,0.56,...,0.78,0.74,0.75,0.77,0.74,0.79,0.85,0.79,0.6,0.5


In [52]:
# Check the result:
# general clothing ('일반의류') after normalisation

df_m_cloth

Unnamed: 0,기준_년_코드,기준_분기_코드,서비스_업종_코드,점포_수,유사_업종_점포_수,개업_율,개업_점포_수,폐업_률,폐업_점포_수,프랜차이즈_점포_수,...,시간대_건수~21_매출_건수,시간대_건수~24_매출_건수,남성_매출_건수,여성_매출_건수,연령대_10_매출_건수,연령대_20_매출_건수,연령대_30_매출_건수,연령대_40_매출_건수,연령대_50_매출_건수,연령대_60_이상_매출_건수
0,2015,1,CS300011,1.0,1.0,0.5,0.9,4,0.82,1.0,...,0.78,0.74,0.75,0.73,0.8,0.75,0.73,0.67,0.73,0.39
1,2015,2,CS300011,0.96,0.96,0.5,0.6,2,0.33,1.0,...,0.9,0.92,0.89,0.9,0.8,0.89,0.97,0.85,0.91,0.57
2,2015,3,CS300011,0.86,0.86,0.5,0.7,3,0.7,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.67
3,2015,4,CS300011,0.84,0.84,0.5,0.7,2,0.33,1.0,...,0.7,0.76,0.71,0.69,0.87,0.69,0.66,0.74,0.69,0.42
4,2016,1,CS300011,0.75,0.75,0.5,0.7,3,0.64,1.0,...,0.64,0.6,0.57,0.63,0.69,0.62,0.57,0.55,0.61,0.42
5,2016,2,CS300011,0.74,0.74,0.5,0.8,1,0.24,1.0,...,0.64,0.71,0.59,0.68,0.66,0.66,0.62,0.62,0.69,0.53
6,2016,3,CS300011,0.65,0.65,1.0,1.0,4,0.76,1.0,...,0.62,0.7,0.54,0.64,0.7,0.64,0.54,0.54,0.58,0.44
7,2016,4,CS300011,0.64,0.64,1.0,1.0,2,0.33,1.0,...,0.46,0.53,0.43,0.49,0.56,0.47,0.41,0.47,0.48,0.38
8,2017,1,CS300011,0.49,0.48,0.5,0.8,5,1.0,0.0,...,0.41,0.41,0.41,0.42,0.42,0.42,0.39,0.38,0.46,0.49
9,2017,2,CS300011,0.44,0.44,0.5,0.9,3,0.48,0.0,...,0.56,0.59,0.57,0.62,0.48,0.57,0.62,0.65,0.74,0.86


In [53]:
# 데이터 저장

# df_m_cloth.to_csv("./myungdong_cloth_after.csv", encoding = 'cp949')
# df_m_kfood.to_csv("./myungdong_kfood_after.csv", encoding = 'cp949')