# 데이터 불러오기 및 정보 파악

In [1]:
import pandas as pd
import numpy as np
# 데이터들 불러오기
df_deal = pd.read_csv("dataset\\after_data\\apartment_deal.csv",  encoding='UTF8')
df_month_rent = pd.read_csv("dataset\\after_data\\apartment_month_rent.csv",  encoding='UTF8')
df_full_rent = pd.read_csv("dataset\\after_data\\apartment_full_rent.csv",  encoding='UTF8')
df_economic = pd.read_csv("dataset\\after_data\\economic_data.csv",  encoding='UTF8')

# 거래량 관련 데이터 프레임 생성

## 아파트 매매 거래량 테이블 생성

In [2]:
df_deal.head()

Unnamed: 0,date,year,month,day,address_0,address_1,address_2,address_3,address_4,name,floor,deal_price
0,2011-07-09,2011,7,9,서울특별시,강남구,개포동,655.0,2.0,개포2차현대아파트(220),3,64000
1,2011-07-28,2011,7,28,서울특별시,강남구,개포동,655.0,2.0,개포2차현대아파트(220),5,65500
2,2011-01-19,2011,1,19,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,3,70500
3,2011-09-02,2011,9,2,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,5,85000
4,2011-12-17,2011,12,17,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,1,68000


In [3]:
df_deal.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 882185 entries, 0 to 882184
Data columns (total 12 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   date        882185 non-null  object 
 1   year        882185 non-null  int64  
 2   month       882185 non-null  int64  
 3   day         882185 non-null  int64  
 4   address_0   882185 non-null  object 
 5   address_1   882185 non-null  object 
 6   address_2   882185 non-null  object 
 7   address_3   882185 non-null  float64
 8   address_4   882185 non-null  float64
 9   name        882185 non-null  object 
 10  floor       882185 non-null  int64  
 11  deal_price  882185 non-null  int64  
dtypes: float64(2), int64(5), object(5)
memory usage: 80.8+ MB


In [4]:
# 월별 거래량을 계산
df_count = df_deal.groupby(["year","month"])["name"].agg('count').copy()
df_count = df_count.reset_index(["year","month"])
df_count.columns = ["year","month","deal_count"]
df_count

Unnamed: 0,year,month,deal_count
0,2011,1,7179
1,2011,2,6026
2,2011,3,5419
3,2011,4,4028
4,2011,5,3836
...,...,...,...
139,2022,8,760
140,2022,9,649
141,2022,10,574
142,2022,11,750


In [5]:
df_count.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   year        144 non-null    int64
 1   month       144 non-null    int64
 2   deal_count  144 non-null    int64
dtypes: int64(3)
memory usage: 3.5 KB


## 아파트 전세 거래량 정보 추가

In [6]:
df_full_rent.head()

Unnamed: 0,date,year,month,day,address_0,address_1,address_2,address_3,address_4,name,floor,full_rent_price
0,2011-01-05,2011,1,5,서울특별시,강남구,개포동,655.0,2.0,개포2차현대아파트(220),7.0,35000
1,2011-01-18,2011,1,18,서울특별시,강남구,개포동,655.0,2.0,개포2차현대아파트(220),8.0,20000
2,2011-02-01,2011,2,1,서울특별시,강남구,개포동,655.0,2.0,개포2차현대아파트(220),5.0,24000
3,2011-02-11,2011,2,11,서울특별시,강남구,개포동,655.0,2.0,개포2차현대아파트(220),9.0,31000
4,2011-02-24,2011,2,24,서울특별시,강남구,개포동,655.0,2.0,개포2차현대아파트(220),9.0,30500


In [7]:
df_temp = df_full_rent.groupby(["year","month"])["name"].agg('count').copy()
df_temp = df_temp.reset_index(["year","month"])
df_temp.columns = ["year","month","full_rent_count"]
df_temp

Unnamed: 0,year,month,full_rent_count
0,2011,1,12336
1,2011,2,12261
2,2011,3,12121
3,2011,4,9754
4,2011,5,9280
...,...,...,...
139,2022,8,11341
140,2022,9,10258
141,2022,10,10559
142,2022,11,8890


In [8]:
df_count=pd.merge(df_count,df_temp, left_on=["year","month"], right_on=["year","month"], how="inner")
df_count.head()

Unnamed: 0,year,month,deal_count,full_rent_count
0,2011,1,7179,12336
1,2011,2,6026,12261
2,2011,3,5419,12121
3,2011,4,4028,9754
4,2011,5,3836,9280


In [9]:
df_count.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 144 entries, 0 to 143
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   year             144 non-null    int64
 1   month            144 non-null    int64
 2   deal_count       144 non-null    int64
 3   full_rent_count  144 non-null    int64
dtypes: int64(4)
memory usage: 5.6 KB


## 아파트 월세 거래량 정보 추가

In [10]:
df_month_rent.head()

Unnamed: 0,date,year,month,day,address_0,address_1,address_2,address_3,address_4,name,floor,rent_deposit,month_rent_price
0,2011-03-18,2011,3,18,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,1.0,19000,63
1,2011-04-09,2011,4,9,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,1.0,21000,35
2,2011-07-09,2011,7,9,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,4.0,3000,160
3,2011-09-19,2011,9,19,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,1.0,6000,140
4,2011-09-20,2011,9,20,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,2.0,5000,160


In [11]:
df_temp = df_month_rent.groupby(["year","month"])["name"].agg('count').copy()
df_temp = df_temp.reset_index(["year","month"])
df_temp.columns = ["year","month","month_rent_count"]
df_temp

Unnamed: 0,year,month,month_rent_count
0,2011,1,2514
1,2011,2,2711
2,2011,3,2775
3,2011,4,2210
4,2011,5,2168
...,...,...,...
139,2022,8,7415
140,2022,9,7793
141,2022,10,7694
142,2022,11,7709


In [12]:
df_count=pd.merge(df_count,df_temp, left_on=["year","month"], right_on=["year","month"], how="inner")
df_count.head()

Unnamed: 0,year,month,deal_count,full_rent_count,month_rent_count
0,2011,1,7179,12336,2514
1,2011,2,6026,12261,2711
2,2011,3,5419,12121,2775
3,2011,4,4028,9754,2210
4,2011,5,3836,9280,2168


In [13]:
df_count.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 144 entries, 0 to 143
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   year              144 non-null    int64
 1   month             144 non-null    int64
 2   deal_count        144 non-null    int64
 3   full_rent_count   144 non-null    int64
 4   month_rent_count  144 non-null    int64
dtypes: int64(5)
memory usage: 6.8 KB


## 월 정보들 shift

- 해당 달의 거래량은 다음달에 알 수 있음으로 한칸씩 shift

In [14]:
df_count['deal_count'] = df_count['deal_count'].shift(1)
df_count['month_rent_count'] = df_count['month_rent_count'].shift(1)
df_count['full_rent_count'] = df_count['full_rent_count'].shift(1)
df_count

Unnamed: 0,year,month,deal_count,full_rent_count,month_rent_count
0,2011,1,,,
1,2011,2,7179.0,12336.0,2514.0
2,2011,3,6026.0,12261.0,2711.0
3,2011,4,5419.0,12121.0,2775.0
4,2011,5,4028.0,9754.0,2210.0
...,...,...,...,...,...
139,2022,8,688.0,11654.0,8916.0
140,2022,9,760.0,11341.0,7415.0
141,2022,10,649.0,10258.0,7793.0
142,2022,11,574.0,10559.0,7694.0


In [15]:
# 첫째 달의 값을 임시로 채움
df_count=df_count.fillna(method='bfill')
df_count.head()

Unnamed: 0,year,month,deal_count,full_rent_count,month_rent_count
0,2011,1,7179.0,12336.0,2514.0
1,2011,2,7179.0,12336.0,2514.0
2,2011,3,6026.0,12261.0,2711.0
3,2011,4,5419.0,12121.0,2775.0
4,2011,5,4028.0,9754.0,2210.0


In [16]:
df_count.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 144 entries, 0 to 143
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   year              144 non-null    int64  
 1   month             144 non-null    int64  
 2   deal_count        144 non-null    float64
 3   full_rent_count   144 non-null    float64
 4   month_rent_count  144 non-null    float64
dtypes: float64(3), int64(2)
memory usage: 6.8 KB


## 경제지표 정보와 통합한 테이블 생성

In [17]:
df_economic.head()

Unnamed: 0,date,year,month,day,apartment_index,kospi_index,korea_rp,korea_3_year,korea_10_year,korea_10-3_year,us_3_month,us_2_year,us_10_year,us_10-2_year,us_10-3_year_month,apartment_supply,unsold_count,unsold_ratio
0,2011-01-01,2011,1,1,93.0,2051.0,2.5,3.44,4.57,1.13,0.124,0.601,3.334,2.733,3.21,5345,2269,42.450889
1,2011-01-02,2011,1,2,93.0,2051.0,2.5,3.44,4.57,1.13,0.124,0.601,3.334,2.733,3.21,5345,2269,42.450889
2,2011-01-03,2011,1,3,93.0,2070.08,2.5,3.44,4.57,1.13,0.124,0.601,3.334,2.733,3.21,5345,2269,42.450889
3,2011-01-04,2011,1,4,93.0,2085.14,2.5,3.495,4.58,1.085,0.142,0.621,3.338,2.717,3.196,5345,2269,42.450889
4,2011-01-05,2011,1,5,93.0,2082.55,2.5,3.495,4.63,1.135,0.142,0.708,3.463,2.755,3.321,5345,2269,42.450889


In [18]:
df_economic=pd.merge(df_economic, df_count, left_on=["year","month"], right_on=["year","month"], how="left")
df_economic.head()

Unnamed: 0,date,year,month,day,apartment_index,kospi_index,korea_rp,korea_3_year,korea_10_year,korea_10-3_year,...,us_2_year,us_10_year,us_10-2_year,us_10-3_year_month,apartment_supply,unsold_count,unsold_ratio,deal_count,full_rent_count,month_rent_count
0,2011-01-01,2011,1,1,93.0,2051.0,2.5,3.44,4.57,1.13,...,0.601,3.334,2.733,3.21,5345,2269,42.450889,7179.0,12336.0,2514.0
1,2011-01-02,2011,1,2,93.0,2051.0,2.5,3.44,4.57,1.13,...,0.601,3.334,2.733,3.21,5345,2269,42.450889,7179.0,12336.0,2514.0
2,2011-01-03,2011,1,3,93.0,2070.08,2.5,3.44,4.57,1.13,...,0.601,3.334,2.733,3.21,5345,2269,42.450889,7179.0,12336.0,2514.0
3,2011-01-04,2011,1,4,93.0,2085.14,2.5,3.495,4.58,1.085,...,0.621,3.338,2.717,3.196,5345,2269,42.450889,7179.0,12336.0,2514.0
4,2011-01-05,2011,1,5,93.0,2082.55,2.5,3.495,4.63,1.135,...,0.708,3.463,2.755,3.321,5345,2269,42.450889,7179.0,12336.0,2514.0


In [19]:
df_economic.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4383 entries, 0 to 4382
Data columns (total 21 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   date                4383 non-null   object 
 1   year                4383 non-null   int64  
 2   month               4383 non-null   int64  
 3   day                 4383 non-null   int64  
 4   apartment_index     4383 non-null   float64
 5   kospi_index         4383 non-null   float64
 6   korea_rp            4383 non-null   float64
 7   korea_3_year        4383 non-null   float64
 8   korea_10_year       4383 non-null   float64
 9   korea_10-3_year     4383 non-null   float64
 10  us_3_month          4383 non-null   float64
 11  us_2_year           4383 non-null   float64
 12  us_10_year          4383 non-null   float64
 13  us_10-2_year        4383 non-null   float64
 14  us_10-3_year_month  4383 non-null   float64
 15  apartment_supply    4383 non-null   int64  
 16  unsold

In [20]:
# 거래 날짜들이 모든 날짜에 체결이 되는 것이 아님으로 모든 날짜들이 있는
# df_economic에 대해서 날짜들을 리스트로 생성
date_range_list = list(df_economic['date'])
date_list = list()
for date_element in date_range_list:
    date_list_element = list()
    for i in range(3):
        date_list_element.append(int(date_element.split('-')[i]))
    date_list.append(tuple(date_list_element))
date_list

[(2011, 1, 1),
 (2011, 1, 2),
 (2011, 1, 3),
 (2011, 1, 4),
 (2011, 1, 5),
 (2011, 1, 6),
 (2011, 1, 7),
 (2011, 1, 8),
 (2011, 1, 9),
 (2011, 1, 10),
 (2011, 1, 11),
 (2011, 1, 12),
 (2011, 1, 13),
 (2011, 1, 14),
 (2011, 1, 15),
 (2011, 1, 16),
 (2011, 1, 17),
 (2011, 1, 18),
 (2011, 1, 19),
 (2011, 1, 20),
 (2011, 1, 21),
 (2011, 1, 22),
 (2011, 1, 23),
 (2011, 1, 24),
 (2011, 1, 25),
 (2011, 1, 26),
 (2011, 1, 27),
 (2011, 1, 28),
 (2011, 1, 29),
 (2011, 1, 30),
 (2011, 1, 31),
 (2011, 2, 1),
 (2011, 2, 2),
 (2011, 2, 3),
 (2011, 2, 4),
 (2011, 2, 5),
 (2011, 2, 6),
 (2011, 2, 7),
 (2011, 2, 8),
 (2011, 2, 9),
 (2011, 2, 10),
 (2011, 2, 11),
 (2011, 2, 12),
 (2011, 2, 13),
 (2011, 2, 14),
 (2011, 2, 15),
 (2011, 2, 16),
 (2011, 2, 17),
 (2011, 2, 18),
 (2011, 2, 19),
 (2011, 2, 20),
 (2011, 2, 21),
 (2011, 2, 22),
 (2011, 2, 23),
 (2011, 2, 24),
 (2011, 2, 25),
 (2011, 2, 26),
 (2011, 2, 27),
 (2011, 2, 28),
 (2011, 3, 1),
 (2011, 3, 2),
 (2011, 3, 3),
 (2011, 3, 4),
 (2011, 3, 5),

In [22]:
# csv 파일 저장
# df_economic.to_csv('dataset\\after_data\\final_economic.csv',index=False)

# 피봇 테이블 생성

## 아파트 거래 피봇 테이블 생성

In [2]:
# 대표 데이터 파악
df_deal.head()

Unnamed: 0,date,year,month,day,address_0,address_1,address_2,address_3,address_4,name,floor,deal_price
0,2011-07-09,2011,7,9,서울특별시,강남구,개포동,655.0,2.0,개포2차현대아파트(220),3,64000
1,2011-07-28,2011,7,28,서울특별시,강남구,개포동,655.0,2.0,개포2차현대아파트(220),5,65500
2,2011-01-19,2011,1,19,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,3,70500
3,2011-09-02,2011,9,2,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,5,85000
4,2011-12-17,2011,12,17,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,1,68000


In [3]:
# 최근에 체결된 가격이 계속 유지된다고 생각을 하고 모든 날짜의 가격들을 결정
# 이를 위해서 피봇테이블 생성
pivot_table_deal=df_deal.pivot_table(index=['year','month','day'], columns=['address_1','address_2','address_3','address_4'], values="deal_price")
pivot_table_deal

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,166.0,172.0,176.0,177.0,179.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4
2011,1,1,,,,,,,,,,,...,,,,,,,,,,
2011,1,2,,,,,,,,,,,...,,,,,,,25800.0,,,
2011,1,3,,,,,,,,,,,...,,,,,,,,,,
2011,1,4,33800.0,,,,,,,,,,...,,,,,,,,,,
2011,1,5,43000.0,,89400.0,,80300.0,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,12,27,,,,,,,,,,,...,,,,,,,,,,
2022,12,28,,,,,,,,,,,...,,,,,,,,,,
2022,12,29,,,,,,,,,,,...,,,,,,,,,,
2022,12,30,,,,,,,,,,,...,,,,,,,,,,


In [4]:
pivot_table_deal.info() # 2011년 1월 1일부터 2022년 12월 31일까지 총 4383의 날이 있어야 하는데 4381로 2날짜가 비어있음

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4381 entries, (2011, 1, 1) to (2022, 12, 31)
Columns: 8860 entries, ('강남구', '개포동', 12.0, 0.0) to ('중랑구', '중화동', 454.0, 0.0)
dtypes: float64(8860)
memory usage: 296.2 MB


In [24]:
# 기간 내 모든 날짜들에서체결 날짜들 빼서 체결날짜에서 없는 날짜들을 고름
print(set(date_list) - set(pivot_table_deal.index))
print(set(pivot_table_deal.index) - set(date_list))

{(2016, 2, 9), (2022, 9, 11)}
set()


In [5]:
# 빈 날짜 들을 null 로 채움
pivot_table_deal.loc[(2016, 2, 9)]=np.nan
pivot_table_deal.loc[(2022, 9, 11)]=np.nan

In [6]:
# 연, 월, 일 로 정렬을 함
pivot_table_deal = pivot_table_deal.sort_values(by=['year','month','day'])
pivot_table_deal

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,166.0,172.0,176.0,177.0,179.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4
2011,1,1,,,,,,,,,,,...,,,,,,,,,,
2011,1,2,,,,,,,,,,,...,,,,,,,25800.0,,,
2011,1,3,,,,,,,,,,,...,,,,,,,,,,
2011,1,4,33800.0,,,,,,,,,,...,,,,,,,,,,
2011,1,5,43000.0,,89400.0,,80300.0,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,12,27,,,,,,,,,,,...,,,,,,,,,,
2022,12,28,,,,,,,,,,,...,,,,,,,,,,
2022,12,29,,,,,,,,,,,...,,,,,,,,,,
2022,12,30,,,,,,,,,,,...,,,,,,,,,,


In [6]:
pivot_table_deal.info() 

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4383 entries, (2011, 1, 1) to (2022, 12, 31)
Columns: 8860 entries, ('강남구', '개포동', 12.0, 0.0) to ('중랑구', '중화동', 454.0, 0.0)
dtypes: float64(8860)
memory usage: 296.3 MB


In [7]:
# 가장 최근에 체결된 값이 거래가격으로 유지 됨으로 ffill()을 사용
pivot_table_deal=pivot_table_deal.ffill()
pivot_table_deal

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,166.0,172.0,176.0,177.0,179.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4
2011,1,1,,,,,,,,,,,...,,,,,,,,,,
2011,1,2,,,,,,,,,,,...,,,,,,,25800.0,,,
2011,1,3,,,,,,,,,,,...,,,,,,,25800.0,,,
2011,1,4,33800.0,,,,,,,,,,...,,,,,,,25800.0,,,
2011,1,5,43000.0,,89400.0,,80300.0,,,,,,...,,,,,,,25800.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,12,27,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0
2022,12,28,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0
2022,12,29,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0
2022,12,30,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0


In [9]:
# csv 파일 저장을 위해서 index를 없앰
# pivot_table_deal = pivot_table_deal.reset_index()
# pivot_table_deal

address_1,year,month,day,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
address_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
address_3,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,12.0,12.0,138.0,140.0,141.0,166.0,172.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
address_4,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,0.0,2.0,0.0,0.0,0.0,4.0,3.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
0,2011,1,1,,,,,,,,...,,,,,,,,,,
1,2011,1,2,,,,,,,,...,,,,,,,25800.0,,,
2,2011,1,3,,,,,,,,...,,,,,,,25800.0,,,
3,2011,1,4,33800.0,,,,,,,...,,,,,,,25800.0,,,
4,2011,1,5,43000.0,,89400.0,,80300.0,,,...,,,,,,,25800.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4376,2022,12,27,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0
4377,2022,12,28,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0
4378,2022,12,29,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0
4379,2022,12,30,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0


In [7]:
# csv 파일 저장
# pivot_table_deal.to_csv('dataset\\after_data\\pivot_table_deal.csv',index=False)

## 아파트 전세 피봇테이블

In [2]:
pivot_table_full_rent=df_full_rent.pivot_table(index=['year','month','day'], columns=['address_1','address_2','address_3','address_4'], values="full_rent_price")
pivot_table_full_rent # 해당 날짜에 거래가 많을 경우 mean 값이 나옴을 확인!

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,166.0,172.0,176.0,177.0,179.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4
2011,1,1,,,,,,,,,,,...,,,,,,,,,,
2011,1,2,,,,,,,,,,,...,,,,,,,,,,
2011,1,3,17000.000000,63000.0,,,9500.0,,,,,,...,,,,,,,,,,
2011,1,4,18833.333333,,,16000.000000,7250.0,,,,,,...,,,15000.0,,,,15000.0,,,
2011,1,5,,,11000.0,15833.333333,10000.0,,,,,,...,,,,,,,16000.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,12,27,,,75000.0,,,,,,,,...,,,,,,,,,,
2022,12,28,30200.000000,,,,,,,,,,...,,,,,,,,,,
2022,12,29,,,,,,,,,,68000.0,...,,,,,,,,,,
2022,12,30,,,,,,,,,,,...,,,,,,,,,,


In [3]:
pivot_table_full_rent.info() # 2011년 1월 1일부터 2022년 12월 31일까지 총 4383의 날이 있음

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4383 entries, (2011, 1, 1) to (2022, 12, 31)
Columns: 9258 entries, ('강남구', '개포동', 12.0, 0.0) to ('중랑구', '중화동', 454.0, 0.0)
dtypes: float64(9258)
memory usage: 309.6 MB


In [71]:
# 모든 날짜들이 있음을 확인함
print(set(date_list) - set(pivot_table_full_rent.index))
print(set(pivot_table_full_rent.index) - set(date_list))

set()
set()


In [4]:
pivot_table_full_rent=pivot_table_full_rent.ffill()
pivot_table_full_rent

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,166.0,172.0,176.0,177.0,179.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4
2011,1,1,,,,,,,,,,,...,,,,,,,,,,
2011,1,2,,,,,,,,,,,...,,,,,,,,,,
2011,1,3,17000.000000,63000.0,,,9500.0,,,,,,...,,,,,,,,,,
2011,1,4,18833.333333,63000.0,,16000.000000,7250.0,,,,,,...,,,15000.0,,,,15000.0,,,
2011,1,5,18833.333333,63000.0,11000.0,15833.333333,10000.0,,,,,,...,,,15000.0,,,,16000.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,12,27,36550.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,150000.0,130000.0,70000.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0
2022,12,28,30200.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,150000.0,130000.0,70000.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0
2022,12,29,30200.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,150000.0,130000.0,68000.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0
2022,12,30,30200.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,150000.0,130000.0,68000.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0


In [22]:
# csv 파일 저장을 위해서 index reset
# pivot_table_full_rent = pivot_table_full_rent.reset_index()
# pivot_table_full_rent

address_1,year,month,day,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
address_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
address_3,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,12.0,12.0,138.0,140.0,141.0,166.0,172.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
address_4,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,0.0,2.0,0.0,0.0,0.0,4.0,3.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
0,2011,1,1,,,,,,,,...,,,,,,,,,,
1,2011,1,2,,,,,,,,...,,,,,,,,,,
2,2011,1,3,17000.000000,63000.0,,,9500.0,,,...,,,,,,,,,,
3,2011,1,4,18833.333333,63000.0,,16000.000000,7250.0,,,...,,,15000.0,,,,15000.0,,,
4,2011,1,5,18833.333333,63000.0,11000.0,15833.333333,10000.0,,,...,,,15000.0,,,,16000.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4378,2022,12,27,36550.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0
4379,2022,12,28,30200.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0
4380,2022,12,29,30200.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0
4381,2022,12,30,30200.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0


In [23]:
# csv 파일 저장
# pivot_table_full_rent.to_csv('dataset\\after_data\\pivot_table_full_rent.csv',index=False)

## 아파트월세 피봇테이블 -> 아파트연세 피봇테이블 

- 보증금은 계약시의 상황마다 다를 것
- 전월세전환률을 적용하여서 월세에서의 보증금을 해결
- 거래들마다 상황에 따라 보증금과 월세금액은 다를 수 있음으로, 보증금의 5.8% 값에 월세*12을 더하여 1년간 들어가는 금액을 계산

In [2]:
df_month_rent['year_rent_price'] = (df_month_rent['rent_deposit']*0.058)+(df_month_rent['month_rent_price']*12)
df_month_rent

Unnamed: 0,date,year,month,day,address_0,address_1,address_2,address_3,address_4,name,floor,rent_deposit,month_rent_price,year_rent_price
0,2011-03-18,2011,3,18,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,1.0,19000,63,1858.0
1,2011-04-09,2011,4,9,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,1.0,21000,35,1638.0
2,2011-07-09,2011,7,9,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,4.0,3000,160,2094.0
3,2011-09-19,2011,9,19,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,1.0,6000,140,2028.0
4,2011-09-20,2011,9,20,서울특별시,강남구,개포동,658.0,1.0,개포6차우성아파트1동~8동,2.0,5000,160,2210.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637083,2022-11-25,2022,11,25,서울특별시,중랑구,중화동,450.0,0.0,한신아파트(103~109),21.0,30000,48,2316.0
637084,2022-12-10,2022,12,10,서울특별시,중랑구,중화동,450.0,0.0,한신아파트(103~109),7.0,25000,50,2050.0
637085,2022-12-24,2022,12,24,서울특별시,중랑구,중화동,450.0,0.0,한신아파트(103~109),17.0,20000,50,1760.0
637086,2022-12-28,2022,12,28,서울특별시,중랑구,중화동,450.0,0.0,한신아파트(103~109),2.0,5000,150,2090.0


In [3]:
pivot_table_year_rent=df_month_rent.pivot_table(index=['year','month','day'], columns=['address_1','address_2','address_3','address_4'], values='year_rent_price')
pivot_table_year_rent

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,172.0,176.0,177.0,179.0,185.0,...,307.0,307.0,314.0,318.0,331.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,...,6.0,76.0,1.0,81.0,64.0,0.0,0.0,0.0,0.0,0.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4
2011,1,1,,,,,,,,,,,...,,,,,,,,,,
2011,1,2,,,,,,,,,,,...,,,,,,,,,,
2011,1,3,,,,,,,,,,,...,,,,,,,,,,
2011,1,4,,,,,,,,,,1786.0,...,,,,,,,,,,
2011,1,5,,,778.0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,12,27,,,,,,,,,,,...,,,,,,,,,,
2022,12,28,2240.0,,,,,,,,,1779.0,...,,,,,,,2090.0,,,
2022,12,29,1781.0,,,,,,,,,,...,,,,,,,,,,
2022,12,30,2036.5,,4000.0,,,,,,,2096.0,...,,,,,,,,,,


In [78]:
print(pivot_table_year_rent.info()) # 4383 모든 인덱스가 있음
# 모든 날짜들이 있음을 확인함
print(set(date_list) - set(pivot_table_year_rent.index))
print(set(pivot_table_year_rent.index) - set(date_list))

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4383 entries, (2011, 1, 1) to (2022, 12, 31)
Columns: 8358 entries, ('강남구', '개포동', 12.0, 0.0) to ('중랑구', '중화동', 454.0, 0.0)
dtypes: float64(8358)
memory usage: 279.5 MB
None
set()
set()


In [4]:
pivot_table_year_rent=pivot_table_year_rent.ffill()
pivot_table_year_rent

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,172.0,176.0,177.0,179.0,185.0,...,307.0,307.0,314.0,318.0,331.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,...,6.0,76.0,1.0,81.0,64.0,0.0,0.0,0.0,0.0,0.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4
2011,1,1,,,,,,,,,,,...,,,,,,,,,,
2011,1,2,,,,,,,,,,,...,,,,,,,,,,
2011,1,3,,,,,,,,,,,...,,,,,,,,,,
2011,1,4,,,,,,,,,,1786.0,...,,,,,,,,,,
2011,1,5,,,778.0,,,,,,,1786.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,12,27,958.0,6546.0,3920.0,251.6,365.8,2740.0,9560.0,3700.0,3160.0,2700.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,1760.0,1610.0,2070.0,2140.0
2022,12,28,2240.0,6546.0,3920.0,251.6,365.8,2740.0,9560.0,3700.0,3160.0,1779.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,2090.0,1610.0,2070.0,2140.0
2022,12,29,1781.0,6546.0,3920.0,251.6,365.8,2740.0,9560.0,3700.0,3160.0,1779.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,2090.0,1610.0,2070.0,2140.0
2022,12,30,2036.5,6546.0,4000.0,251.6,365.8,2740.0,9560.0,3700.0,3160.0,2096.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,2090.0,1610.0,2070.0,2140.0


In [30]:
# csv 파일 저장을 위해서 index reset
# pivot_table_year_rent = pivot_table_year_rent.reset_index()
# pivot_table_year_rent

address_1,year,month,day,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
address_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
address_3,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,12.0,12.0,138.0,140.0,141.0,172.0,176.0,...,307.0,307.0,314.0,318.0,331.0,438.0,450.0,452.0,453.0,454.0
address_4,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,0.0,2.0,0.0,0.0,0.0,3.0,1.0,...,6.0,76.0,1.0,81.0,64.0,0.0,0.0,0.0,0.0,0.0
0,2011,1,1,,,,,,,,...,,,,,,,,,,
1,2011,1,2,,,,,,,,...,,,,,,,,,,
2,2011,1,3,,,,,,,,...,,,,,,,,,,
3,2011,1,4,,,,,,,,...,,,,,,,,,,
4,2011,1,5,,,778.0,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4378,2022,12,27,958.0,6546.0,3920.0,251.6,365.8,2740.0,9560.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,1760.0,1610.0,2070.0,2140.0
4379,2022,12,28,2240.0,6546.0,3920.0,251.6,365.8,2740.0,9560.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,2090.0,1610.0,2070.0,2140.0
4380,2022,12,29,1781.0,6546.0,3920.0,251.6,365.8,2740.0,9560.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,2090.0,1610.0,2070.0,2140.0
4381,2022,12,30,2036.5,6546.0,4000.0,251.6,365.8,2740.0,9560.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,2090.0,1610.0,2070.0,2140.0


In [28]:
# csv 파일 저장
# pivot_table_year_rent.to_csv('dataset\\after_data\\pivot_table_year_rent.csv',index=False)

# 최종 아파트 거래, 전세, 월세 테이블 생성 및 저장

- 이후는 colab에서 진행 - chunksize 한후 list화 시키는 것보다,[] 사용하는게 훨씬 빠름

## 1. slicing 사용 - 되기는 하지만 시간이 오래걸림

### list comprehension활용 - 시간단축

In [1]:
import pandas as pd
import time

start = time.time()
pivot_table_deal= pd.read_csv("pivot_table_deal.csv", header=[0,1,2,3], index_col=[0,1,2], encoding='UTF8')
pivot_table_deal_list = [pivot_table_deal.iloc[i:i+2] for i in range(0, len(pivot_table_deal), 2)]
end = time.time()
pandas_duration = end - start
print('time :',pandas_duration)
pivot_table_deal_list[0]

time : 10.169880390167236


Unnamed: 0_level_0,Unnamed: 1_level_0,year,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,12.0,12.0,138.0,140.0,141.0,166.0,172.0,176.0,177.0,179.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,0.0,2.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
2011,1,1,,,,,,,,,,,...,,,,,,,,,,
2011,1,2,,,,,,,,,,,...,,,,,,,25800.0,,,


### chunksize 활용 - 메모리도 더 많이(2.5G정도) 작아먹고, 시간도  훨씬 더 걸림

In [2]:
start = time.time()
pivot_table_deal_list = pd.read_csv("pivot_table_deal.csv", header=[0,1,2,3], index_col=[0,1,2],chunksize=2,encoding='UTF8')
pivot_table_deal_list = list(pivot_table_deal_list)
end = time.time()
pandas_duration = end - start
print('time :',pandas_duration)
pivot_table_deal_list[0]

time : 575.5457537174225


Unnamed: 0_level_0,Unnamed: 1_level_0,year,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,12.0,12.0,138.0,140.0,141.0,166.0,172.0,176.0,177.0,179.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,0.0,2.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
2011,1,1,,,,,,,,,,,...,,,,,,,,,,
2011,1,2,,,,,,,,,,,...,,,,,,,25800.0,,,


### 공통부분 - 반복하며 파일로 저장

In [9]:
# 수정할 대 덮어쓰기 조
# 한 20초에 한 chunk 씩 처리가 됨
number = 72
for i,df_deal in enumerate(pivot_table_deal_list[number:]):
    df_deal=df_deal.stack(level=[0,1,2,3])
    df_deal=df_deal.reset_index()
    df_deal.columns=['year','month','day','address_1','address_2','address_3','address_4','deal_price']
    # 여기에 저장파트 추가
    df_deal.to_csv('dataset\\after_data\\after_deal_price\\'+str(number+i)+'.csv',index=False)
    print(i+number)
    print(df_deal.head())
    print()

66
   year  month  day address_1 address_2 address_3 address_4  deal_price
0  2011      5   13       강남구       개포동    1164.0      25.0     47000.0
1  2011      5   13       강남구       개포동    1164.0      30.0     44000.0
2  2011      5   13       강남구       개포동      12.0       0.0     50000.0
3  2011      5   13       강남구       개포동      12.0       2.0    155000.0
4  2011      5   13       강남구       개포동    1260.0      11.0     72300.0

67
   year  month  day address_1 address_2 address_3 address_4  deal_price
0  2011      5   15       강남구       개포동    1164.0      25.0     47000.0
1  2011      5   15       강남구       개포동    1164.0      30.0     44000.0
2  2011      5   15       강남구       개포동      12.0       0.0     50000.0
3  2011      5   15       강남구       개포동      12.0       2.0    155000.0
4  2011      5   15       강남구       개포동    1260.0      11.0     72300.0

68
   year  month  day address_1 address_2 address_3 address_4  deal_price
0  2011      5   17       강남구       개포동    1164.0    

KeyboardInterrupt: 

In [2]:
# 리스트의 개수 확인
len(pivot_table_deal_list)

2191

## 2. sparse 사용 - 아직 구현중, 용량이 크게 주는 것 같지는 않고, 행렬을 어떻게 처리해야 할지 고민..

In [41]:
from numpy import array
import scipy # sparse matrix 사용 위해서 임포트

sparse_csr=scipy.sparse.csr_matrix(pivot_table_deal.values)
sparse_csr

<4381x8863 sparse matrix of type '<class 'numpy.float64'>'
	with 38828803 stored elements in Compressed Sparse Row format>

In [None]:
print(sparse_csr.toarray())
print(sparse_csr.toarray().nbytes)

## 3. dask 사용 - 멀티인덱스 제공X

## 4. modin 사용 - 설치가 제대로 안되는 것 같고, 빠르지도 않은듯?? 좀더 실험 필요

## 5. multiprocess 사용 - 아직 구현중, 다른 방법이 없을까 좀 더 고민

In [None]:
import multiprocessing

import multiprocessing as mp

def stack_pivot_table(pivot_table_part):
    pivot_table_part=pivot_table_part.stack(level=[0,1,2,3])
    pivot_table_part=pivot_table_part.reset_index()
    pivot_table_part.columns=['year','month','day','address_1','address_2','address_3','address_4','deal_price']
    print(pivot_table_part.head())
    return pivot_table_part

num_processes = 4
pivot_table_deal_list = [pivot_table_deal.iloc[i:i+2] for i in range(0, 10, 2)] # 10대신에 전체 데이터프레임 길이를 넣음

with mp.Pool(processes=num_processes) as pool:
    results = pool.map(stack_pivot_table, pivot_table_deal_list)

## 6. 컬럼을 슬라이싱해서 stack - 메모리 문제도 없고, 속도도 매우 빠름 -> 컬럼이 매우 많으면 잘라서 사용

### 아파트 거래테이블 생성

In [8]:
# 여기서 pivo_table_deal은 reset_index 하기 전 테이블
pivot_table_deal.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,166.0,172.0,176.0,177.0,179.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4
2011,1,1,,,,,,,,,,,...,,,,,,,,,,
2011,1,2,,,,,,,,,,,...,,,,,,,25800.0,,,
2011,1,3,,,,,,,,,,,...,,,,,,,25800.0,,,
2011,1,4,33800.0,,,,,,,,,,...,,,,,,,25800.0,,,
2011,1,5,43000.0,,89400.0,,80300.0,,,,,,...,,,,,,,25800.0,,,


In [9]:
pivot_table_deal.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4383 entries, (2011, 1, 1) to (2022, 12, 31)
Columns: 8860 entries, ('강남구', '개포동', 12.0, 0.0) to ('중랑구', '중화동', 454.0, 0.0)
dtypes: float64(8860)
memory usage: 296.3 MB


In [9]:
# null 값을 채움 - 값을 채우지 않으면 추후 stack을 할 때 null 값을 계산을 안함
pivot_table_deal = pivot_table_deal.fillna(0)
pivot_table_deal

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,166.0,172.0,176.0,177.0,179.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4
2011,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,25800.0,0.0,0.0,0.0
2011,1,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,25800.0,0.0,0.0,0.0
2011,1,4,33800.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,25800.0,0.0,0.0,0.0
2011,1,5,43000.0,0.0,89400.0,0.0,80300.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,25800.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,12,27,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0
2022,12,28,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0
2022,12,29,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0
2022,12,30,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,...,37800.0,74000.0,32500.0,76500.0,28000.0,69500.0,91500.0,55000.0,85500.0,96000.0


In [11]:
# 테스트
# 컬럼이 많아서 문제가 되는것 같아서 컬럼을 잘라서 stack을 적용해보자
# 컬럼 자른것 확인
pivot_table_deal_part = pivot_table_deal.iloc[:,0:20]
pivot_table_deal_part

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,166.0,172.0,176.0,177.0,179.0,185.0,187.0,189.0,649.0,651.0,652.0,653.0,654.0,655.0,655.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4
2011,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,4,33800.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,5,43000.0,0.0,89400.0,0.0,80300.0,0.0,0.0,0.0,0.0,0.0,89000.0,0.0,0.0,0.0,0.0,0.0,132000.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,12,27,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,190000.0,285000.0,182000.0,400000.0,268143.0,249500.0,275000.0,291500.0,192000.0,174000.0
2022,12,28,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,190000.0,285000.0,182000.0,400000.0,268143.0,249500.0,275000.0,291500.0,192000.0,174000.0
2022,12,29,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,190000.0,285000.0,182000.0,400000.0,268143.0,249500.0,275000.0,291500.0,192000.0,174000.0
2022,12,30,104000.0,303000.0,220000.0,63250.0,184000.0,120000.0,179500.0,350000.0,390000.0,240000.0,190000.0,285000.0,182000.0,400000.0,268143.0,249500.0,275000.0,291500.0,192000.0,174000.0


In [12]:
stack_deal_table =pivot_table_deal_part.stack(level=[0,1,2,3])
stack_deal_table=stack_deal_table.reset_index()
stack_deal_table.columns=['year','month','day','address_1','address_2','address_3','address_4','deal_price']
stack_deal_table

Unnamed: 0,year,month,day,address_1,address_2,address_3,address_4,deal_price
0,2011,1,1,강남구,개포동,12.0,0.0,0.0
1,2011,1,1,강남구,개포동,12.0,2.0,0.0
2,2011,1,1,강남구,개포동,138.0,0.0,0.0
3,2011,1,1,강남구,개포동,140.0,0.0,0.0
4,2011,1,1,강남구,개포동,141.0,0.0,0.0
...,...,...,...,...,...,...,...,...
87655,2022,12,31,강남구,개포동,652.0,0.0,249500.0
87656,2022,12,31,강남구,개포동,653.0,0.0,275000.0
87657,2022,12,31,강남구,개포동,654.0,0.0,291500.0
87658,2022,12,31,강남구,개포동,655.0,1.0,192000.0


In [10]:
# 최종코드
# 컬럼의 개수는 8860
# 메모리를 많이 잡아먹어서 중간에 저장을 해야 할 듯
column_range_list = [[0,100],[100,200],[200,300],[300,400],[400,443]]
for column_range in column_range_list:
    for i in range(column_range[0],column_range[1]):
        start = 20*i
        end = 20*i + 20
        pivot_table_deal_part = pivot_table_deal.iloc[:,start:end]
        stack_deal_table =pivot_table_deal_part.stack(level=[0,1,2,3])
        stack_deal_table=stack_deal_table.reset_index()
        stack_deal_table.columns=['year','month','day','address_1','address_2','address_3','address_4','deal_price']
        if i == column_range[0]:
            df_deal_final = stack_deal_table.copy()
        else:
            df_deal_final = pd.concat([df_deal_final, stack_deal_table], axis=0)
            df_deal_final.reset_index(drop=True, inplace=True)
        print(i)
        print(stack_deal_table.head(1))
    df_deal_final.to_csv('dataset\\after_data\\final_deal_price\\'+str(column_range[0])+'_'+str(column_range[1])+'.csv',index=False)

0
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강남구       개포동       12.0        0.0         0.0
1
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강남구       개포동      655.0        3.0         0.0
2
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강남구       논현동        9.0        2.0         0.0
3
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강남구       논현동      103.0       11.0         0.0
4
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강남구       논현동      196.0        4.0         0.0
5
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강남구       논현동      261.0        8.0         0.0
6
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1     

55
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강동구       천호동       27.0        6.0         0.0
56
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강동구       천호동       49.0        8.0         0.0
57
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강동구       천호동       55.0        4.0         0.0
58
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강동구       천호동      302.0        3.0         0.0
59
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강동구       천호동      416.0       12.0         0.0
60
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       강동구       천호동      570.0        0.0         0.0
61
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1   

112
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       관악구       신림동     1684.0        7.0         0.0
113
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       관악구       신림동     1718.0        0.0         0.0
114
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       관악구       신림동     1738.0        0.0         0.0
115
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       광진구       광장동      453.0        1.0         0.0
116
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       광진구       구의동       45.0       11.0         0.0
117
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       광진구       구의동      224.0        7.0         0.0
118
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011   

166
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       금천구       시흥동      798.0       64.0         0.0
167
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       금천구       시흥동      949.0       18.0         0.0
168
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       금천구       시흥동      999.0        1.0         0.0
169
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       노원구       공릉동      109.0        0.0         0.0
170
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       노원구       공릉동      380.0       56.0         0.0
171
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       노원구       공릉동      585.0        7.0         0.0
172
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011   

221
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       동작구      신대방동      360.0       17.0         0.0
222
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       동작구      신대방동      711.0        0.0         0.0
223
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       동작구       흑석동      332.0        0.0         0.0
224
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       마포구       공덕동      464.0        0.0         0.0
225
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       마포구       도화동       82.0        0.0         0.0
226
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       마포구       망원동      239.0        0.0         0.0
227
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011   

275
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       서초구       방배동     2626.0        0.0         0.0
276
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       서초구       서초동     1315.0        0.0         0.0
277
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       서초구       서초동     1337.0       22.0         0.0
278
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       서초구       서초동     1363.0       25.0         0.0
279
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       서초구       서초동     1454.0       29.0         0.0
280
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       서초구       서초동     1472.0        1.0         0.0
281
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011   

330
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       송파구       송파동      187.0        5.0         0.0
331
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       송파구       오금동        9.0        7.0         0.0
332
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       송파구       오금동       75.0        0.0         0.0
333
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       송파구       오금동      615.0        0.0         0.0
334
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       송파구       잠실동      336.0        5.0         0.0
335
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       송파구       장지동      896.0        0.0         0.0
336
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011   

384
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       용산구       한남동      805.0        0.0         0.0
385
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       용산구       효창동        5.0      127.0         0.0
386
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       용산구       후암동      143.0       23.0         0.0
387
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       은평구       갈현동      281.0      208.0         0.0
388
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       은평구       갈현동      499.0       18.0         0.0
389
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       은평구       구산동       24.0       59.0         0.0
390
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011   

439
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       중랑구       상봉동      269.0        7.0         0.0
440
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       중랑구       신내동      449.0        1.0         0.0
441
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       중랑구       신내동      788.0        0.0         0.0
442
   year  month  day address_1 address_2  address_3  address_4  deal_price
0  2011      1    1       중랑구       중화동      207.0       14.0         0.0


### 아파트 전세 테이블 생성

In [5]:
pivot_table_full_rent.info() # 컬럼의 개수가 총 9258개

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4383 entries, (2011, 1, 1) to (2022, 12, 31)
Columns: 9258 entries, ('강남구', '개포동', 12.0, 0.0) to ('중랑구', '중화동', 454.0, 0.0)
dtypes: float64(9258)
memory usage: 309.6 MB


In [6]:
pivot_table_full_rent = pivot_table_full_rent.fillna(0)
pivot_table_full_rent

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,166.0,172.0,176.0,177.0,179.0,...,307.0,314.0,318.0,331.0,413.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,...,76.0,1.0,81.0,64.0,8.0,0.0,0.0,0.0,0.0,0.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4
2011,1,1,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,2,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,3,17000.000000,63000.0,0.0,0.000000,9500.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,4,18833.333333,63000.0,0.0,16000.000000,7250.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,15000.0,0.0,0.0,0.0,15000.0,0.0,0.0,0.0
2011,1,5,18833.333333,63000.0,11000.0,15833.333333,10000.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,15000.0,0.0,0.0,0.0,16000.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,12,27,36550.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,150000.0,130000.0,70000.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0
2022,12,28,30200.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,150000.0,130000.0,70000.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0
2022,12,29,30200.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,150000.0,130000.0,68000.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0
2022,12,30,30200.000000,140000.0,75000.0,107000.000000,8000.0,60900.0,73500.0,150000.0,130000.0,68000.0,...,33600.0,37000.0,12000.0,15500.0,26000.0,32000.0,32500.0,33000.0,57750.0,40000.0


In [7]:
column_range_list = [[0,100],[100,200],[200,300],[300,400],[400,463]]
for column_range in column_range_list:
    for i in range(column_range[0],column_range[1]):
        start = 20*i
        end = 20*i + 20
        if i == 462:
            end = 9258
        pivot_table_full_rent_part = pivot_table_full_rent.iloc[:,start:end]
        stack_full_rent_table = pivot_table_full_rent_part.stack(level=[0,1,2,3])
        stack_full_rent_table=stack_full_rent_table.reset_index()
        stack_full_rent_table.columns=['year','month','day','address_1','address_2','address_3','address_4','full_rent_price']
        if i == column_range[0]:
            df_full_rent_final = stack_full_rent_table.copy()
        else:
            df_full_rent_final = pd.concat([df_full_rent_final, stack_full_rent_table], axis=0)
            df_full_rent_final.reset_index(drop=True, inplace=True)
        print(i)
        print(stack_full_rent_table.head(1))
        print()  
    df_full_rent_final.to_csv('dataset\\after_data\\final_full_rent\\'+str(column_range[0])+'_'+str(column_range[1])+'.csv',index=False)

0
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강남구       개포동       12.0        0.0              0.0

1
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강남구       개포동      655.0        3.0              0.0

2
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강남구       개포동     1283.0        0.0              0.0

3
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강남구       논현동       80.0       13.0              0.0

4
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강남구       논현동      194.0       22.0              0.0

5
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강남구       논현동      252.0        0.0              0.0

6
   year  month  day address_1 ad

51
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강동구       성내동      438.0        6.0              0.0

52
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강동구       성내동      452.0        2.0              0.0

53
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강동구       성내동      513.0        0.0              0.0

54
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강동구       성내동      601.0        0.0              0.0

55
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강동구       암사동      442.0        1.0              0.0

56
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강동구       암사동      487.0       37.0              0.0

57
   year  month  day addre

104
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       강서구       화곡동     1160.0        0.0              0.0

105
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       관악구       남현동      602.0      201.0              0.0

106
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       관악구       남현동     1072.0       46.0              0.0

107
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       관악구       남현동     1139.0        0.0              0.0

108
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       관악구       봉천동      148.0      129.0              0.0

109
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       관악구       봉천동      645.0       87.0              0.0

110
   year  month  da

155
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       구로구       구로동      796.0        3.0              0.0

156
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       구로구       구로동      797.0        9.0              0.0

157
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       구로구       구로동      799.0        1.0              0.0

158
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       구로구       구로동      803.0       13.0              0.0

159
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       구로구       구로동      807.0       16.0              0.0

160
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       구로구       구로동     1268.0        0.0              0.0

161
   year  month  da

206
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      동대문구       용두동       29.0        1.0              0.0

207
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      동대문구       용두동      792.0        0.0              0.0

208
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      동대문구       이문동      327.0        1.0              0.0

209
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      동대문구       장안동      306.0       13.0              0.0

210
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      동대문구       장안동      333.0        5.0              0.0

211
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      동대문구       장안동      345.0       10.0              0.0

212
   year  month  da

257
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      서대문구       연희동      103.0        1.0              0.0

258
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      서대문구       연희동      708.0        4.0              0.0

259
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      서대문구       창천동      501.0       14.0              0.0

260
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      서대문구     충정로3가      222.0        0.0              0.0

261
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      서대문구       홍은동      150.0       13.0              0.0

262
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1      서대문구       홍은동      274.0       60.0              0.0

263
   year  month  da

308
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       서초구       잠원동      162.0        0.0              0.0

309
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       성동구     금호동4가      180.0        0.0              0.0

310
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       성동구       마장동      784.0        0.0              0.0

311
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       성동구     상왕십리동      811.0        0.0              0.0

312
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       성동구     성수동1가      716.0        0.0              0.0

313
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       성동구     성수동2가      838.0        0.0              0.0

314
   year  month  da

359
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       양천구       신월동       54.0       10.0              0.0

360
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       양천구       신월동      134.0       17.0              0.0

361
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       양천구       신월동      222.0        7.0              0.0

362
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       양천구       신월동      440.0       10.0              0.0

363
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       양천구       신월동      485.0        2.0              0.0

364
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       양천구       신월동      510.0        3.0              0.0

365
   year  month  da

412
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       은평구       대조동       59.0       48.0              0.0

413
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       은평구       대조동      197.0       16.0              0.0

414
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       은평구       불광동      305.0        1.0              0.0

415
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       은평구       불광동      629.0        0.0              0.0

416
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       은평구       수색동       75.0        0.0              0.0

417
   year  month  day address_1 address_2  address_3  address_4  full_rent_price
0  2011      1    1       은평구       신사동       19.0       84.0              0.0

418
   year  month  da

### 아파트 연세 테이블 생성

In [5]:
pivot_table_year_rent.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4383 entries, (2011, 1, 1) to (2022, 12, 31)
Columns: 8358 entries, ('강남구', '개포동', 12.0, 0.0) to ('중랑구', '중화동', 454.0, 0.0)
dtypes: float64(8358)
memory usage: 279.5 MB


In [6]:
pivot_table_year_rent = pivot_table_year_rent.fillna(0)
pivot_table_year_rent

Unnamed: 0_level_0,Unnamed: 1_level_0,address_1,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,강남구,...,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구,중랑구
Unnamed: 0_level_1,Unnamed: 1_level_1,address_2,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,개포동,...,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동,중화동
Unnamed: 0_level_2,Unnamed: 1_level_2,address_3,12.0,12.0,138.0,140.0,141.0,172.0,176.0,177.0,179.0,185.0,...,307.0,307.0,314.0,318.0,331.0,438.0,450.0,452.0,453.0,454.0
Unnamed: 0_level_3,Unnamed: 1_level_3,address_4,0.0,2.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,...,6.0,76.0,1.0,81.0,64.0,0.0,0.0,0.0,0.0,0.0
year,month,day,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4
2011,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1786.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011,1,5,0.0,0.0,778.0,0.0,0.0,0.0,0.0,0.0,0.0,1786.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,12,27,958.0,6546.0,3920.0,251.6,365.8,2740.0,9560.0,3700.0,3160.0,2700.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,1760.0,1610.0,2070.0,2140.0
2022,12,28,2240.0,6546.0,3920.0,251.6,365.8,2740.0,9560.0,3700.0,3160.0,1779.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,2090.0,1610.0,2070.0,2140.0
2022,12,29,1781.0,6546.0,3920.0,251.6,365.8,2740.0,9560.0,3700.0,3160.0,1779.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,2090.0,1610.0,2070.0,2140.0
2022,12,30,2036.5,6546.0,4000.0,251.6,365.8,2740.0,9560.0,3700.0,3160.0,2096.0,...,1234.0,1304.0,2244.0,298.0,1000.0,389.0,2090.0,1610.0,2070.0,2140.0


In [7]:
column_range_list = [[0,100],[100,200],[200,300],[300,400],[400,418]]
for column_range in column_range_list:
    for i in range(column_range[0],column_range[1]):
        start = 20*i
        end = 20*i + 20
        if i == 417:
            end = 8358
        pivot_table_year_rent_part = pivot_table_year_rent.iloc[:,start:end]
        stack_year_rent_table = pivot_table_year_rent_part.stack(level=[0,1,2,3])
        stack_year_rent_table = stack_year_rent_table.reset_index()
        stack_year_rent_table.columns=['year','month','day','address_1','address_2','address_3','address_4','year_rent_price']
        if i == column_range[0]:
            df_year_rent_final = stack_year_rent_table.copy()
        else:
            df_year_rent_final = pd.concat([df_year_rent_final, stack_year_rent_table], axis=0)
            df_year_rent_final.reset_index(drop=True, inplace=True)
        print(i)
        print(stack_year_rent_table.head(1))
        print()  
    df_year_rent_final.to_csv('dataset\\after_data\\final_month_rent\\'+str(column_range[0])+'_'+str(column_range[1])+'.csv',index=False)

0
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강남구       개포동       12.0        0.0              0.0

1
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강남구       개포동      656.0        0.0              0.0

2
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강남구       논현동       22.0        0.0              0.0

3
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강남구       논현동      103.0       11.0              0.0

4
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강남구       논현동      194.0       23.0              0.0

5
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강남구       논현동      252.0        1.0              0.0

6
   year  month  day address_1 ad

51
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강동구       성내동      604.0        0.0              0.0

52
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강동구       암사동      451.0       16.0              0.0

53
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강동구       암사동      508.0        0.0              0.0

54
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강동구       천호동       35.0        4.0              0.0

55
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강동구       천호동       52.0        3.0              0.0

56
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       강동구       천호동      166.0      106.0              0.0

57
   year  month  day addre

103
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       관악구       봉천동     1644.0       26.0              0.0

104
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       관악구       봉천동     1705.0        0.0              0.0

105
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       관악구       봉천동     1723.0        0.0              0.0

106
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       관악구       신림동      244.0       21.0              0.0

107
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       관악구       신림동      746.0       43.0              0.0

108
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       관악구       신림동     1463.0       11.0              0.0

109
   year  month  da

155
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       금천구       독산동      958.0        0.0              0.0

156
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       금천구       독산동     1006.0      139.0              0.0

157
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       금천구       독산동     1141.0        0.0              0.0

158
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       금천구       시흥동      791.0       40.0              0.0

159
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       금천구       시흥동      959.0       11.0              0.0

160
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       금천구       시흥동     1012.0        0.0              0.0

161
   year  month  da

207
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       동작구       사당동     1151.0        0.0              0.0

208
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       동작구       상도동        1.0        7.0              0.0

209
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       동작구       상도동      301.0        4.0              0.0

210
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       동작구       상도동      421.0        0.0              0.0

211
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       동작구       상도동      532.0        0.0              0.0

212
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       동작구      신대방동      686.0       48.0              0.0

213
   year  month  da

258
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       서초구       방배동      963.0       16.0              0.0

259
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       서초구       방배동     1002.0       10.0              0.0

260
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       서초구       방배동     2525.0        0.0              0.0

261
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       서초구       서초동     1311.0       10.0              0.0

262
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       서초구       서초동     1336.0        0.0              0.0

263
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       서초구       서초동     1359.0       50.0              0.0

264
   year  month  da

311
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       송파구       방이동      217.0        0.0              0.0

312
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       송파구       석촌동       54.0       31.0              0.0

313
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       송파구       송파동       14.0        0.0              0.0

314
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       송파구       송파동      164.0        0.0              0.0

315
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       송파구       신천동       11.0       10.0              0.0

316
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       송파구       오금동       54.0        4.0              0.0

317
   year  month  da

362
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       용산구      이태원동      198.0       16.0              0.0

363
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       용산구     한강로2가        2.0       11.0              0.0

364
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       용산구       한남동        1.0      349.0              0.0

365
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       용산구       한남동      723.0        3.0              0.0

366
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       용산구       효창동        5.0        1.0              0.0

367
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       용산구       후암동      143.0       23.0              0.0

368
   year  month  da

414
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       중랑구       상봉동      284.0       11.0              0.0

415
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       중랑구       신내동      479.0        0.0              0.0

416
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       중랑구       신내동      795.0        0.0              0.0

417
   year  month  day address_1 address_2  address_3  address_4  year_rent_price
0  2011      1    1       중랑구       중화동      207.0       14.0              0.0

