In [23]:
# 필요한 라이브러리를 불러옵니다.
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# 데이터를 불러옵니다.
problem2 = pd.read_csv('problem2.csv')

# 날짜 및 시간 데이터를 파이썬의 datetime 형식으로 변환합니다.
problem2['date'] = pd.to_datetime(problem2['date'])

# date 변수를 15분 간격으로 반올림합니다.
problem2['date_rounded'] = problem2['date'].dt.round('15min')

In [24]:
# 각 가구의 15분 간격 전력 사용량 합계를 계산합니다.
power_usage = problem2.groupby(['houseCode', 'date_rounded'])['power consumption'].sum().reset_index()

In [25]:
power_usage

Unnamed: 0,houseCode,date_rounded,power consumption
0,house_00,2050-01-01 00:00:00,79.985068
1,house_00,2050-01-01 00:15:00,108.850515
2,house_00,2050-01-01 00:30:00,87.270971
3,house_00,2050-01-01 00:45:00,199.178943
4,house_00,2050-01-01 01:00:00,148.277883
...,...,...,...
133960,house_44,2050-01-31 23:00:00,539.120985
133961,house_44,2050-01-31 23:15:00,217.810021
133962,house_44,2050-01-31 23:30:00,402.665761
133963,house_44,2050-01-31 23:45:00,220.654225


In [26]:
# KMeans 군집화를 사용하여 각 가구의 전력 사용량을 5개의 군집으로 분류합니다. 
# 군집화를 위해 reshape 합니다.
X = power_usage['power consumption'].values.reshape(-1, 1)

In [27]:
# KMeans를 사용하여 군집화를 수행합니다.
kmeans = KMeans(n_clusters=5, random_state=0).fit(X)

In [28]:
# 군집화 결과를 power_usage에 새로운 column으로 추가합니다.
power_usage['cluster'] = kmeans.labels_

In [33]:
problem2

Unnamed: 0,houseCode,date,power consumption,date_rounded
0,house_01,2050-01-14 21:00:00,57.559878,2050-01-14 21:00:00
1,house_26,2050-01-02 19:10:00,59.726880,2050-01-02 19:15:00
2,house_25,2050-01-13 07:20:00,6.558823,2050-01-13 07:15:00
3,house_20,2050-01-26 04:55:00,58.514589,2050-01-26 05:00:00
4,house_09,2050-01-18 22:50:00,5.717009,2050-01-18 22:45:00
...,...,...,...,...
401755,house_11,2050-01-14 06:15:00,58.752685,2050-01-14 06:15:00
401756,house_13,2050-01-29 03:10:00,75.546362,2050-01-29 03:15:00
401757,house_35,2050-01-09 08:25:00,92.245478,2050-01-09 08:30:00
401758,house_10,2050-01-27 13:25:00,15.911177,2050-01-27 13:30:00


In [32]:
power_usage

Unnamed: 0,houseCode,date_rounded,power consumption,cluster
0,house_00,2050-01-01 00:00:00,79.985068,2
1,house_00,2050-01-01 00:15:00,108.850515,2
2,house_00,2050-01-01 00:30:00,87.270971,2
3,house_00,2050-01-01 00:45:00,199.178943,0
4,house_00,2050-01-01 01:00:00,148.277883,0
...,...,...,...,...
133960,house_44,2050-01-31 23:00:00,539.120985,1
133961,house_44,2050-01-31 23:15:00,217.810021,0
133962,house_44,2050-01-31 23:30:00,402.665761,3
133963,house_44,2050-01-31 23:45:00,220.654225,0


In [30]:
# 군집화 결과를 problem2 데이터 프레임에 추가하려면, 먼저 power_usage 데이터 프레임을 problem2 데이터 프레임에 병합해야 합니다.
# power_usage 데이터 프레임을 problem2 데이터 프레임에 병합합니다.
problem3 = pd.merge(problem2, power_usage, on=['houseCode', 'date_rounded'], how='left')

# problem2 데이터 프레임을 출력합니다.
print(problem3)


       houseCode                date  power consumption_x        date_rounded  \
0       house_01 2050-01-14 21:00:00            57.559878 2050-01-14 21:00:00   
1       house_26 2050-01-02 19:10:00            59.726880 2050-01-02 19:15:00   
2       house_25 2050-01-13 07:20:00             6.558823 2050-01-13 07:15:00   
3       house_20 2050-01-26 04:55:00            58.514589 2050-01-26 05:00:00   
4       house_09 2050-01-18 22:50:00             5.717009 2050-01-18 22:45:00   
...          ...                 ...                  ...                 ...   
401755  house_11 2050-01-14 06:15:00            58.752685 2050-01-14 06:15:00   
401756  house_13 2050-01-29 03:10:00            75.546362 2050-01-29 03:15:00   
401757  house_35 2050-01-09 08:25:00            92.245478 2050-01-09 08:30:00   
401758  house_10 2050-01-27 13:25:00            15.911177 2050-01-27 13:30:00   
401759  house_31 2050-01-23 00:15:00            43.695042 2050-01-23 00:15:00   

        power consumption_y

In [14]:
# 이제 'date' 변수를 활용하여 요일을 만들고, 각 군집마다 요일별 15분 간격별 전력 사용량의 합을 계산해야 합니다.
# 요일을 계산하고, 월요일은 0, 일요일은 6으로 매핑됩니다.
problem2['weekday'] = problem2['date'].dt.weekday

In [15]:
problem2

Unnamed: 0,houseCode,date,power consumption_x,date_rounded,power consumption_y,cluster,weekday
0,house_01,2050-01-14 21:00:00,57.559878,2050-01-14 21:00:00,240.389560,4,4
1,house_26,2050-01-02 19:10:00,59.726880,2050-01-02 19:15:00,179.390416,0,6
2,house_25,2050-01-13 07:20:00,6.558823,2050-01-13 07:15:00,102.566444,2,3
3,house_20,2050-01-26 04:55:00,58.514589,2050-01-26 05:00:00,191.471002,0,2
4,house_09,2050-01-18 22:50:00,5.717009,2050-01-18 22:45:00,142.534061,2,1
...,...,...,...,...,...,...,...
401755,house_11,2050-01-14 06:15:00,58.752685,2050-01-14 06:15:00,308.323480,3,4
401756,house_13,2050-01-29 03:10:00,75.546362,2050-01-29 03:15:00,361.866398,3,5
401757,house_35,2050-01-09 08:25:00,92.245478,2050-01-09 08:30:00,232.882930,4,6
401758,house_10,2050-01-27 13:25:00,15.911177,2050-01-27 13:30:00,180.171899,0,3


In [None]:
# 각 군집마다 요일별 15분 간격별 전력 사용량의 합을 계산합니다.
cluster_day_usage = problem2.groupby(['cluster', 'weekday', 'date_rounded'])['power consumption'].sum().reset_index()

In [2]:
# 마지막으로, 요일별 전력 사용량을 히트맵으로 시각화하겠습니다.
# 요일별 전력 사용량의 피봇 테이블을 만듭니다.
pivot_table = cluster_day_usage.pivot_table(values='power consumption', index='weekday', columns='cluster', aggfunc=np.sum)

# 히트맵을 그립니다.
plt.figure(figsize=(10, 6))
sns.heatmap(pivot_table, cmap='YlGnBu')
plt.title('Power consumption by cluster and weekday')
plt.show()

       houseCode                date  power consumption_x        date_rounded  \
0       house_01 2050-01-14 21:00:00            57.559878 2050-01-14 21:00:00   
1       house_26 2050-01-02 19:10:00            59.726880 2050-01-02 19:15:00   
2       house_25 2050-01-13 07:20:00             6.558823 2050-01-13 07:15:00   
3       house_20 2050-01-26 04:55:00            58.514589 2050-01-26 05:00:00   
4       house_09 2050-01-18 22:50:00             5.717009 2050-01-18 22:45:00   
...          ...                 ...                  ...                 ...   
401755  house_11 2050-01-14 06:15:00            58.752685 2050-01-14 06:15:00   
401756  house_13 2050-01-29 03:10:00            75.546362 2050-01-29 03:15:00   
401757  house_35 2050-01-09 08:25:00            92.245478 2050-01-09 08:30:00   
401758  house_10 2050-01-27 13:25:00            15.911177 2050-01-27 13:30:00   
401759  house_31 2050-01-23 00:15:00            43.695042 2050-01-23 00:15:00   

        power consumption_y

KeyError: 'Column not found: power consumption'