In [21]:
# PyCaret 라이브러리 설치
!pip install pycaret



In [22]:
# PyCaret의 회귀 모듈 로드
from pycaret.regression import *

In [23]:
import pandas as pd

In [24]:
main_power_sensor_data = pd.read_csv('all_data/all_final_df/class_a_main_power_sensor.csv')
main_power_usage_sensor_data = pd.read_csv('all_data/all_final_df/class_a_main_power_usage_sensor.csv')
device_power_sensor_data = pd.read_csv('all_data/all_final_df/class_a_device_power_sensor.csv')
device_power_usage_sensor_data = pd.read_csv('all_data/all_final_df/class_a_device_power_usage_sensor.csv')
environmental_sensor_data = pd.read_csv('all_data/all_final_df/class_a_environmental_sensor.csv')

In [25]:
main_power_sensor_data['time'] = pd.to_datetime(main_power_sensor_data['time'])
main_power_usage_sensor_data['time'] = pd.to_datetime(main_power_usage_sensor_data['time'])
device_power_sensor_data['time'] = pd.to_datetime(device_power_sensor_data['time'])
device_power_usage_sensor_data['time'] = pd.to_datetime(device_power_usage_sensor_data['time'])
environmental_sensor_data['time'] = pd.to_datetime(environmental_sensor_data['time'])

In [26]:
# 두 데이터셋을 'time' 컬럼을 기준으로 병합
merged_data = pd.merge(environmental_sensor_data[['time', 'average_co2(ppm)', 'average_illumination(lux)']], device_power_sensor_data[['time', 'ac_out_power(Wh)', 'socket_power(Wh)']], on='time', how='inner')
print(merged_data)

# 결측치 확인 및 처리 (예: 중간값으로 채우기)
merged_data.fillna(merged_data.median(), inplace=True)

# 상관계수 계산
correlation_matrix = merged_data.corr()

target_variable = 'socket_power(Wh)'
sorted_correlations = correlation_matrix[target_variable].sort_values(ascending=False)


# 정렬된 상관계수 출력
print(sorted_correlations)

                   time  average_co2(ppm)  average_illumination(lux)  \
0   2024-04-15 01:00:00             6.451                      4.233   
1   2024-04-15 02:00:00             6.416                      2.065   
2   2024-04-15 03:00:00             6.382                      0.000   
3   2024-04-15 04:00:00             6.365                      0.000   
4   2024-04-15 05:00:00             6.346                      0.000   
..                  ...               ...                        ...   
571 2024-05-08 20:00:00             6.586                      4.204   
572 2024-05-08 21:00:00             6.590                      4.203   
573 2024-05-08 22:00:00             6.709                      4.203   
574 2024-05-08 23:00:00             6.670                      4.204   
575 2024-05-09 00:00:00             6.554                      4.196   

     ac_out_power(Wh)  socket_power(Wh)  
0             305.876           180.258  
1             293.722           174.952  
2        

In [27]:
# PyCaret 설정: 모델링 환경 설정
s = setup(data=merged_data, target='socket_power(Wh)', train_size=0.7,
          normalize=True, normalize_method='minmax',
          session_id=777)

# 모든 모델 비교
best_model = compare_models()

# 모델 최적화
tuned_model = tune_model(best_model)

# 모델 최종화
final_model = finalize_model(tuned_model)

# 예측
predictions = predict_model(final_model, data=testset)

Unnamed: 0,Description,Value
0,Session id,777
1,Target,socket_power(Wh)
2,Target type,Regression
3,Original data shape,"(576, 5)"
4,Transformed data shape,"(576, 7)"
5,Transformed train set shape,"(403, 7)"
6,Transformed test set shape,"(173, 7)"
7,Numeric features,3
8,Date features,1
9,Preprocess,True


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
rf,Random Forest Regressor,25.1844,1372.6253,36.4224,0.6964,0.1795,0.264,0.028
et,Extra Trees Regressor,25.0442,1391.9229,36.7734,0.6919,0.1808,0.2793,0.021
lightgbm,Light Gradient Boosting Machine,26.0366,1431.4172,37.2036,0.6821,0.1836,0.284,0.065
gbr,Gradient Boosting Regressor,27.7405,1561.4979,38.8323,0.6546,0.1884,0.283,0.017
ada,AdaBoost Regressor,31.0218,1708.1607,40.8269,0.6209,0.1956,0.2864,0.013
ridge,Ridge Regression,33.1517,1901.0219,43.1733,0.5747,0.2071,0.3058,0.007
br,Bayesian Ridge,33.0945,1905.7035,43.2135,0.5737,0.2073,0.3068,0.009
lr,Linear Regression,33.0645,1908.8822,43.2426,0.5729,0.2075,0.3073,0.257
huber,Huber Regressor,32.8351,1911.898,43.2077,0.572,0.2071,0.3054,0.009
lasso,Lasso Regression,33.8762,1942.4124,43.6223,0.5669,0.2089,0.3157,0.178


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,27.73,1454.7521,38.1412,0.6483,0.1392,0.1058
1,22.3769,954.5923,30.8965,0.7402,0.1183,0.0932
2,24.6915,1246.3358,35.3035,0.6639,0.1388,0.1058
3,26.4462,1368.9865,36.9998,0.6519,0.1363,0.102
4,25.4736,1076.5863,32.8114,0.7409,0.1168,0.0921
5,21.0951,851.6097,29.1824,0.8199,0.108,0.0804
6,30.6186,1426.5212,37.7693,0.6728,0.1353,0.1153
7,28.6131,1525.9276,39.0631,0.66,0.1417,0.1043
8,26.818,1308.8073,36.1774,0.7577,0.1336,0.1024
9,33.2567,3044.142,55.1737,0.4849,0.6544,1.911


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


KeyError: "['time'] not in index"