In [14]:
# PyCaret 라이브러리 설치
!pip install pycaret



In [15]:
# PyCaret의 회귀 모듈 로드
from pycaret.regression import *

In [16]:
import pandas as pd

In [17]:
# Load the five class-A sensor tables from CSV. The order of the names on the
# left matches the order of the file paths on the right.
(
    main_power_sensor_data,
    main_power_usage_sensor_data,
    device_power_sensor_data,
    device_power_usage_sensor_data,
    environmental_sensor_data,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'all_data/all_final_df/class_a_main_power_sensor.csv',
        'all_data/all_final_df/class_a_main_power_usage_sensor.csv',
        'all_data/all_final_df/class_a_device_power_sensor.csv',
        'all_data/all_final_df/class_a_device_power_usage_sensor.csv',
        'all_data/all_final_df/class_a_environmental_sensor.csv',
    )
)

In [18]:
# Convert the 'time' column of every sensor table to pandas datetime values.
# Each table is updated in place, one after another.
for sensor_table in (
    main_power_sensor_data,
    main_power_usage_sensor_data,
    device_power_sensor_data,
    device_power_usage_sensor_data,
    environmental_sensor_data,
):
    sensor_table['time'] = pd.to_datetime(sensor_table['time'])

In [19]:
# Merge the environmental readings with the device power readings on 'time'.
# The inner join keeps only timestamps that appear in both tables.
merged_data = pd.merge(
    environmental_sensor_data[['time', 'average_co2(ppm)', 'average_illumination(lux)']],
    device_power_sensor_data[['time', 'ac_out_power(Wh)', 'socket_power(Wh)']],
    on='time',
    how='inner',
)
print(merged_data)

# Work on the numeric columns only. The datetime 'time' key is not a
# measurement: a median timestamp is not a meaningful fill value, and whether
# median()/corr() silently drop, include, or reject a datetime column depends
# on the installed pandas version. Selecting the columns explicitly makes the
# result identical across versions.
numeric_columns = merged_data.select_dtypes(include='number').columns

# Fill missing values with each numeric column's median. fillna() with a Series
# only touches the columns named in that Series, so 'time' is left untouched.
# A new frame is assigned instead of using inplace=True.
merged_data = merged_data.fillna(merged_data[numeric_columns].median())

# Pairwise correlation between the numeric columns.
correlation_matrix = merged_data[numeric_columns].corr()

# Rank every numeric column by its correlation with the prediction target,
# strongest positive correlation first.
target_variable = 'socket_power(Wh)'
sorted_correlations = correlation_matrix[target_variable].sort_values(ascending=False)

# Print the sorted correlations.
print(sorted_correlations)

                   time  average_co2(ppm)  average_illumination(lux)  \
0   2024-04-15 01:00:00             6.451                      4.233   
1   2024-04-15 02:00:00             6.416                      2.065   
2   2024-04-15 03:00:00             6.382                      0.000   
3   2024-04-15 04:00:00             6.365                      0.000   
4   2024-04-15 05:00:00             6.346                      0.000   
..                  ...               ...                        ...   
571 2024-05-08 20:00:00             6.586                      4.204   
572 2024-05-08 21:00:00             6.590                      4.203   
573 2024-05-08 22:00:00             6.709                      4.203   
574 2024-05-08 23:00:00             6.670                      4.204   
575 2024-05-09 00:00:00             6.554                      4.196   

     ac_out_power(Wh)  socket_power(Wh)  
0             305.876           180.258  
1             293.722           174.952  
2        

In [20]:
# PyCaret setup: configure the modeling environment.
# The whole of merged_data is passed in, 'socket_power(Wh)' is the regression
# target, 70% of the rows form the training split, features are min-max
# normalized, and session_id is PyCaret's seed parameter.
# NOTE(review): merged_data as built in the previous cell still contains the
# datetime 'time' column, so it is handed to setup() as a feature — confirm
# this is intended.
# NOTE(review): the recorded setup summary reports shape (506, 4), while the
# merged frame printed above has 576 rows and 5 columns; the saved outputs
# appear to come from a different state of merged_data — re-run to confirm.
s = setup(data=merged_data, target='socket_power(Wh)', train_size=0.7,
          normalize=True, normalize_method='minmax',
          session_id=777)

# Compare all models; the result is kept as best_model.
best_model = compare_models()

# Tune the hyperparameters of the selected model.
tuned_model = tune_model(best_model)

# Finalize the tuned model.
final_model = finalize_model(tuned_model)

# Predict with the finalized model.
# NOTE(review): `testset` is not defined in any cell visible here. Confirm it
# is created before this cell; otherwise this line raises NameError on a fresh
# kernel (Restart & Run All).
predictions = predict_model(final_model, data=testset)

Unnamed: 0,Description,Value
0,Session id,777
1,Target,socket_power(Wh)
2,Target type,Regression
3,Original data shape,"(506, 4)"
4,Transformed data shape,"(506, 4)"
5,Transformed train set shape,"(354, 4)"
6,Transformed test set shape,"(152, 4)"
7,Numeric features,3
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
ada,AdaBoost Regressor,31.9702,1554.6285,39.3305,0.6348,0.1505,0.1294,0.012
lightgbm,Light Gradient Boosting Machine,29.4028,1578.6112,39.5217,0.6271,0.1479,0.1153,0.049
gbr,Gradient Boosting Regressor,29.8035,1590.0649,39.6711,0.6258,0.1495,0.1171,0.014
rf,Random Forest Regressor,29.4579,1611.2262,39.8032,0.6198,0.149,0.1155,0.023
et,Extra Trees Regressor,30.4727,1735.6901,41.491,0.588,0.1546,0.1199,0.018
ridge,Ridge Regression,35.9131,2005.7901,44.629,0.5297,0.1723,0.1444,0.004
br,Bayesian Ridge,35.9189,2005.4412,44.6269,0.5294,0.1725,0.1445,0.006
lr,Linear Regression,35.9258,2005.6939,44.6305,0.5292,0.1727,0.1446,0.246
lar,Least Angle Regression,35.9258,2005.6938,44.6305,0.5292,0.1727,0.1446,0.006
lasso,Lasso Regression,36.3195,2029.9734,44.8826,0.5264,0.1721,0.1456,0.252


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,32.183,1605.3574,40.0669,0.4996,0.1564,0.1355
1,29.0354,1588.7991,39.8597,0.5028,0.16,0.1258
2,28.2604,1514.7773,38.9201,0.7124,0.1475,0.1114
3,36.7431,1942.9111,44.0785,0.6118,0.156,0.1296
4,29.193,1203.2318,34.6876,0.7382,0.1287,0.1105
5,30.2056,1552.1508,39.3973,0.6637,0.1488,0.1221
6,29.8095,1617.4669,40.2177,0.6329,0.148,0.1125
7,35.3662,1880.6284,43.3662,0.6519,0.1573,0.1323
8,31.4036,1627.1656,40.3381,0.6641,0.1473,0.1213
9,24.8048,1018.3853,31.9122,0.6713,0.128,0.1086


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,AdaBoost Regressor,27.6679,1257.2566,35.4578,0.7218,0.1338,0.1093
