In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_squared_error, r2_score

def calculate_rmse(targets, predictions):
    from sklearn.metrics import mean_squared_error
    return np.sqrt(mean_squared_error(targets, predictions))

def calculate_R2_score(y_test, y_pred):
    from sklearn.metrics import r2_score
    return r2_score(y_test, y_pred)

# 데이터 불러오기
input_data = pd.read_csv('2023_smartFarm_AI_hackathon_dataset.csv')

# 농가구역 One-hot encoding
input_data = pd.get_dummies(input_data, columns=['frmDist'], drop_first=False)

# na값 drop
input_data = input_data.dropna()

# 특성과 타겟 변수 설정
X = input_data.drop(columns=['outtrn_cumsum', 'HeatingEnergyUsage_cumsum'])
y = input_data[['outtrn_cumsum', 'HeatingEnergyUsage_cumsum']]

# 트레이닝, 테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 표준화
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Extra Trees Regressor 모델
model = ExtraTreesRegressor(n_estimators=50, random_state=42)  # n_estimators 값을 줄임

# 하이퍼파라미터 그리드 설정 (RandomizedSearchCV 사용)
param_dist = {
    'n_estimators': [50, 100, 150],  # 더 작은 값을 시도
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10]
}

# RandomizedSearchCV 객체 생성
random_search = RandomizedSearchCV(model, param_distributions=param_dist, n_iter=10, cv=5, n_jobs=-1, verbose=1, scoring='neg_mean_squared_error', random_state=42)

# RandomizedSearchCV 수행
random_search.fit(X_train_scaled, y_train)

# 최적의 모델 선택
best_model = random_search.best_estimator_

# 중요한 특성만 선택 (임계값 조정 필요)
threshold = 0.01  # 중요도의 임계값
important_features = X_train.columns[best_model.feature_importances_ > threshold]
X_train_important = X_train_scaled[:, best_model.feature_importances_ > threshold]
X_test_important = X_test_scaled[:, best_model.feature_importances_ > threshold]

# 모델 재훈련
best_model.fit(X_train_important, y_train)

# 테스트 데이터로 예측
y_pred = best_model.predict(X_test_important)

# 평가
rmse = calculate_rmse(y_test, y_pred)
r2score = calculate_R2_score(y_test, y_pred)

print("Best Model RMSE with Important Features:", rmse)
print("Best Model R-squared (R2) with Important Features:", r2score)


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Model RMSE with Important Features: 31212.121312050167
Best Model R-squared (R2) with Important Features: 0.9938363734932162


In [7]:
!pip3 install lightgbm


Collecting lightgbm
  Using cached lightgbm-4.1.0.tar.gz (1.7 MB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: lightgbm
  Building wheel for lightgbm (pyproject.toml) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for lightgbm [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[44 lines of output][0m
  [31m   [0m 2023-09-16 23:36:42,924 - scikit_build_core - INFO - CMake version: 3.27.4
  [31m   [0m [92m***[0m [1m[92mscikit-build-core 0.5.0[0m using [94mCMake 3.27.4[0m [91m(wheel)[0m[0m
  [31m   [0m 2023-09-16 23:36:42,926 - scikit_build_core - INFO - Build directory: /private/var/folders/jm/t447cbrj0rv_y73nfrlz2_f

In [10]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

def calculate_rmse(targets, predictions):
    return np.sqrt(mean_squared_error(targets, predictions))

def calculate_R2_score(y_test, y_pred):
    return r2_score(y_test, y_pred)

# 데이터 불러오기
input_data = pd.read_csv('2023_smartFarm_AI_hackathon_dataset.csv')

# 농가구역 One-hot encoding
input_data = pd.get_dummies(input_data, columns=['frmDist'], drop_first=False)

# na값 drop
input_data = input_data.dropna()

# 특성과 타겟 변수 설정
X = input_data.drop(columns=['outtrn_cumsum', 'HeatingEnergyUsage_cumsum'])
y = input_data[['outtrn_cumsum', 'HeatingEnergyUsage_cumsum']]

# 트레이닝, 테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 표준화
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 딥러닝 회귀 모델 정의
model = keras.Sequential([
    keras.layers.Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    keras.layers.Dropout(0.2),  # 드롭아웃 추가 (20%의 뉴런을 비활성화)
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),  # 드롭아웃 추가 (20%의 뉴런을 비활성화)
    keras.layers.Dense(2)  # 출력 뉴런 수는 예측할 타겟 변수 수와 동일해야 합니다.
])

# 모델 컴파일
model.compile(optimizer='adam', loss='mean_squared_error')

# 모델 훈련
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# 모델 평가
y_pred = model.predict(X_test_scaled)
rmse = calculate_rmse(y_test, y_pred)
r2score = calculate_R2_score(y_test, y_pred)

print("Deep Learning Model RMSE:", rmse)
print("Deep Learning Model R-squared (R2):", r2score)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Deep Learning Model RMSE: 214690.96209225975
Deep Learning Model R-squared (R2): 0.5101531948552973


In [24]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# RMSE 계산 함수 정의
def calculate_rmse(targets, predictions):
    return np.sqrt(mean_squared_error(targets, predictions))

# R2 스코어 계산 함수 정의
def calculate_R2_score(y_test, y_pred):
    return r2_score(y_test, y_pred)

# 데이터 불러오기
input_data = pd.read_csv('2023_smartFarm_AI_hackathon_dataset.csv')

# 데이터 전처리
# 'frmDist' 열을 기준으로 데이터 그룹화 및 평균값 계산하여 'group_df'에 저장
group_df = input_data.groupby('frmDist').mean()

# 'frmDist' 열을 기준으로 데이터 다시 그룹화하고, 'outtrn_cumsum' 열에서 최댓값 계산하여 'group_outtrn_cumsum_df'에 저장
group_outtrn_cumsum_df = input_data.groupby('frmDist').max()[['outtrn_cumsum']]

# 'group_df'에서 'outtrn_cumsum'과 'date' 열 제거
group_df.drop(['outtrn_cumsum', 'date'], axis=1, inplace=True)

# 'group_df'와 'group_outtrn_cumsum_df'를 합쳐 'merge_df' 데이터프레임 생성
merge_df = pd.concat([group_df, group_outtrn_cumsum_df], axis=1)

# 'merge_df'에서 입력 피처(독립 변수)와 타겟 변수(종속 변수) 분리
train_data = merge_df.drop(['outtrn_cumsum', 'HeatingEnergyUsage_cumsum'], axis=1)
target_data = merge_df.loc[:,['outtrn_cumsum', 'HeatingEnergyUsage_cumsum']]

# 데이터 분할: 훈련 데이터와 테스트 데이터로 분할
X_train, X_test, y_train, y_test = train_test_split(train_data, target_data, random_state=42, test_size=0.2)

# 데이터 표준화
ss = StandardScaler()
ss.fit(X_train)

train_scaled = ss.transform(X_train)
test_scaled = ss.transform(X_test)

# ExtraTreesRegressor 모델 설정 및 하이퍼파라미터 조정
et = ExtraTreesRegressor(
    n_estimators=100, 
    max_depth=10,  # 최대 트리 깊이를 제한
    min_samples_split=2,  # 노드를 분할하기 위한 최소 샘플 수
    min_samples_leaf=1,  # 리프 노드에 필요한 최소 샘플 수
    random_state=42
)

# 모델 훈련
et.fit(train_scaled, y_train)

# 모델을 사용하여 테스트 데이터에 대한 예측 수행
y_pred = et.predict(test_scaled)

# RMSE 및 R2 스코어 계산
rmse = calculate_rmse(y_test, y_pred) 
r2score = calculate_R2_score(y_test, y_pred)

# 결과 출력
print("RMSE:", rmse)
print("R2_score:", r2score)


RMSE: 15081.02447539747
R2_score: 0.9990819579665828


In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def calculate_rmse(targets, predictions):
    return np.sqrt(mean_squared_error(targets, predictions))

def calculate_R2_score(y_test, y_pred):
    return r2_score(y_test, y_pred)

input_data = pd.read_csv('2023_smartFarm_AI_hackathon_dataset.csv')

# 값이 0인 행을 제거하는 대신, 필요한 피처만 추출할 수 있습니다.
data_df = input_data[input_data['HeatingEnergyUsage_cumsum'] > 0]

group_df = data_df.groupby('frmDist').mean()
group_outtrn_cumsum_df = data_df.groupby('frmDist').max()[['outtrn_cumsum']]
group_df.drop(['outtrn_cumsum', 'date'], axis=1, inplace=True)
merge_df = pd.concat([group_df, group_outtrn_cumsum_df], axis=1)

train_data = merge_df.drop(['outtrn_cumsum', 'HeatingEnergyUsage_cumsum'], axis=1)
target_data = merge_df.loc[:,['outtrn_cumsum', 'HeatingEnergyUsage_cumsum']]

X_train, X_test, y_train, y_test = train_test_split(train_data, target_data, random_state=42, test_size=0.2)

ss = StandardScaler()
ss.fit(X_train)

train_scaled = ss.transform(X_train)
test_scaled = ss.transform(X_test)

# RandomForestRegressor 모델 설정
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(train_scaled, y_train)

y_pred = rf.predict(test_scaled)

rmse = calculate_rmse(y_test, y_pred)
r2score = calculate_R2_score(y_test, y_pred)

# 결과 출력
print("RMSE:", rmse)
print("R2_score:", r2score)


RMSE: 30195.54486401781
R2_score: 0.9643788725840623


ModuleNotFoundError: No module named 'lightgbm'

In [6]:
!pip install lightgbm

Collecting lightgbm
  Using cached lightgbm-4.1.0.tar.gz (1.7 MB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: lightgbm
  Building wheel for lightgbm (pyproject.toml) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for lightgbm [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[44 lines of output][0m
  [31m   [0m 2023-09-17 12:38:49,311 - scikit_build_core - INFO - CMake version: 3.27.4
  [31m   [0m [92m***[0m [1m[92mscikit-build-core 0.5.0[0m using [94mCMake 3.27.4[0m [91m(wheel)[0m[0m
  [31m   [0m 2023-09-17 12:38:49,313 - scikit_build_core - INFO - Build directory: /private/var/folders/jm/t447cbrj0rv_y73nfrlz2_f