In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
### input ###

input_data = pd.read_csv('2023_smartFarm_AI_hackathon_dataset.csv')

print(input_data.head())

    frmDist      date  inTp  inHd  otmsuplyqy  acSlrdQy  cunt   ph  outTp  \
0  DBSF1059  20220926   0.0   0.0         0.0       0.0   0.0  0.0    0.0   
1  DBSF1059  20221001   0.0   0.0         0.0       0.0   0.0  0.0    0.0   
2  DBSF1059  20221002   0.0   0.0         0.0       0.0   0.0  0.0    0.0   
3  DBSF1059  20221003   0.0   0.0         0.0       0.0   0.0  0.0    0.0   
4  DBSF1059  20221004   0.0   0.0         0.0       0.0   0.0  0.0    0.0   

   outWs  ...  outtrn_cumsum   WaterUsage   WaterCost  FertilizerUsage  \
0    0.0  ...            0.0     0.000000    0.000000         0.000000   
1    0.0  ...            0.0  1347.554178  202.133127      4711.308821   
2    0.0  ...            0.0    69.899353   10.484903       271.379890   
3    0.0  ...            0.0   412.225065   61.833760      1539.505534   
4    0.0  ...            0.0   590.608634   88.591295      2614.973165   

   FertilizerCost     CO2Usage    CO2Cost  MistUsageTime    Mist Cost  \
0        0.000000  

In [15]:
input_data["frmDist"] = input_data["frmDist"].str[-4:] # frmDist 컬럼 str문자 삭제
pd.get_dummies(input_data, columns=["frmDist"])
print(input_data.head())

  frmDist      date  inTp  inHd  otmsuplyqy  acSlrdQy  cunt   ph  outTp  \
0    1059  20220926   0.0   0.0         0.0       0.0   0.0  0.0    0.0   
1    1059  20221001   0.0   0.0         0.0       0.0   0.0  0.0    0.0   
2    1059  20221002   0.0   0.0         0.0       0.0   0.0  0.0    0.0   
3    1059  20221003   0.0   0.0         0.0       0.0   0.0  0.0    0.0   
4    1059  20221004   0.0   0.0         0.0       0.0   0.0  0.0    0.0   

   outWs  ...  outtrn_cumsum   WaterUsage   WaterCost  FertilizerUsage  \
0    0.0  ...            0.0     0.000000    0.000000         0.000000   
1    0.0  ...            0.0  1347.554178  202.133127      4711.308821   
2    0.0  ...            0.0    69.899353   10.484903       271.379890   
3    0.0  ...            0.0   412.225065   61.833760      1539.505534   
4    0.0  ...            0.0   590.608634   88.591295      2614.973165   

   FertilizerCost     CO2Usage    CO2Cost  MistUsageTime    Mist Cost  \
0        0.000000     0.000000 

In [16]:
X = input_data.drop(['outtrn_cumsum', 'HeatingEnergyUsage_cumsum'], axis=1)
y = input_data[['outtrn_cumsum', 'HeatingEnergyUsage_cumsum']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # 데이터 스플릿

In [18]:
# 숫자가 아닌 열을 삭제하고 전체 데이터를 NumPy 배열로 변환
X_train = X_train.select_dtypes(include=['float64', 'int64'])
y_train = y_train.select_dtypes(include=['float64', 'int64'])

X_train_numpy = X_train.to_numpy()
y_train_numpy = y_train.to_numpy()

In [19]:
# NumPy 배열을 Tensor로 변환
X_train_tensor = torch.Tensor(X_train_numpy).float()
y_train_tensor = torch.Tensor(y_train_numpy).float()

In [None]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout_rate):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out

# Hyperparameters
CFG = {'BATCH_SIZE': 300,
       'EPOCHS': 1000,
       'INPUT_SIZE': 44,  # 입력 데이터의 특성 수에 맞게 설정
       'HIDDEN_SIZE': 512,
       'OUTPUT_SIZE': 2,
       'DROPOUT_RATE': 0.8,
       'LEARNING_RATE': 0.002}

model = NeuralNet(CFG['INPUT_SIZE'], CFG['HIDDEN_SIZE'], CFG['OUTPUT_SIZE'], CFG['DROPOUT_RATE'])
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])

# TensorDataset 생성
train_data = torch.utils.data.TensorDataset(torch.Tensor(X_train.values).float(), torch.Tensor(y_train.values).float())
train_loader = torch.utils.data.DataLoader(train_data, batch_size=CFG['BATCH_SIZE'], shuffle=True)
y_train_tensor = torch.Tensor(y_train_numpy[:, 0]).float()

for epoch in range(CFG['EPOCHS']):
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)  # squeeze() 출력 크기 조정
        loss.backward()
        optimizer.step()
    total_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()  # 배치 로스를 누적

    if (epoch + 1) % 100 == 0:
        average_loss = total_loss / len(train_loader) # 로스평균
        print(f"Epoch [{epoch+1}/{CFG['EPOCHS']}], Loss: {average_loss:.4f}") #로스 출력

with torch.no_grad():
    test_inputs = torch.Tensor(X_test.astype('float32').to_numpy())
    predicted = model(test_inputs).numpy()

rmse = np.sqrt(mean_squared_error(y_test, predicted))
r2score = r2_score(y_test, predicted)

Epoch [100/1000], Loss: 113651168810.8546


In [35]:
### output ###
print(f"RMSE:", rmse)
print(f"R2:", r2score)

RMSE: 315237.4655710469
R2: -0.00815990642347525
