In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the competition CSV files from one configurable data directory, so the
# location is stated once instead of being repeated in every read_csv call.
# NOTE(review): the default is still an absolute, machine-specific path; point
# DATA_DIR at a project-relative folder to run the notebook on another machine.
from pathlib import Path

DATA_DIR = Path(r"C:\Users\USER\OneDrive - 한국외국어대학교\바탕 화면\open")

train = pd.read_csv(DATA_DIR / "train.csv")
test = pd.read_csv(DATA_DIR / "test.csv")
submission = pd.read_csv(DATA_DIR / "sample_submission.csv")
international = pd.read_csv(DATA_DIR / "international_trade.csv")

In [None]:
# Display the test frame to inspect its columns and rows.
test

Unnamed: 0,ID,timestamp,item,corporation,location
0,TG_A_J_20230304,2023-03-04,TG,A,J
1,TG_A_J_20230305,2023-03-05,TG,A,J
2,TG_A_J_20230306,2023-03-06,TG,A,J
3,TG_A_J_20230307,2023-03-07,TG,A,J
4,TG_A_J_20230308,2023-03-08,TG,A,J
...,...,...,...,...,...
1087,RD_F_J_20230327,2023-03-27,RD,F,J
1088,RD_F_J_20230328,2023-03-28,RD,F,J
1089,RD_F_J_20230329,2023-03-29,RD,F,J
1090,RD_F_J_20230330,2023-03-30,RD,F,J


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [None]:
# Weekend column: 1 when the timestamp falls on Saturday or Sunday, else 0.
train['timestamp'] = pd.to_datetime(train['timestamp'])
# pandas numbers the days Monday=0 ... Sunday=6, so 5 and 6 are the weekend.
train['is_weekend'] = train['timestamp'].dt.dayofweek.isin([5, 6]).astype(int)


In [None]:
# One-hot encoding of the categorical columns.
# '요일' (day of week) is not created in any visible cell, so on a fresh kernel
# it may be missing from `train`; encoding only the columns that are actually
# present keeps the cell from raising KeyError under Restart & Run All.
# NOTE(review): if '요일' is meant to be a feature, create it explicitly in an
# earlier cell so it is always available here.
categorical_columns = ['item', 'corporation', 'location', '요일']
present_columns = [col for col in categorical_columns if col in train.columns]
train_encoded = pd.get_dummies(train, columns=present_columns)

In [None]:
# Drop the columns that are not needed.
# Reassigning (instead of inplace=True) keeps the cell re-runnable: with the
# in-place version a second run raised KeyError because the columns were
# already gone. errors='ignore' also tolerates '공휴일' (holiday) being absent,
# since no visible cell creates it.
train_encoded = train_encoded.drop(
    columns=['ID', 'timestamp', '공휴일'],
    errors='ignore',
)

In [None]:
# Data normalisation: min-max scale the price column into [0, 1].
# Selecting with a list keeps the data two-dimensional (n_rows, 1), which is
# the layout the scaler expects.
scaler = MinMaxScaler(feature_range=(0, 1))
train_data = scaler.fit_transform(train[['price(원/kg)']].to_numpy())

In [None]:
# Dataset class definition
class CustomDataset(Dataset):
    """Pairs each feature sample with the target at the same index.

    Args:
        features: Sized, indexable collection of input samples.
        targets: Sized, indexable collection of targets aligned with
            ``features`` (``targets[i]`` belongs to ``features[i]``).

    Raises:
        ValueError: If ``features`` and ``targets`` differ in length.
    """

    def __init__(self, features, targets):
        # __len__ is driven by `features` alone, so a length mismatch would
        # otherwise go unnoticed (extra targets ignored) or fail later with an
        # IndexError in the middle of an epoch.
        if len(features) != len(targets):
            raise ValueError(
                f"features and targets must have the same length, "
                f"got {len(features)} and {len(targets)}"
            )
        self.features = features
        self.targets = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.features[idx], self.targets[idx]

In [None]:
# Build the sliding-window samples, split them, and create the data loaders.
time_steps = 7  # one week of history per sample

# Each sample uses `time_steps` consecutive scaled prices as input and the value
# right after them as the target. The last valid start index is
# len(train_data) - time_steps - 1, so the range has to stop at
# len(train_data) - time_steps; the previous bound (with an extra "- 1")
# silently dropped the final window.
# NOTE(review): windows are cut from train_data in row order. If `train` stacks
# several series (e.g. one per item/corporation/location), windows will straddle
# series boundaries — confirm and build windows per series if so.
X = []
y = []
for start in range(len(train_data) - time_steps):
    stop = start + time_steps
    X.append(train_data[start:stop, 0])
    y.append(train_data[stop, 0])

X = np.array(X)
y = np.array(y)

# NOTE(review): train_test_split shuffles by default, so overlapping windows end
# up on both sides of the split; for time-ordered data a chronological split
# (shuffle=False) may give a more honest validation loss — confirm intent.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

train_dataset = CustomDataset(X_train, y_train)
val_dataset = CustomDataset(X_val, y_val)

batch_size = 32
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# LSTM model definition
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer that emits one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Run ``x`` (batch-first) through the LSTM and project the last step.

        Returns a tensor with one row per batch element and a single column.
        """
        sequence_output, _state = self.lstm(x)
        # Only the output at the final time step feeds the linear head.
        last_step = sequence_output[:, -1, :]
        return self.fc(last_step)

# Model hyperparameters.
input_size = 1    # one value per time step (the windows hold a single price series)
hidden_size = 50
num_layers = 2

# Seed torch's global RNG before the weights are created so that initialisation
# (and the shuffling that follows when cells run in order) is reproducible
# across Restart & Run All.
torch.manual_seed(42)
model = LSTMModel(input_size, hidden_size, num_layers)

In [None]:
# Loss function and optimizer: Adam over all model parameters, scored with
# mean squared error.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

In [None]:
# Train the model, reporting the mean training and validation loss per epoch.
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    train_losses = []
    for inputs, targets in train_dataloader:
        optimizer.zero_grad()
        # Add a trailing feature axis: (batch, time_steps) -> (batch, time_steps, 1).
        outputs = model(inputs.unsqueeze(-1).float())
        # squeeze(-1) removes only the output axis. A bare squeeze() would also
        # collapse a batch of size 1 to a scalar, which no longer matches the
        # (1,)-shaped target.
        loss = criterion(outputs.squeeze(-1), targets.float())  # compare with the targets
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())
    # Average over all batches; previously only the last batch's loss was
    # printed under the "Train Loss" label, which made the log very noisy.
    avg_train_loss = np.mean(train_losses)

    model.eval()
    with torch.no_grad():
        val_losses = []
        for inputs, targets in val_dataloader:
            outputs = model(inputs.unsqueeze(-1).float())
            val_loss = criterion(outputs.squeeze(-1), targets.float())
            val_losses.append(val_loss.item())
        avg_val_loss = np.mean(val_losses)

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss}, Val Loss: {avg_val_loss}")

Epoch [1/100], Train Loss: 0.05314486101269722, Val Loss: 0.024165269547118305
Epoch [2/100], Train Loss: 0.01786140352487564, Val Loss: 0.024235999196647637
Epoch [3/100], Train Loss: 0.00448449794203043, Val Loss: 0.024489063314557757
Epoch [4/100], Train Loss: 0.020339002832770348, Val Loss: 0.024022509483495345
Epoch [5/100], Train Loss: 0.011346879415214062, Val Loss: 0.023329151283119683
Epoch [6/100], Train Loss: 0.005214008968323469, Val Loss: 0.024384764249696926
Epoch [7/100], Train Loss: 0.005935763008892536, Val Loss: 0.02382442667850003
Epoch [8/100], Train Loss: 0.012686729431152344, Val Loss: 0.024297459622136548
Epoch [9/100], Train Loss: 0.02668270654976368, Val Loss: 0.02394329356821379
Epoch [10/100], Train Loss: 0.005684077274054289, Val Loss: 0.02465153732261951
Epoch [11/100], Train Loss: 0.004978775978088379, Val Loss: 0.02490615831489014
Epoch [12/100], Train Loss: 0.001604898483492434, Val Loss: 0.02393256705154174
Epoch [13/100], Train Loss: 0.0169848389923572

In [None]:
# Predict on every window in X, then map the scaled outputs back to the
# original price scale with the fitted scaler.
model.eval()
with torch.no_grad():
    inputs = torch.tensor(X).unsqueeze(-1).float()
    outputs = model(inputs)
# reshape(-1, 1) yields the single-column layout inverse_transform expects.
predicted_prices = scaler.inverse_transform(outputs.numpy().reshape(-1, 1))

In [None]:
# Show the predictions after they were mapped back through the scaler.
print("Predicted Prices:", predicted_prices)

Predicted Prices: [[1600.1278 ]
 [1382.4093 ]
 [1585.8717 ]
 ...
 [ 421.0619 ]
 [ 446.81165]
 [ 488.66345]]


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# `y` was built from the min-max-scaled series, while `predicted_prices` has
# already been inverse-transformed to original price units. Bring the targets
# back to the same scale first; comparing them directly mixes two scales and
# makes MSE/MAE meaningless.
actual_prices = scaler.inverse_transform(y.reshape(-1, 1))

# Compute MSE and MAE in original price units.
mse = mean_squared_error(actual_prices, predicted_prices)
mae = mean_absolute_error(actual_prices, predicted_prices)

# Print the results.
print("MSE:", mse)
print("MAE:", mae)

MSE: 823017.6136552757
MAE: 447.0909830114945
