## Import

In [1]:
import random
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# import torch

# # GPU 메모리 비우기
# torch.cuda.empty_cache()

# # 새로운 GPU 컨텍스트 생성
# with torch.cuda.device(0):
#     torch.cuda.empty_cache()


In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
import torch

In [5]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 3090'

In [6]:
torch.cuda.is_available()

True

In [7]:
torch.__version__

'2.0.1+cu118'

In [8]:
# torch.cuda.empty_cache()

## Hyperparameter Setting

In [9]:
CFG = {
    'TRAIN_WINDOW_SIZE':120, # 90일치로 학습  초기는 90일이였음 
    'PREDICT_SIZE':21, # 21일치 예측
    'EPOCHS':23,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':4096,
    'SEED':41
}

In [10]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

### 데이터 불러오기

In [11]:
import pandas as pd
train_data = pd.read_csv('E:/LG/LG_data/train.csv').drop(columns=['ID', '제품'])

In [12]:
train_data.head(2)

Unnamed: 0,대분류,중분류,소분류,브랜드,2022-01-01,2022-01-02,2022-01-03,2022-01-04,2022-01-05,2022-01-06,...,2023-03-26,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,2023-04-01,2023-04-02,2023-04-03,2023-04-04
0,B002-C001-0002,B002-C002-0007,B002-C003-0038,B002-00001,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-00002,0,0,0,0,0,0,...,0,0,0,1,3,2,0,0,2,0


In [16]:
import pandas as pd
from fbprophet import Prophet

# 데이터 로드 및 정리
data = pd.read_csv('E:/LG/LG_data/train.csv')  # 데이터 파일 경로를 지정
df = data[['대분류', '중분류', '소분류', '브랜드'] + data.columns[4:].tolist()]
df_melted = df.melt(id_vars=['대분류', '중분류', '소분류', '브랜드'], var_name='ds', value_name='y')

# Prophet용 데이터 프레임 생성
df_prophet = df_melted[df_melted['y'] > 0]  # 0이 아닌 값만 선택
df_prophet['ds'] = pd.to_datetime(df_prophet['ds'])

# Prophet 모델 초기화
model = Prophet()

# 모델 학습
model.fit(df_prophet)

# 미래 21일치 날짜 생성
future = model.make_future_dataframe(periods=21)

# 미래 21일치 판매량 예측
forecast = model.predict(future)

# 결과 시각화
fig = model.plot(forecast)


ModuleNotFoundError: No module named 'fbprophet'

In [17]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib.pyplot as plt

# 데이터 로드 및 전처리
data = pd.read_csv('E:/LG/LG_data/train.csv')
df = data[['대분류', '중분류', '소분류', '브랜드'] + data.columns[4:].tolist()]

# 판매량 데이터만 추출
sales_data = df.melt(id_vars=['대분류', '중분류', '소분류', '브랜드'], var_name='날짜', value_name='판매량')
sales_data['날짜'] = pd.to_datetime(sales_data['날짜'])

# Pivot 테이블 생성
pivot_table = sales_data.pivot_table(index=['대분류', '중분류', '소분류', '브랜드'], columns='날짜', values='판매량').reset_index()

# NaN 값을 0으로 대체
pivot_table = pivot_table.fillna(0)

# LSTM 입력 데이터 생성
X = pivot_table.values[:, 4:-21]
y = pivot_table.values[:, -21:]

# 데이터 스케일링
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler.fit_transform(y)

# LSTM 모델 구축
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_scaled.shape[1], X_scaled.shape[2])))
model.add(Dense(21))  # 21일치 예측
model.compile(optimizer='adam', loss='mse')

# 모델 학습
model.fit(X_scaled, y_scaled, epochs=50, batch_size=16, verbose=2)

# 미래 21일치 판매량 예측
future_X = X_scaled[:, -21:]  # 가장 최근 21일치 데이터를 사용
future_X = np.reshape(future_X, (future_X.shape[0], 1, future_X.shape[1]))
future_predictions_scaled = model.predict(future_X)

# 결과 시각화
plt.figure(figsize=(10, 6))
plt.plot(np.arange(1, 22), scaler.inverse_transform(future_predictions_scaled[0]), label='Predicted')
plt.title('Future Sales Prediction')
plt.xlabel('Days')
plt.ylabel('Sales')
plt.legend()
plt.show()


ImportError: Could not find the DLL(s) 'msvcp140_1.dll'. TensorFlow requires that these DLLs be installed in a directory that is named in your %PATH% environment variable. You may install these DLLs by downloading "Microsoft C++ Redistributable for Visual Studio 2015, 2017 and 2019" for your platform from this URL: https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads

In [None]:
import random
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')



CFG = {
    'TRAIN_WINDOW_SIZE':120, # 90일치로 학습  초기는 90일이였음 
    'PREDICT_SIZE':21, # 21일치 예측
    'EPOCHS':30,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':4096,
    'SEED':41
}



def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정



train_data = pd.read_csv('E:/LG/LG_data/train.csv').drop(columns=['ID', '제품'])




import pandas as pd

# 더 작은 값을 추가하여 분모가 0인 경우 방지하는 상수 정의
EPSILON = 1e-8

numeric_cols = train_data.columns[4:]

# 칵 column의 min 및 max 계산
min_values = train_data[numeric_cols].min(axis=1)
max_values = train_data[numeric_cols].max(axis=1)

# 각 행의 범위(max-min)를 계산하고, 범위가 0인 경우 EPSILON 더해주기
ranges = max_values - min_values + EPSILON

# min-max scaling 수행
scaled_data = (train_data[numeric_cols].subtract(min_values, axis=0)).div(ranges, axis=0)

# 스케일링된 데이터로 업데이트
train_data[numeric_cols] = scaled_data

# max와 min 값을 dictionary 형태로 저장
scale_min_dict = min_values.to_dict()
scale_max_dict = max_values.to_dict()





# Label Encoding
label_encoder = LabelEncoder()
categorical_columns = ['대분류', '중분류', '소분류', '브랜드']

for col in categorical_columns:
    label_encoder.fit(train_data[col])
    train_data[col] = label_encoder.transform(train_data[col])


def make_train_data(data, train_size=CFG['TRAIN_WINDOW_SIZE'], predict_size=CFG['PREDICT_SIZE']):
    '''
    학습 기간 블럭, 예측 기간 블럭의 세트로 데이터를 생성
    data : 일별 판매량
    train_size : 학습에 활용할 기간
    predict_size : 추론할 기간
    '''
    num_rows = len(data)
    window_size = train_size + predict_size
    
    input_data = np.empty((num_rows * (len(data.columns) - window_size + 1), train_size, len(data.iloc[0, :4]) + 1))
    target_data = np.empty((num_rows * (len(data.columns) - window_size + 1), predict_size))
    
    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])
        sales_data = np.array(data.iloc[i, 4:])
        
        for j in range(len(sales_data) - window_size + 1):
            window = sales_data[j : j + window_size]
            temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size]))
            input_data[i * (len(data.columns) - window_size + 1) + j] = temp_data
            target_data[i * (len(data.columns) - window_size + 1) + j] = window[train_size:]
    
    return input_data, target_data





def make_predict_data(data, train_size=CFG['TRAIN_WINDOW_SIZE']):
    '''
    평가 데이터(Test Dataset)를 추론하기 위한 Input 데이터를 생성
    data : 일별 판매량
    train_size : 추론을 위해 필요한 일별 판매량 기간 (= 학습에 활용할 기간)
    '''
    num_rows = len(data)
    
    input_data = np.empty((num_rows, train_size, len(data.iloc[0, :4]) + 1))
    
    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])
        sales_data = np.array(data.iloc[i, -train_size:])
        
        window = sales_data[-train_size : ]
        temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size]))
        input_data[i] = temp_data
    
    return input_data


train_input, train_target = make_train_data(train_data)
test_input = make_predict_data(train_data)





data_len = len(train_input)
val_ratio = 0.1
test_ratio = 0.1

val_len = int(data_len * val_ratio)
test_len = int(data_len * test_ratio)

val_input = train_input[-val_len:]
val_target = train_target[-val_len:]

train_input = train_input[:-val_len - test_len]
train_target = train_target[:-val_len - test_len]





train_input.shape, train_target.shape, val_input.shape, val_target.shape, test_input.shape


class CustomDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        
    def __getitem__(self, index):
        if self.Y is not None:
            return torch.Tensor(self.X[index]), torch.Tensor(self.Y[index])
        return torch.Tensor(self.X[index])
    
    def __len__(self):
        return len(self.X)




train_dataset = CustomDataset(train_input, train_target)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_input, val_target)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

class ImprovedModel(nn.Module):
    def __init__(self, input_size=5, hidden_size=512, num_layers=2, output_size=CFG['PREDICT_SIZE'], dropout_prob=0.2):
        super(ImprovedModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.ln = nn.LayerNorm(hidden_size)  # Layer Normalization
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Linear(hidden_size // 2, output_size)
        )
        self.actv = nn.ReLU()

    def forward(self, x):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size, x.device)

        gru_out, hidden = self.gru(x, hidden)
        gru_out = self.dropout(gru_out)
        gru_out = self.ln(gru_out)  # Applying Layer Normalization

        last_output = gru_out[:, -1, :]

        output = self.actv(self.fc(last_output))

        return output.squeeze(1)
    def init_hidden(self, batch_size, device):
        return torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device)

def train(model, optimizer, train_loader, val_loader, device, scheduler= None):
    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = 9999999
    best_model = None
    
    
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        train_mae = []
        
        for X, Y in tqdm(iter(train_loader)):
            X = X.to(device)
            Y = Y.to(device)

            
            optimizer.zero_grad()
            
            output = model(X)
            loss = criterion(output, Y)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
        
        val_loss = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}]')
        

        # # 학습 루프 안에서
        # if best_loss > val_loss:
        #     best_loss = val_loss
        #     best_model = model
        #     print('Model Saved')

        # 학습이 끝난 후
    return model  # 모든 모델을 반환


def validation(model, val_loader, criterion, device):
    model.eval()
    val_loss = []
    
    with torch.no_grad():
        for X, Y in tqdm(iter(val_loader)):
            X = X.to(device)
            Y = Y.to(device)
            
            output = model(X)
            loss = criterion(output, Y)
            
            val_loss.append(loss.item())
    return np.mean(val_loss)

model = ImprovedModel() # BaseModel() 
optimizer = torch.optim.RAdam(params = model.parameters(), lr = CFG["LEARNING_RATE"])


infer_model = train(model, optimizer, train_loader, val_loader, device)

In [25]:
import random
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# Set device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

CFG = {
    'TRAIN_WINDOW_SIZE': 120,
    'PREDICT_SIZE': 21,
    'EPOCHS': 30,
    'LEARNING_RATE': 1e-4,
    'BATCH_SIZE': 4096,
    'SEED': 41
}

# Seed everything
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED'])

# Load and preprocess data
train_data = pd.read_csv('E:/LG/LG_data/train.csv').drop(columns=['ID', '제품'])

# Data preprocessing function
def preprocess_data(data):
    EPSILON = 1e-8
    numeric_cols = data.columns[4:]
    
    min_values = data[numeric_cols].min(axis=1)
    max_values = data[numeric_cols].max(axis=1)
    ranges = max_values - min_values + EPSILON
    
    scaled_data = (data[numeric_cols].subtract(min_values, axis=0)).div(ranges, axis=0)
    
    scale_min_dict = min_values.to_dict()
    scale_max_dict = max_values.to_dict()
    
    label_encoder = LabelEncoder()
    categorical_columns = ['대분류', '중분류', '소분류', '브랜드']
    
    for col in categorical_columns:
        label_encoder.fit(data[col])
        data[col] = label_encoder.transform(data[col])
    
    return scaled_data, scale_min_dict, scale_max_dict

scaled_data, scale_min_dict, scale_max_dict = preprocess_data(train_data)

# Define custom dataset
class CustomDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        
    def __getitem__(self, index):
        if self.Y is not None:
            return torch.Tensor(self.X[index]), torch.Tensor(self.Y[index])
        return torch.Tensor(self.X[index])
    
    def __len__(self):
        return len(self.X)

# Model class
class ImprovedModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob):
        super(ImprovedModel, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.ln = nn.LayerNorm(hidden_size)  # Layer Normalization
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Linear(hidden_size // 2, output_size)
        )
        self.actv = nn.ReLU()

    def forward(self, x):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size, x.device)

        gru_out, hidden = self.gru(x, hidden)
        gru_out = self.dropout(gru_out)
        gru_out = self.ln(gru_out)  # Applying Layer Normalization

        last_output = gru_out[:, -1, :]

        output = self.actv(self.fc(last_output))

        return output.squeeze(1)
    
    def init_hidden(self, batch_size, device):
        return torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device)

# Define the make_train_data and make_predict_data functions
def make_train_data(data, train_size=CFG['TRAIN_WINDOW_SIZE'], predict_size=CFG['PREDICT_SIZE']):
    num_rows = len(data)
    window_size = train_size + predict_size

    input_data = np.empty((num_rows * (len(data.columns) - window_size + 1), train_size, len(data.iloc[0, :4]) + 1))
    target_data = np.empty((num_rows * (len(data.columns) - window_size + 1), predict_size))

    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])
        sales_data = np.array(data.iloc[i, 4:])

        for j in range(len(sales_data) - window_size + 1):
            window = sales_data[j : j + window_size]
            temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size]))
            input_data[i * (len(data.columns) - window_size + 1) + j] = temp_data
            target_data[i * (len(data.columns) - window_size + 1) + j] = window[train_size:]

    return input_data, target_data

def make_predict_data(data, train_size=CFG['TRAIN_WINDOW_SIZE']):
    num_rows = len(data)

    input_data = np.empty((num_rows, train_size, len(data.iloc[0, :4]) + 1))

    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])
        sales_data = np.array(data.iloc[i, -train_size:])

        window = sales_data[-train_size:]
        temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size]))
        input_data[i] = temp_data

    return input_data

# Training function
def train_model(model, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = float('inf')
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS'] + 1):
        model.train()
        train_loss = []
        
        for X, Y in tqdm(train_loader):
            X = X.to(device)
            Y = Y.to(device)
            
            optimizer.zero_grad()
            
            output = model(X)
            loss = criterion(output, Y)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
        
        val_loss = validate_model(model, val_loader, criterion, device)
        print(f'Epoch [{epoch}/{CFG["EPOCHS"]}] - Train Loss: {np.mean(train_loss):.5f} - Val Loss: {val_loss:.5f}')
        
        # # Save the best model
        # if val_loss < best_loss:
        #     best_loss = val_loss
        #     best_model = model.state_dict()
        #     print('Best model updated')

    return model

# Validation function
def validate_model(model, val_loader, criterion, device):
    model.eval()
    val_loss = []

    with torch.no_grad():
        for X, Y in tqdm(val_loader):
            X = X.to(device)
            Y = Y.to(device)

            output = model(X)
            loss = criterion(output, Y)

            val_loss.append(loss.item())

    return np.mean(val_loss)

def main():
    # Create train and validation datasets...
    train_input, train_target = make_train_data(scaled_data)
    val_input = make_predict_data(scaled_data)

    train_dataset = CustomDataset(train_input, train_target)
    val_dataset = CustomDataset(val_input, None)  # Target is None for prediction

    # Create data loaders...
    train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

    # Initialize and train model...
    model = ImprovedModel(
        input_size=train_input.shape[2],  # Define input_size based on your data
        hidden_size=512,                  # Define hidden_size
        num_layers=2,                     # Define num_layers
        output_size=CFG['PREDICT_SIZE'],
        dropout_prob=0.2                  # Define dropout_prob
    )
    optimizer = torch.optim.RAdam(params=model.parameters(), lr=CFG["LEARNING_RATE"])
    trained_model = train_model(model, optimizer, train_loader, val_loader, device)

if __name__ == "__main__":
    main()


100%|██████████| 15890/15890 [02:10<00:00, 122.03it/s]
100%|██████████| 15890/15890 [00:03<00:00, 4163.13it/s]
100%|██████████| 1238/1238 [11:26<00:00,  1.80it/s]
  0%|          | 0/4 [00:00<?, ?it/s]


ValueError: too many values to unpack (expected 2)

In [22]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

# 데이터 로드 및 전처리
data = pd.read_csv('E:/LG/LG_data/train.csv')
df = data[['대분류', '중분류', '소분류', '브랜드'] + data.columns[4:].tolist()]

# 판매량 데이터만 추출
sales_data = df.melt(id_vars=['대분류', '중분류', '소분류', '브랜드'], var_name='날짜', value_name='판매량')
sales_data['날짜'] = pd.to_datetime(sales_data['날짜'])

# Pivot 테이블 생성
pivot_table = sales_data.pivot_table(index=['대분류', '중분류', '소분류', '브랜드'], columns='날짜', values='판매량').reset_index()

# NaN 값을 0으로 대체
pivot_table = pivot_table.fillna(0)


# LSTM 입력 데이터 생성
X = pivot_table.pivot_table(index=['대분류', '중분류', '소분류', '브랜드'], columns='날짜', values='판매량').fillna(0).values[:, :-21]
y = pivot_table.pivot_table(index=['대분류', '중분류', '소분류', '브랜드'], columns='날짜', values='판매량').fillna(0).values[:, -21:]

# 데이터 스케일링
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler.fit_transform(y)

# PyTorch를 위한 텐서로 변환
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_scaled, dtype=torch.float32)

# LSTM 모델 구축
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # 마지막 시점의 출력만 사용
        return out

input_size = X_tensor.shape[1]
hidden_size = 64
num_layers = 2
output_size = 21  # 21일치 예측

model = LSTMModel(input_size, hidden_size, num_layers, output_size)

# 손실 함수와 최적화 알고리즘 설정
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 모델 학습
num_epochs = 100
for epoch in range(num_epochs):
    outputs = model(X_tensor.unsqueeze(0))  # 배치 차원 추가
    optimizer.zero_grad()
    loss = criterion(outputs, y_tensor)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# # 미래 21일치 판매량 예측
# future_X = X_scaled[:, -21:]
# future_X_tensor = torch.tensor(future_X, dtype=torch.float32)
# future_X_tensor = future_X_tensor.unsqueeze(0)  # 배치 차원 추가
# future_predictions_scaled = model(future_X_tensor).detach().numpy()

# # 결과 시각화
# plt.figure(figsize=(10, 6))
# plt.plot(np.arange(1, 22), scaler.inverse_transform(future_predictions_scaled)[0], label='Predicted')
# plt.title('Future Sales Prediction')
# plt.xlabel('Days')
# plt.ylabel('Sales')
# plt.legend()
# plt.show()


ValueError: Per-column arrays must each be 1-dimensional

In [14]:
import pandas as pd

import pandas as pd
train_data = pd.read_csv('E:/LG/LG_data/train.csv').drop(columns=['ID', '제품'])


# '날짜' 열을 행으로 변환
df_melted = pd.melt(train_data, id_vars=['대분류', '중분류', '소분류', '브랜드'], var_name='날짜', value_name='판매량')

# '날짜' 열의 데이터를 날짜형으로 변환
df_melted['날짜'] = pd.to_datetime(df_melted['날짜'])

# 결과 출력
print(df_melted.tail())


                    대분류             중분류             소분류         브랜드  \
7293505  B002-C001-0003  B002-C002-0008  B002-C003-0042  B002-03799   
7293506  B002-C001-0003  B002-C002-0008  B002-C003-0044  B002-03799   
7293507  B002-C001-0003  B002-C002-0008  B002-C003-0044  B002-03799   
7293508  B002-C001-0003  B002-C002-0008  B002-C003-0044  B002-03799   
7293509  B002-C001-0002  B002-C002-0004  B002-C003-0020  B002-03799   

                날짜  판매량  
7293505 2023-04-04    0  
7293506 2023-04-04    3  
7293507 2023-04-04    0  
7293508 2023-04-04    2  
7293509 2023-04-04    0  


In [198]:
import pandas as pd

# CSV 파일 불러오기
csv_file_path = 'E:/LG/LG_data/brand_keyword_cnt.csv'
df_loaded = pd.read_csv(csv_file_path)

# '날짜' 열을 행으로 변환 (melt 사용)
df_melteds = pd.melt(df_loaded, id_vars=['브랜드'], var_name='날짜', value_name='검색수')

# '날짜' 열의 데이터를 날짜형으로 변환
df_melteds['날짜'] = pd.to_datetime(df_melted['날짜'])

# 결과 출력
print(df_melteds.tail())


                브랜드         날짜      검색수
1455025  B002-03794 2022-04-02  1.78416
1455026  B002-03795 2022-04-02  0.00000
1455027  B002-03796 2022-04-02  0.07252
1455028  B002-03798 2022-04-02  0.10153
1455029  B002-03799 2022-04-02  5.07687


In [204]:
import pandas as pd

# 첫 번째 CSV 파일 불러오기
csv_file_path1 = 'E:/LG/LG_data/train.csv'
train_data = pd.read_csv(csv_file_path1).drop(columns=['ID', '제품'])

# '날짜' 열을 행으로 변환
df_melted = pd.melt(train_data, id_vars=['대분류', '중분류', '소분류', '브랜드'], var_name='날짜', value_name='판매량')

# '날짜' 열의 데이터를 날짜형으로 변환
df_melted['날짜'] = pd.to_datetime(df_melted['날짜'])

# 두 번째 CSV 파일 불러오기
csv_file_path2 = 'E:/LG/LG_data/brand_keyword_cnt.csv'
df_loaded = pd.read_csv(csv_file_path2)

# '날짜' 열을 행으로 변환 (melt 사용)
df_melteds = pd.melt(df_loaded, id_vars=['브랜드'], var_name='날짜', value_name='검색수')

# '날짜' 열의 데이터를 날짜형으로 변환
df_melteds['날짜'] = pd.to_datetime(df_melteds['날짜'])

# 브랜드명을 기준으로 데이터프레임 병합 (merge 사용)
result_df = df_melted.merge(df_melteds, how='left', left_on=['브랜드', '날짜'], right_on=['브랜드', '날짜'])


# 판매량이나 검색수가 0이 아닌 데이터만 선택하여 새로운 데이터프레임 생성
filtered_result_df = result_df[(result_df['판매량'] != 0) & (result_df['검색수'] != 0)]


# 결과 출력
print(filtered_result_df.head())

# 새로운 파일로 저장
result_file_path = 'E:/LG/LG_data/filtered_merged_data.csv'
result_df.to_csv(result_file_path, index=False)
print(f"데이터가 {result_file_path}에 저장되었습니다.")


               대분류             중분류             소분류         브랜드         날짜  \
32  B002-C001-0002  B002-C002-0007  B002-C003-0036  B002-00010 2022-01-01   
34  B002-C001-0002  B002-C002-0007  B002-C003-0036  B002-00012 2022-01-01   
35  B002-C001-0002  B002-C002-0005  B002-C003-0025  B002-00013 2022-01-01   
36  B002-C001-0002  B002-C002-0005  B002-C003-0025  B002-00013 2022-01-01   
40  B002-C001-0001  B002-C002-0001  B002-C003-0003  B002-00017 2022-01-01   

    판매량       검색수  
32    2  0.333620  
34    2  0.362630  
35   10  2.901070  
36   10  2.901070  
40    6  0.115993  
데이터가 E:/LG/LG_data/filtered_merged_data.csv에 저장되었습니다.


In [212]:
import pandas as pd

# 첫 번째 CSV 파일 불러오기
csv_file_path1 = 'E:/LG/LG_data/train.csv'
train_data = pd.read_csv(csv_file_path1).drop(columns=['ID', '제품'])

# '날짜' 열을 행으로 변환
df_melted = pd.melt(train_data, id_vars=['대분류', '중분류', '소분류', '브랜드'], var_name='날짜', value_name='판매량')

# '날짜' 열의 데이터를 날짜형으로 변환
df_melted['날짜'] = pd.to_datetime(df_melted['날짜'])

# 두 번째 CSV 파일 불러오기
csv_file_path2 = 'E:/LG/LG_data/brand_keyword_cnt.csv'
df_loaded = pd.read_csv(csv_file_path2)

# '날짜' 열을 행으로 변환 (melt 사용)
df_melteds = pd.melt(df_loaded, id_vars=['브랜드'], var_name='날짜', value_name='검색수')

# '날짜' 열의 데이터를 날짜형으로 변환
df_melteds['날짜'] = pd.to_datetime(df_melteds['날짜'])

# 브랜드명을 기준으로 데이터프레임 병합 (merge 사용)
result_df = df_melted.merge(df_melteds, how='left', left_on=['브랜드', '날짜'], right_on=['브랜드', '날짜'])

# 판매량이나 검색수가 0이 아닌 데이터만 선택하여 새로운 데이터프레임 생성
filtered_result_df = result_df[(result_df['판매량'] != 0) & (result_df['검색수'] != 0)]

# 판매량과 검색수가 같은 경우를 제거
filtered_result_df = filtered_result_df[filtered_result_df['판매량'] != filtered_result_df['검색수']]

# 결과 출력
print(filtered_result_df.head())

# 새로운 파일로 저장
result_file_path = 'E:/LG/LG_data/filtered_merged_data.csv'
filtered_result_df.to_csv(result_file_path, index=False)
print(f"데이터가 {result_file_path}에 저장되었습니다.")


               대분류             중분류             소분류         브랜드         날짜  \
32  B002-C001-0002  B002-C002-0007  B002-C003-0036  B002-00010 2022-01-01   
34  B002-C001-0002  B002-C002-0007  B002-C003-0036  B002-00012 2022-01-01   
35  B002-C001-0002  B002-C002-0005  B002-C003-0025  B002-00013 2022-01-01   
36  B002-C001-0002  B002-C002-0005  B002-C003-0025  B002-00013 2022-01-01   
40  B002-C001-0001  B002-C002-0001  B002-C003-0003  B002-00017 2022-01-01   

    판매량       검색수  
32    2  0.333620  
34    2  0.362630  
35   10  2.901070  
36   10  2.901070  
40    6  0.115993  
데이터가 E:/LG/LG_data/filtered_merged_data.csv에 저장되었습니다.


In [223]:
import pandas as pd

# 첫 번째 CSV 파일 불러오기
csv_file_path1 = 'E:/LG/LG_data/train.csv'
train_data = pd.read_csv(csv_file_path1).drop(columns=['ID', '제품'])

# '날짜' 열을 행으로 변환
df_melted = pd.melt(train_data, id_vars=['대분류', '중분류', '소분류', '브랜드'], var_name='날짜', value_name='판매량')

# '날짜' 열의 데이터를 날짜형으로 변환
df_melted['날짜'] = pd.to_datetime(df_melted['날짜'])

# 두 번째 CSV 파일 불러오기
csv_file_path2 = 'E:/LG/LG_data/brand_keyword_cnt.csv'
df_loaded = pd.read_csv(csv_file_path2)

# '날짜' 열을 행으로 변환 (melt 사용)
df_melteds = pd.melt(df_loaded, id_vars=['브랜드'], var_name='날짜', value_name='검색수')

# '날짜' 열의 데이터를 날짜형으로 변환
df_melteds['날짜'] = pd.to_datetime(df_melteds['날짜'])

# 브랜드명을 기준으로 데이터프레임 병합 (merge 사용)
result_df = df_melted.merge(df_melteds, how='left', left_on=['브랜드', '날짜'], right_on=['브랜드', '날짜'])

# 판매량과 검색수가 0이 아닌 경우만 선택하여 새로운 데이터프레임 생성
filtered_result_df = result_df[(result_df['판매량'] != 0) & (result_df['검색수'] != 0)]

# 판매량과 검색수가 같은 경우를 제거
filtered_result_df = filtered_result_df[filtered_result_df['판매량'] != filtered_result_df['검색수']]

# 결과 출력
print(filtered_result_df.head())

# 행 수를 15889개로 조절
desired_row_count = 15889
filtered_result_df = filtered_result_df[:desired_row_count]

# 새로운 파일로 저장
result_file_path = 'E:/LG/LG_data/filtered_merged_data.csv'
result_df.to_csv(result_file_path, index=False)
print(f"데이터가 {result_file_path}에 저장되었습니다.")


               대분류             중분류             소분류         브랜드         날짜  \
32  B002-C001-0002  B002-C002-0007  B002-C003-0036  B002-00010 2022-01-01   
34  B002-C001-0002  B002-C002-0007  B002-C003-0036  B002-00012 2022-01-01   
35  B002-C001-0002  B002-C002-0005  B002-C003-0025  B002-00013 2022-01-01   
36  B002-C001-0002  B002-C002-0005  B002-C003-0025  B002-00013 2022-01-01   
40  B002-C001-0001  B002-C002-0001  B002-C003-0003  B002-00017 2022-01-01   

    판매량       검색수  
32    2  0.333620  
34    2  0.362630  
35   10  2.901070  
36   10  2.901070  
40    6  0.115993  
데이터가 E:/LG/LG_data/filtered_merged_data.csv에 저장되었습니다.


In [224]:
result_df.tail(30)

Unnamed: 0,대분류,중분류,소분류,브랜드,날짜,판매량,검색수
7293480,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,2,0.10153
7293481,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0,0.10153
7293482,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0,0.10153
7293483,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0,0.10153
7293484,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0,0.10153
7293485,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0,0.10153
7293486,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0,0.10153
7293487,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0,0.10153
7293488,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0,0.10153
7293489,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0,0.10153


In [225]:
filtered_result_df.describe()

Unnamed: 0,날짜,판매량,검색수
count,15889,15889.0,15660.0
mean,2022-01-03 07:11:56.193593088,47.769526,7.140524
min,2022-01-01 00:00:00,1.0,0.072398
25%,2022-01-02 00:00:00,3.0,0.319115
50%,2022-01-03 00:00:00,7.0,1.01537
75%,2022-01-05 00:00:00,23.0,3.06063
max,2022-01-06 00:00:00,15056.0,875.140589
std,,272.681637,26.574081


In [226]:
filtered_result_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 15889 entries, 32 to 83792
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   대분류     15889 non-null  object        
 1   중분류     15889 non-null  object        
 2   소분류     15889 non-null  object        
 3   브랜드     15889 non-null  object        
 4   날짜      15889 non-null  datetime64[ns]
 5   판매량     15889 non-null  int64         
 6   검색수     15660 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1), object(4)
memory usage: 993.1+ KB


In [227]:
filtered_result_df.tail(5)

Unnamed: 0,대분류,중분류,소분류,브랜드,날짜,판매량,검색수
83781,B002-C001-0002,B002-C002-0005,B002-C003-0025,B002-01040,2022-01-06,20,0.62373
83783,B002-C001-0002,B002-C002-0005,B002-C003-0025,B002-01040,2022-01-06,10,0.62373
83784,B002-C001-0002,B002-C002-0005,B002-C003-0025,B002-01040,2022-01-06,350,0.62373
83785,B002-C001-0002,B002-C002-0005,B002-C003-0025,B002-01040,2022-01-06,350,0.62373
83792,B002-C001-0002,B002-C002-0006,B002-C003-0034,B002-01045,2022-01-06,2,12.706759


### 데이터 전처리

In [228]:
import pandas as pd

# 주어진 데이터의 판매량과 검색수를 Min-Max 스케일링
result_df['판매량'] = (result_df['판매량'] - result_df['판매량'].min()) / (result_df['판매량'].max() - result_df['판매량'].min()) 
result_df['검색수'] = (result_df['검색수'] - result_df['검색수'].min()) / (result_df['검색수'].max() - result_df['검색수'].min()) 

print(result_df.head())


              대분류             중분류             소분류         브랜드         날짜  판매량  \
0  B002-C001-0002  B002-C002-0007  B002-C003-0038  B002-00001 2022-01-01  0.0   
1  B002-C001-0003  B002-C002-0008  B002-C003-0044  B002-00002 2022-01-01  0.0   
2  B002-C001-0003  B002-C002-0008  B002-C003-0044  B002-00002 2022-01-01  0.0   
3  B002-C001-0003  B002-C002-0008  B002-C003-0044  B002-00002 2022-01-01  0.0   
4  B002-C001-0001  B002-C002-0001  B002-C003-0003  B002-00003 2022-01-01  0.0   

        검색수  
0  0.000063  
1  0.000945  
2  0.000945  
3  0.000945  
4  0.000025  


In [229]:
submit.tail()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
15885,15885,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15886,15886,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15887,15887,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15888,15888,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15889,15889,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [230]:
result_df.tail(30)

Unnamed: 0,대분류,중분류,소분류,브랜드,날짜,판매량,검색수
7293480,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,1e-05,8e-06
7293481,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0.0,8e-06
7293482,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0.0,8e-06
7293483,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0.0,8e-06
7293484,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0.0,8e-06
7293485,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0.0,8e-06
7293486,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0.0,8e-06
7293487,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0.0,8e-06
7293488,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0.0,8e-06
7293489,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-03798,2023-04-04,0.0,8e-06


In [231]:
# Label Encoding
label_encoder = LabelEncoder()
categorical_columns = ['대분류', '중분류', '소분류','브랜드'] # '대분류', '중분류', '소분류',

for col in categorical_columns:
    label_encoder.fit(result_df[col])
    result_df[col] = label_encoder.transform(result_df[col])

In [232]:
result_df.head()

Unnamed: 0,대분류,중분류,소분류,브랜드,날짜,판매량,검색수
0,1,6,37,0,2022-01-01,0.0,6.3e-05
1,2,7,43,1,2022-01-01,0.0,0.000945
2,2,7,43,1,2022-01-01,0.0,0.000945
3,2,7,43,1,2022-01-01,0.0,0.000945
4,0,0,2,2,2022-01-01,0.0,2.5e-05


In [233]:
# 훈련 데이터와 테스트 데이터로 분할
train_size = int(0.8 * len(result_df))
train_data = result_df[:train_size]
test_data = result_df[train_size:]

In [234]:
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer

# 결측값을 0으로 대체하는 Imputer 생성
imputer = SimpleImputer(strategy='constant', fill_value=0)

# 피처와 타겟 분리
X_train = train_data[['대분류', '중분류', '소분류', '브랜드', '검색수']]
y_train = train_data['판매량']

# 결측값 대체
X_train_imputed = imputer.fit_transform(X_train)

# 선형 회귀 모델 생성 및 학습
model = LinearRegression()
model.fit(X_train_imputed, y_train)


# 테스트 데이터에 대한 예측
X_test = test_data[['대분류', '중분류', '소분류', '브랜드', '검색수']]
y_test = test_data['판매량']

# 결측값 대체
X_test_imputed = imputer.transform(X_test)

# 예측
y_pred = model.predict(X_test_imputed)

# 평균 제곱근 오차 계산
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
print(f"RMSE: {rmse}")


RMSE: 0.0006932690886170807


In [235]:
submit = pd.read_csv('E:/LG/LG_data/sample_submission.csv')
submit.tail()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
15885,15885,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15886,15886,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15887,15887,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15888,15888,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15889,15889,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [236]:
import pandas as pd

# 첫 번째 CSV 파일 불러오기
csv_file_path1 = 'E:/LG/LG_data/sample_submission.csv'
train_data = pd.read_csv(csv_file_path1)

# '날짜' 열을 행으로 변환
df_meltedsss = pd.melt(train_data, id_vars=['ID'], var_name='날짜', value_name='판매량')

# '날짜' 열의 데이터를 날짜형으로 변환
df_meltedsss['날짜'] = pd.to_datetime(df_melted['날짜'])


In [237]:
df_meltedsss.head()

Unnamed: 0,ID,날짜,판매량
0,0,2022-01-01,0
1,1,2022-01-01,0
2,2,2022-01-01,0
3,3,2022-01-01,0
4,4,2022-01-01,0


In [238]:
submit['판매량'] = y_pred
submit.head()

ValueError: Length of values (1458702) does not match length of index (15890)

In [None]:

submit.to_csv('E:/LG/LG_data/models/baseline_submit_epoch_30_90_4096_512_RAdam_GRU_02_LAYER_33333.csv', index=False)

In [131]:
submit.iloc[:,1:] = y_pred
submit.tail()


ValueError: Must have equal len keys and value when setting with an iterable

In [115]:
result_df.tail()

Unnamed: 0,대분류,중분류,소분류,브랜드,날짜,판매량,검색수
7293505,2,7,41,3169,2023-04-04,0.0,0.000379
7293506,2,7,43,3169,2023-04-04,1.5e-05,0.000379
7293507,2,7,43,3169,2023-04-04,0.0,0.000379
7293508,2,7,43,3169,2023-04-04,1e-05,0.000379
7293509,1,3,19,3169,2023-04-04,0.0,0.000379


In [119]:
train_input, train_target = make_result_data(result_df)
test_input = make_predict_data(result_df)

ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 120 and the array at index 1 has size 2

In [118]:
data_len = len(train_input)
val_ratio = 0.2
test_ratio = 0.2

val_len = int(data_len * val_ratio)
test_len = int(data_len * test_ratio)

val_input = train_input[-val_len:]
val_target = train_target[-val_len:]


train_input = train_input[:-val_len - test_len]
train_target = train_target[:-val_len - test_len]


NameError: name 'train_input' is not defined

In [32]:
train_input.shape, train_target.shape, val_input.shape, val_target.shape, test_input.shape

((4105976, 120, 5),
 (4105976, 21),
 (513247, 120, 5),
 (513247, 21),
 (15890, 120, 5))

### Custom Dataset

In [33]:
class CustomDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        
    def __getitem__(self, index):
        if self.Y is not None:
            return torch.Tensor(self.X[index]), torch.Tensor(self.Y[index])
        return torch.Tensor(self.X[index])
    
    def __len__(self):
        return len(self.X)

In [34]:
train_dataset = CustomDataset(train_input, train_target)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_input, val_target)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

### 모델 선언

## GRU 이용

In [35]:
# import torch.nn as nn
# import torch

# class BaseModel(nn.Module):
#     def __init__(self, input_size=5, hidden_size=512, num_layers=2, output_size=CFG['PREDICT_SIZE']):
#         super(BaseModel, self).__init__()
#         self.hidden_size = hidden_size
#         self.num_layers = num_layers
#         self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers, batch_first=True)
#         self.fc = nn.Sequential(
#             nn.Linear(hidden_size, hidden_size // 2),
#             nn.ReLU(),
#             nn.Dropout(),
#             nn.Linear(hidden_size // 2, output_size)
#         )

#         self.actv = nn.ReLU()

#     def forward(self, x):
#         # x shape: (B, TRAIN_WINDOW_SIZE, 5)
#         batch_size = x.size(0)
#         hidden = self.init_hidden(batch_size, x.device)

#         # GRU layer
#         gru_out, hidden = self.gru(x, hidden)

#         # Only use the last output sequencea
#         last_output = gru_out[:, -1, :]

#         # Fully connected layer
#         output = self.actv(self.fc(last_output))

#         return output.squeeze(1)

#     def init_hidden(self, batch_size, device):
#         # Initialize hidden state for all GRU layers
#         return torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device)


## 개선된 gru

In [36]:
class ImprovedModel(nn.Module):
    def __init__(self, input_size=5, hidden_size=512, num_layers=2, output_size=CFG['PREDICT_SIZE'], dropout_prob=0.2):
        super(ImprovedModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.ln = nn.LayerNorm(hidden_size)  # Layer Normalization
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Linear(hidden_size // 2, output_size)
        )
        self.actv = nn.ReLU()

    def forward(self, x):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size, x.device)

        gru_out, hidden = self.gru(x, hidden)
        gru_out = self.dropout(gru_out)
        gru_out = self.ln(gru_out)  # Applying Layer Normalization

        last_output = gru_out[:, -1, :]

        output = self.actv(self.fc(last_output))

        return output.squeeze(1)
    def init_hidden(self, batch_size, device):
        return torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device)



## lstm으로 만든것

In [37]:
# import torch.nn as nn
# import torch

# class ImprovedModel(nn.Module):
#     def __init__(self, input_size=5, hidden_size=512, num_layers=2, output_size=CFG['PREDICT_SIZE']):
#         super(ImprovedModel, self).__init__()
#         self.hidden_size = hidden_size
#         self.num_layers = num_layers
#         self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True)  # Using nn.LSTM instead of nn.GRU
#         self.dropout = nn.Dropout(0.5)  # Adding dropout after LSTM
#         self.fc = nn.Sequential(
#             nn.Linear(hidden_size, hidden_size // 2),
#             nn.ReLU(),
#             nn.Linear(hidden_size // 2, output_size)
#         )
#         self.actv = nn.ReLU()  # Using LeakyReLU activation

#     def forward(self, x):
#         batch_size = x.size(0)
#         hidden, cell = self.init_hidden(batch_size, x.device)  # Initializing hidden and cell states for LSTM

#         lstm_out, (hidden, cell) = self.lstm(x, (hidden, cell))  # Using LSTM instead of GRU
#         lstm_out = self.dropout(lstm_out)  # Applying dropout

#         last_output = lstm_out[:, -1, :]

#         output = self.actv(self.fc(last_output))

#         return output.squeeze(1)

#     def init_hidden(self, batch_size, device):
#         return (torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device),
#                 torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device))  # Initializing hidden and cell states for LSTM


In [38]:
def train(model, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = 9999999
    best_model = None
    
    
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        train_mae = []
        
        for X, Y in tqdm(iter(train_loader)):
            X = X.to(device)
            Y = Y.to(device)

            
            optimizer.zero_grad()
            
            output = model(X)
            loss = criterion(output, Y)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
        
        val_loss = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}]')
        

        # # 학습 루프 안에서
        # if best_loss > val_loss:
        #     best_loss = val_loss
        #     best_model = model
        #     print('Model Saved')

        # 학습이 끝난 후
    return model  # 모든 모델을 반환


In [39]:
def validation(model, val_loader, criterion, device):
    model.eval()
    val_loss = []
    
    with torch.no_grad():
        for X, Y in tqdm(iter(val_loader)):
            X = X.to(device)
            Y = Y.to(device)
            
            output = model(X)
            loss = criterion(output, Y)
            
            val_loss.append(loss.item())
            
    return np.mean(val_loss)

## Run !!

In [40]:
model = ImprovedModel() # BaseModel() 
optimizer = torch.optim.RAdam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
infer_model = train(model, optimizer, train_loader, val_loader, device)

100%|██████████| 1003/1003 [08:55<00:00,  1.87it/s]
100%|██████████| 126/126 [00:31<00:00,  4.03it/s]


Epoch : [1] Train Loss : [0.01697] Val Loss : [0.01657]


100%|██████████| 1003/1003 [09:07<00:00,  1.83it/s]
100%|██████████| 126/126 [00:33<00:00,  3.75it/s]


Epoch : [2] Train Loss : [0.01547] Val Loss : [0.01701]


100%|██████████| 1003/1003 [09:13<00:00,  1.81it/s]
100%|██████████| 126/126 [00:33<00:00,  3.76it/s]


Epoch : [3] Train Loss : [0.01531] Val Loss : [0.01656]


100%|██████████| 1003/1003 [08:33<00:00,  1.95it/s]
100%|██████████| 126/126 [00:27<00:00,  4.59it/s]


Epoch : [4] Train Loss : [0.01553] Val Loss : [0.01655]


100%|██████████| 1003/1003 [08:23<00:00,  1.99it/s]
 33%|███▎      | 41/126 [00:10<00:20,  4.11it/s]

## 모델 추론

In [None]:
test_dataset = CustomDataset(test_input, None)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [None]:
def inference(model, test_loader, device):
    predictions = []
    
    with torch.no_grad():
        for X in tqdm(iter(test_loader)):
            X = X.to(device)
            
            output = model(X)
            
            # 모델 출력인 output을 CPU로 이동하고 numpy 배열로 변환
            output = output.cpu().numpy()
            
            predictions.extend(output)
    
    return np.array(predictions)

In [None]:
# pred = inference(infer_model, test_loader, device)
pred = inference(infer_model, test_loader, device)

100%|██████████| 4/4 [00:00<00:00,  5.76it/s]


In [None]:
# 추론 결과를 inverse scaling
for idx in range(len(pred)):
    pred[idx, :] = pred[idx, :]  * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]
    
# 결과 후처리
pred = np.round(pred, 0).astype(int)

In [None]:
pred.shape

(15890, 21)

## Submission

In [None]:
submit = pd.read_csv('E:/LG/LG_data/sample_submission.csv')
submit.tail()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
15885,15885,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15886,15886,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15887,15887,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15888,15888,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15889,15889,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
submit.iloc[:,1:] = pred
submit.tail()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
15885,15885,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
15886,15886,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
15887,15887,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
15888,15888,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
15889,15889,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [None]:
submit.to_csv('E:/LG/LG_data/models/baseline_submit_epoch_30_90_4096_512_RAdam_GRU_02_LAYER_33333.csv', index=False)