In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import seaborn as sns
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader

import tqdm, time, os, datetime, glob, warnings, random
import gc
from sklearn.preprocessing import LabelEncoder
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
warnings.filterwarnings('ignore')

In [3]:
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and PyTorch
            (CPU and all CUDA devices). It is also exported as PYTHONHASHSEED.
    """
    random.seed(seed)
    # Exported for child processes; the hash seed of the running interpreter
    # is fixed at start-up and is not changed by this assignment.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark mode lets cuDNN pick kernels by timing them, which can select
    # different algorithms from run to run; it must be off for repeatable results.
    torch.backends.cudnn.benchmark = False

seed_everything(41) # Seed 고정

#데이터 불러오기


In [3]:
# Folder on the mounted Google Drive that holds the input CSV files.
root = '/content/drive/MyDrive/open'
# `original_df` keeps a second reference to the table exactly as read from disk;
# later cells use it to look up which IDs belong to each category.
train_df = original_df = pd.read_csv(f'{root}/train.csv')
sales_df = pd.read_csv(f'{root}/sales.csv')
brand_keyword_df = pd.read_csv(f'{root}/brand_keyword_cnt.csv')
product_info_df = pd.read_csv(f'{root}/product_info.csv')



In [4]:
# Wide -> long: every non-id column becomes one row per (product, column name).
train_df = pd.melt(train_df, id_vars=['ID','제품','대분류','중분류','소분류','브랜드'])
# melt names its outputs 'variable'/'value'; rename them to date ('일자') and sales quantity ('판매량').
train_df.rename(columns={'variable':'일자','value':'판매량'},inplace=True)

In [5]:
# Same wide -> long reshape for the sales table.
sales_df = pd.melt(sales_df, id_vars=['ID','제품','대분류','중분류','소분류','브랜드'])
# Here the melted value column is the sales amount ('판매금액').
sales_df.rename(columns={'variable':'일자','value':'판매금액'},inplace=True)

In [6]:

# Attach the sales amount to each (product, date) row; the left join keeps every row of train_df.
train_df = pd.merge(train_df, sales_df, on=['ID','제품','대분류','중분류','소분류','브랜드','일자'], how='left')

In [7]:
# Wide -> long for the brand keyword table: one row per (brand, column name).
brand_keyword_df = pd.melt(brand_keyword_df, id_vars=['브랜드'])
# Rename the melted columns to date ('일자') and mention count ('언급량').
brand_keyword_df.rename(columns={'variable':'일자','value':'언급량'},inplace=True)


In [8]:
# Attach the brand-level mention count to every product row of that brand and date.
# Brands/dates missing from brand_keyword_df produce NaN here (left join).
train_df = pd.merge(train_df, brand_keyword_df, on=['브랜드','일자'], how='left')

In [9]:
# Order rows by product ID and then by date so each product occupies one contiguous
# block of rows; later cells slice and reshape the frame relying on this layout.
train_df = train_df.sort_values(by=['ID','일자'])

In [10]:
# Parse the date strings (YYYY-MM-DD) into datetime64 so the `.dt` accessor can be used later.
train_df['일자'] = pd.to_datetime(train_df['일자'], format='%Y-%m-%d')


In [11]:
# Average unit price = sales amount / sales quantity.
# A zero quantity gives NaN (0/0) or +/-inf (x/0). The later fillna(0) step only
# clears NaN, so infinities are mapped to NaN here to be zeroed with the rest.
train_df['평균판매금액'] = (train_df['판매금액']/train_df['판매량']).replace([np.inf,-np.inf],np.nan)

In [12]:
# Replace every remaining NaN (unmatched merges, 0/0 average prices) with 0.
train_df = train_df.fillna(0)

In [13]:
del sales_df
del brand_keyword_df
del product_info_df
gc.collect()

35

#전처리

In [14]:
train= train_df.copy()

In [15]:
train.head()

Unnamed: 0,ID,제품,대분류,중분류,소분류,브랜드,일자,판매량,판매금액,언급량,평균판매금액
0,0,B002-00001-00001,B002-C001-0002,B002-C002-0007,B002-C003-0038,B002-00001,2022-01-01,0,0,0.84131,0.0
15890,0,B002-00001-00001,B002-C001-0002,B002-C002-0007,B002-C003-0038,B002-00001,2022-01-02,0,0,0.91383,0.0
31780,0,B002-00001-00001,B002-C001-0002,B002-C002-0007,B002-C003-0038,B002-00001,2022-01-03,0,0,1.45053,0.0
47670,0,B002-00001-00001,B002-C001-0002,B002-C002-0007,B002-C003-0038,B002-00001,2022-01-04,0,0,2.42239,0.0
63560,0,B002-00001-00001,B002-C001-0002,B002-C002-0007,B002-C003-0038,B002-00001,2022-01-05,0,0,1.87119,0.0


In [16]:
def clear_outliar(data,lst):
    """Overwrite the outlying rows of one series with median-based values.

    Args:
        data: DataFrame containing the columns '판매량' (quantity), '판매금액'
            (amount), '평균판매금액' (average price) and `filter_attr`.
            It is modified in place.
        lst: Six-item sequence
            ``(id, filter_attr, value, updown, target_attr, mean_sales_revise)``.
            * id: identifier of the series; not used by the function.
            * filter_attr: column compared against `value` to select rows.
            * value: threshold.
            * updown: "이상" selects rows with ``filter_attr >= value``;
              "이하" selects rows with ``0 < filter_attr <= value``.
            * target_attr: what to recompute on the selected rows:
              "판매금액" -> amount = quantity * average price;
              "판매량"   -> quantity = amount / average price;
              "량금"     -> quantity = median quantity of the rows with sales,
                            then amount = quantity * average price.
              Any other value leaves quantity and amount untouched.
            * mean_sales_revise: if truthy, first set the average price of the
              selected rows to the median average price of the rows with sales.

    Returns:
        ``(data, idx)`` where `idx` is the boolean row mask that was selected.
        Nothing is rewritten when no row has a positive quantity.

    Raises:
        ValueError: if `updown` is neither "이상" nor "이하".
    """
    _, filter_attr, value, updown, target_attr, mean_sales_revise = lst

    if updown == "이상":
        idx = data[filter_attr] >= value
    elif updown == "이하":
        idx = (data[filter_attr] <= value) & (data[filter_attr] > 0)
    else:
        # Previously this only printed and then failed later on the unbound `idx`.
        raise ValueError(f"{updown} 이 잘못되었습니다.")

    # Medians are taken over rows that actually had sales.
    sold = data[data['판매량'] > 0]
    if len(sold) > 0:

        if mean_sales_revise:
            data.loc[idx,'평균판매금액'] = np.median(sold['평균판매금액'].values)

        if target_attr == "판매금액":
            data.loc[idx,'판매금액'] = data.loc[idx,'판매량']*data.loc[idx,'평균판매금액']

        elif target_attr == "판매량":
            # This branch used to repeat the "판매금액" test and was unreachable.
            data.loc[idx,'판매량'] = data.loc[idx,'판매금액']/data.loc[idx,'평균판매금액']

        elif target_attr == "량금":
            data.loc[idx,'판매량'] = np.median(sold['판매량'].values)
            data.loc[idx,'판매금액'] = data.loc[idx,'판매량'] * data.loc[idx,'평균판매금액']

    return data,idx

In [17]:

# Flat buffers for the cleaned columns: 15890 series x 459 days, in the row order of `train`.
sales = np.zeros((15890*459))
price = np.zeros((15890*459))
mean_price = np.zeros((15890*459))

# Walk over `train` one series (459 consecutive rows) at a time.
for i in tqdm.tqdm(range(0,15890*459,459)):
    data = train.iloc[i:i+459]

    s = data['판매량'].values

    # z-score of each day's quantity within this series. For a constant series
    # np.std is 0 and the scores are NaN, so no day passes the > 3.0 test below.
    z_score = (s-np.mean(s))/np.std(s)
    # From here on `z_score` holds quantities, not scores: first the quantities
    # whose score exceeds 3, then a single threshold value.
    z_score = s[z_score>3.0]
    if len(z_score) == 0:
        # No outlier found: use the series maximum as threshold.
        z_score = s.max()
    else :
        # Smallest quantity that counts as an outlier.
        z_score = z_score.min()

    # Rows with quantity >= threshold get the median quantity ('량금' mode) and a recomputed amount.
    data,idx = clear_outliar(data,[i//459,'판매량',z_score,'이상','량금',False])

    sales[i:i+459] = data['판매량']
    price[i:i+459] = data['판매금액']
    mean_price[i:i+459] = data['평균판매금액']

# Write the cleaned values back in one go (assigned positionally from the arrays).
train['판매량'] = sales
train['판매금액'] = price
train['평균판매금액'] = mean_price
del sales
del price
del mean_price
gc.collect()

100%|██████████| 15890/15890 [00:33<00:00, 476.43it/s]


0

In [18]:
def add_group_sales_propotion(train,train_np,group_type,columns):
    """Append, for each of `columns`, the item's share of its group's daily total.

    For every name in `columns` a new column ``'{name}비율({group_type})'`` is added
    to `train` (in place) and then divided, day by day, by the sum of that column
    over all items sharing the same `group_type` value.

    Args:
        train: Long-format frame processed in blocks of 459 rows per item. The
            blocks are consumed in sorted-group order, so the caller has to sort
            the frame by `group_type` (then item) beforehand.
        train_np: Array indexed by item ID on its first axis, holding the values
            of `columns` per day; group totals are taken from it.
        group_type: Name of the grouping column.
        columns: Names of the columns to turn into ratios.

    Returns:
        The same `train` object with the ratio columns appended as its last columns.

    Note:
        Group membership is read from the notebook-level `original_df`.
    """
    for name in columns:
        train[f'{name}비율({group_type})'] = train[name]

    # The freshly appended columns are the last len(columns) ones: -n, ..., -1.
    n_cols = len(columns)
    ratio_cols = [pos - n_cols for pos in range(n_cols)]

    row_start = 0
    for group in tqdm.tqdm(sorted(train[group_type].unique()),desc=f'{group_type}, {columns}'):
        member_ids = original_df.loc[original_df[group_type] == group, 'ID'].values
        # Per-day totals over the group's items; the epsilon avoids division by zero.
        denominator = train_np[member_ids].sum(axis=0) + 1e-8

        row_stop = row_start + len(member_ids) * 459
        while row_start < row_stop:
            block = slice(row_start, row_start + 459)
            train.iloc[block, ratio_cols] = train.iloc[block, ratio_cols].values / denominator
            row_start += 459

    return train

In [19]:
# Grouping columns for which share-of-group features are built.
group_types = ['소분류']

columns = ['판매금액','평균판매금액']
# (item, day, column) view of the raw values; `train` is ordered by ID then date
# at this point, so the first axis can be indexed by ID.
train_np = train[columns].values.reshape(15890,459,-1)
for t in group_types:
    # add_group_sales_propotion walks the frame group by group, so sort by group first.
    train = train.sort_values(by=[t,'ID','일자'])
    train = add_group_sales_propotion(train,train_np,t,columns)

# Restore the ID/date order that every later reshape relies on.
train = train.sort_values(by=['ID','일자'])
train.drop(['대분류','중분류'],axis=1,inplace=True)

소분류, ['판매금액', '평균판매금액']: 100%|██████████| 53/53 [00:13<00:00,  3.87it/s]


In [20]:
# Day of week as an integer, Monday=0 ... Sunday=6.
train['요일'] = train["일자"].dt.dayofweek

# Disabled week-of-month experiment. It is kept as a string literal, so nothing in
# it runs; as the last expression of the cell, its repr is echoed as the cell output.
'''train['주차'] = train['일자'].dt.weekofyear
train.loc[(train['일자'].dt.month==1)& (train['주차']==52),'주차' ] = 0

week_of_month = np.zeros((459))
idx = 0
for days in [31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,4]:
    week_of_month[idx:idx+days] = train.iloc[idx:idx+days,-1].values - train.iloc[idx,-1]
    idx += days

week_of_month=np.tile(week_of_month,(15890))

train['주차'] = week_of_month'''

"train['주차'] = train['일자'].dt.weekofyear\ntrain.loc[(train['일자'].dt.month==1)& (train['주차']==52),'주차' ] = 0\n\nweek_of_month = np.zeros((459))\nidx = 0\nfor days in [31,28,31,30,31,30,31,31,30,31,30,31,31,28,31,4]:\n    week_of_month[idx:idx+days] = train.iloc[idx:idx+days,-1].values - train.iloc[idx,-1]\n    idx += days\n\nweek_of_month=np.tile(week_of_month,(15890))\n\ntrain['주차'] = week_of_month"

In [21]:
train.head()

Unnamed: 0,ID,제품,소분류,브랜드,일자,판매량,판매금액,언급량,평균판매금액,판매금액비율(소분류),평균판매금액비율(소분류),요일
0,0,B002-00001-00001,B002-C003-0038,B002-00001,2022-01-01,0.0,0.0,0.84131,0.0,0.0,0.0,5
15890,0,B002-00001-00001,B002-C003-0038,B002-00001,2022-01-02,0.0,0.0,0.91383,0.0,0.0,0.0,6
31780,0,B002-00001-00001,B002-C003-0038,B002-00001,2022-01-03,0.0,0.0,1.45053,0.0,0.0,0.0,0
47670,0,B002-00001-00001,B002-C003-0038,B002-00001,2022-01-04,0.0,0.0,2.42239,0.0,0.0,0.0,1
63560,0,B002-00001-00001,B002-C003-0038,B002-00001,2022-01-05,0.0,0.0,1.87119,0.0,0.0,0.0,2


In [22]:
train.describe()

Unnamed: 0,ID,판매량,판매금액,언급량,평균판매금액,판매금액비율(소분류),평균판매금액비율(소분류),요일
count,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0
mean,7944.5,15.31646,108111.0,5.551993,6291.007,0.003248916,0.003248916,3.0
std,4587.048,170.9132,766293.6,31.61561,18495.05,0.01676804,0.01254073,2.005439
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3972.0,0.0,0.0,0.08691974,0.0,0.0,0.0,1.0
50%,7944.5,0.0,0.0,0.49318,0.0,0.0,0.0,3.0
75%,11917.0,3.0,29800.0,2.204829,5200.0,0.000904422,0.002409238,5.0
max,15889.0,51282.0,111930000.0,13383.32,854118.3,1.0,1.0,6.0


In [23]:

# One-hot encode weekday and sub-category; all levels are kept (drop_first=False).
train = pd.get_dummies(train, columns = ['요일','소분류'], drop_first=False)
# Alternatives tried earlier:
#train = pd.get_dummies(train, columns = ['요일'], drop_first=False)
#train = pd.get_dummies(train, columns = ['주차'], drop_first=False)

label_encoder = LabelEncoder()

# Columns that are mapped to integer codes instead of being one-hot encoded.
categorical_columns = ['브랜드']

for col in categorical_columns:
    # The single encoder is re-fitted per column; it ends up holding the classes of the last one.
    label_encoder.fit(train[col])
    train[col] = label_encoder.transform(train[col])

In [24]:
# Drop identifier and date columns so that only model features remain;
# the integer-encoded brand column stays in.
train.drop(['ID','제품','일자'],axis=1,inplace=True)
# Variant that also drops the brand column:
#train.drop(['제품','일자','브랜드'],axis=1,inplace=True)

In [25]:


# (item, day, feature) view of the feature table.
reshaped_train = train.values.reshape(15890,459,-1)

# Columns that are scaled separately for every item.
scale_columns = ['판매량','판매금액','평균판매금액','언급량']
# Starts as a copy of the names; each entry is replaced below by the column's position in `train`.
scale_columns_idx = scale_columns.copy()

for i,col in enumerate(train.columns):
    for j,target_col in enumerate(scale_columns):
        if (col == target_col):
            scale_columns_idx[j] = i


# Disabled: per-item median of the positive average prices (kept as a string, never executed).
'''median_mean_sales = np.zeros((15890,1))
for i in range(15890):
    median_mean_sales[i] = np.median(reshaped_train[i,reshaped_train[i,:,scale_columns_idx[scale_columns.index('평균판매금액')]]>0,scale_columns_idx[scale_columns.index('평균판매금액')]])'''


# values_for_scaling has shape (3, items, len(scale_columns)) and stores (a, b, c)
# of the transform  x' = (x - a) / (b - c).
# Min-max scale: a = min, b = max, c = min (all taken over the day axis).
values_for_scaling = np.array([np.min(reshaped_train[:,:,scale_columns_idx],1),np.max(reshaped_train[:,:,scale_columns_idx],1),np.min(reshaped_train[:,:,scale_columns_idx],1)])

# Standardization scale: a = mean, b = std, c = 0
#values_for_scaling = np.array([np.mean(reshaped_train[:,:,scale_columns_idx],1),np.std(reshaped_train[:,:,scale_columns_idx],1),np.zeros_like(np.mean(reshaped_train[:,:,scale_columns_idx],1))])

# Robust scale: a = median, b = 75% quantile, c = 25% quantile
#values_for_scaling = np.array([np.quantile(reshaped_train[:,:,scale_columns_idx], 0.5,1),np.quantile(reshaped_train[:,:,scale_columns_idx], 0.75,1),np.quantile(reshaped_train[:,:,scale_columns_idx], 0.25,1)]) #a,b,c

# Apply the transform in place, one item (459 rows) and one column at a time.
for i in tqdm.tqdm(range(0,train.shape[0],459)):

    for j in range(len(scale_columns)):

        # b == c would divide by zero (e.g. a constant series under min-max): write zeros instead.
        if (values_for_scaling[1,i//459,j] == values_for_scaling[2,i//459,j]):
            train.iloc[i:i+459,scale_columns_idx[j]] = 0
        else:
            train.iloc[i:i+459,scale_columns_idx[j]] = (train.iloc[i:i+459,scale_columns_idx[j]]-values_for_scaling[0,i//459,j])/(values_for_scaling[1,i//459,j]-values_for_scaling[2,i//459,j])

100%|██████████| 15890/15890 [00:31<00:00, 499.04it/s]


In [26]:
train.columns

Index(['브랜드', '판매량', '판매금액', '언급량', '평균판매금액', '판매금액비율(소분류)', '평균판매금액비율(소분류)',
       '요일_0', '요일_1', '요일_2', '요일_3', '요일_4', '요일_5', '요일_6',
       '소분류_B002-C003-0001', '소분류_B002-C003-0002', '소분류_B002-C003-0003',
       '소분류_B002-C003-0004', '소분류_B002-C003-0005', '소분류_B002-C003-0006',
       '소분류_B002-C003-0007', '소분류_B002-C003-0008', '소분류_B002-C003-0009',
       '소분류_B002-C003-0010', '소분류_B002-C003-0011', '소분류_B002-C003-0012',
       '소분류_B002-C003-0013', '소분류_B002-C003-0014', '소분류_B002-C003-0015',
       '소분류_B002-C003-0016', '소분류_B002-C003-0017', '소분류_B002-C003-0018',
       '소분류_B002-C003-0019', '소분류_B002-C003-0020', '소분류_B002-C003-0021',
       '소분류_B002-C003-0022', '소분류_B002-C003-0023', '소분류_B002-C003-0024',
       '소분류_B002-C003-0025', '소분류_B002-C003-0026', '소분류_B002-C003-0027',
       '소분류_B002-C003-0028', '소분류_B002-C003-0029', '소분류_B002-C003-0030',
       '소분류_B002-C003-0031', '소분류_B002-C003-0032', '소분류_B002-C003-0033',
       '소분류_B002-C003-0034', '소분류_B002-C003-0035', '소분류_

In [27]:
train.head()

Unnamed: 0,브랜드,판매량,판매금액,언급량,평균판매금액,판매금액비율(소분류),평균판매금액비율(소분류),요일_0,요일_1,요일_2,...,소분류_B002-C003-0044,소분류_B002-C003-0045,소분류_B002-C003-0046,소분류_B002-C003-0047,소분류_B002-C003-0048,소분류_B002-C003-0049,소분류_B002-C003-0050,소분류_B002-C003-0051,소분류_B002-C003-0052,소분류_B002-C003-0053
0,0,0.0,0.0,0.214576,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15890,0,0.0,0.0,0.234817,0.0,0.0,0.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
31780,0,0.0,0.0,0.384615,0.0,0.0,0.0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
47670,0,0.0,0.0,0.65587,0.0,0.0,0.0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
63560,0,0.0,0.0,0.502025,0.0,0.0,0.0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [28]:
train.describe()

Unnamed: 0,브랜드,판매량,판매금액,언급량,평균판매금액,판매금액비율(소분류),평균판매금액비율(소분류),요일_0,요일_1,요일_2,...,소분류_B002-C003-0044,소분류_B002-C003-0045,소분류_B002-C003-0046,소분류_B002-C003-0047,소분류_B002-C003-0048,소분류_B002-C003-0049,소분류_B002-C003-0050,소분류_B002-C003-0051,소분류_B002-C003-0052,소분류_B002-C003-0053
count,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,...,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0,7293510.0
mean,1585.399,0.133301,0.1290481,0.1823969,0.2801315,0.003248916,0.003248916,0.1437908,0.1437908,0.1416122,...,0.01472624,0.0185022,0.01403398,0.006985525,0.01019509,0.01220894,0.002139711,0.01006923,0.0246696,0.01390812
std,903.6647,0.2348897,0.2263161,0.1908159,0.3931416,0.01676804,0.01254073,0.3508776,0.3508776,0.348652,...,0.1204549,0.1347586,0.1176309,0.08328703,0.1004547,0.1098175,0.04620749,0.09983906,0.1551161,0.1171097
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,748.0,0.0,0.0,0.01258612,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1618.0,0.0,0.0,0.1291826,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,2351.0,0.2,0.1961916,0.2883014,0.697479,0.000904422,0.002409238,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,3169.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [29]:
del reshaped_train
#del week_of_month
gc.collect()

19

#데이터로더 작성

In [30]:
# When this cell is re-run, release the tensors and datasets built by the previous
# run before creating new ones.
if "train_x" in locals():
    del train_x
    del train_y
    del train_set
    del valid_x
    del valid_y
    del valid_set
    gc.collect()

class forecast(Dataset):
    """Sliding-window samples cut from a stack of equally long series.

    `x` and `y` are indexed as ``[series, time, ...]``. One sample is a pair
    ``(x[s, t:t+window_size], y[s, t+window_size:t+window_size+output_size])``.
    Samples are numbered series by series, with `t` increasing within a series.
    """

    def __init__(self, x,y,window_size,output_size):
        self.x, self.y = x, y
        self.window_size, self.output_size = window_size, output_size
        self.shape = self.x.shape

    def _windows_per_series(self):
        # Number of start positions that leave room for a full input window plus target.
        return self.shape[1] - (self.window_size + self.output_size) + 1

    def __len__(self):
        return self.shape[0] * self._windows_per_series()

    def __getitem__(self,idx):
        # Flat sample index -> (which series, where the input window starts).
        product_num, start = divmod(idx, self._windows_per_series())
        split = start + self.window_size

        history = self.x[product_num, start:split]
        future = self.y[product_num, split:split + self.output_size]
        return history, future

# Columns the model predicts; PSFA later turns amount / average price back into a quantity.
target_cols = ['판매금액','평균판매금액']
#target_cols = ['판매량']

window_size = 90   # days of history fed to the model
output_size = 21   # days to forecast
in_features = train.shape[-1]

# Number of leading days assigned to the training part (299 with the settings above,
# see the shapes printed below).
days = int((459-((window_size+output_size)+1)*2)*0.8)+window_size+output_size

# Split along the time axis: the first `days` days are Train, the remainder is Valid.
train_x = torch.from_numpy(np.reshape(train.values,(-1,459,in_features))[:,:days]).float()
train_y = torch.from_numpy(np.reshape(train[target_cols].values,(-1,459,len(target_cols)))[:,:days]).float()
valid_x = torch.from_numpy(np.reshape(train.values,(-1,459,in_features))[:,days:]).float()
valid_y = torch.from_numpy(np.reshape(train[target_cols].values,(-1,459,len(target_cols)))[:,days:]).float()

# Disabled alternative: the last `days` days are Train and the leading part is Valid.
'''train_x = torch.from_numpy(np.reshape(train.values,(-1,459,in_features))[:,-days:]).float()
train_y = torch.from_numpy(np.reshape(train[target_cols].values,(-1,459,len(target_cols)))[:,-days:]).float()
valid_x = torch.from_numpy(np.reshape(train.values,(-1,459,in_features))[:,:-days]).float()
valid_y = torch.from_numpy(np.reshape(train[target_cols].values,(-1,459,len(target_cols)))[:,:-days]).float()'''

print(train_x.shape,train_y.shape)
print(valid_x.shape,valid_y.shape)

train_set = forecast(train_x,train_y,window_size,output_size)
valid_set = forecast(valid_x,valid_y,window_size,output_size)

# Sliding windows per item in each split.
print(len(train_set)/15890,len(valid_set)/15890)

torch.Size([15890, 299, 67]) torch.Size([15890, 299, 2])
torch.Size([15890, 160, 67]) torch.Size([15890, 160, 2])
189.0 50.0


In [31]:
#189,50
#288.0 75.0

In [32]:

ds = iter(train_set)


for i in range(1):
    input,output = next(ds)
    print(input.shape)
    print(output.shape)
    print(input)

    print(output)

torch.Size([90, 67])
torch.Size([21, 2])
tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.2000, 0.2429,  ..., 0.0000, 0.0000, 0.0000]])
tensor([[0.0000, 0.0000],
        [0.3643, 0.2519],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.1214, 0.2519],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.2429, 0.2519],
        [0.2429, 0.2519],
        [0.2429, 0.2519],
        [0.1214, 0.2519],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.0000]])


# 모델작성

In [33]:
class Custom_LSTM(nn.Module):
    """LSTM encoder plus a two-layer head that forecasts all targets for `output_size` steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: LSTM hidden width per direction.
        output_size: Number of future steps predicted.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout used between LSTM layers and inside the head.
        bidirectional: Whether the LSTM runs in both directions.

    The defaults of `input_size` and `output_size` and the head width read the
    notebook globals `in_features`, `output_size` and `train_y` when the class is
    defined / instantiated. `forward` itself no longer touches any global.
    """
    def __init__(self, input_size=in_features, hidden_size=512, output_size=output_size, num_layers=1, dropout=0, bidirectional=True):
        super(Custom_LSTM, self).__init__()

        # 2 when bidirectional, else 1: multiplies the LSTM output width and state count.
        self.D = (1 + bidirectional)

        self.num_layers = num_layers

        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, bidirectional=bidirectional)

        # The head emits output_size * n_targets values, reshaped in forward().
        self.fc = nn.Sequential(
            nn.Linear(self.D*hidden_size, self.D*hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(self.D*hidden_size, output_size*train_y.shape[-1])
        )

        self.output_size = output_size

    def forward(self, x):
        """Map ``x`` of shape (batch, time, features) to (batch, output_size, n_targets)."""
        b, _, _ = x.shape
        hidden = self.init_hidden(b, x.device)
        lstm_out, _ = self.lstm(x, hidden)

        # Only the features at the last time step feed the head.
        # NOTE(review): with bidirectional=True the backward half at this position
        # has processed just the final step - confirm this is intended.
        last_output = lstm_out[:, -1, :]

        # The target count is inferred from the head width instead of the global
        # `train_y`, so a reloaded model runs without the training tensors in scope.
        output = self.fc(last_output).reshape(b, self.output_size, -1)

        return output

    def init_hidden(self, batch_size, device):
        """Return zero-filled (h0, c0) states for a batch of the given size."""
        return (torch.zeros(self.D*self.num_layers, batch_size, self.hidden_size, device=device),
                torch.zeros(self.D*self.num_layers, batch_size, self.hidden_size, device=device))

In [34]:
class Embedding_LSTM(nn.Module):
    """Variant of the LSTM forecaster that embeds the integer code stored in feature 0.

    Feature 0 of every time step is looked up in an embedding table and the
    resulting vector replaces it in front of the remaining features.

    Args:
        input_size: Number of raw features per time step (including the code column).
        hidden_size: LSTM hidden width per direction.
        output_size: Number of future steps predicted.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout used between LSTM layers and inside the head.
        bidirectional: Whether the LSTM runs in both directions.
        Embedding_size: Width of the embedding vector.

    The defaults of `input_size` and `output_size` and the head width read the
    notebook globals `in_features`, `output_size` and `train_y` when the class is
    defined / instantiated. `forward` itself no longer touches any global.
    """
    def __init__(self, input_size=in_features, hidden_size=512, output_size=output_size, num_layers=1, dropout=0, bidirectional=True, Embedding_size = 8):
        super(Embedding_LSTM, self).__init__()

        # 2 when bidirectional, else 1.
        self.D = (1 + bidirectional)

        self.num_layers = num_layers

        self.hidden_size = hidden_size

        # 3170 rows are hard-coded; every code found in feature 0 must be < 3170.
        # NOTE(review): presumably the number of label-encoded brands - confirm.
        self.Embedding = nn.Embedding(3170,Embedding_size)

        # One raw feature (the code) is swapped for Embedding_size values.
        self.lstm = nn.LSTM(input_size+Embedding_size-1, hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout, bidirectional=bidirectional)

        # The head emits output_size * n_targets values, reshaped in forward().
        self.fc = nn.Sequential(
            nn.Linear(self.D*hidden_size, self.D*hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(self.D*hidden_size, output_size*train_y.shape[-1])
        )

        self.output_size = output_size

    def forward(self, x):
        """Map ``x`` of shape (batch, time, features) to (batch, output_size, n_targets)."""
        b, _, _ = x.shape
        embedded = self.Embedding(x[:,:,0].long())

        features = torch.cat([embedded,x[:,:,1:]],-1)
        hidden = self.init_hidden(b, features.device)
        lstm_out, _ = self.lstm(features, hidden)

        # Only the features at the last time step feed the head.
        last_output = lstm_out[:, -1, :]

        # The target count is inferred from the head width instead of the global
        # `train_y`, so a reloaded model runs without the training tensors in scope.
        output = self.fc(last_output).reshape(b, self.output_size, -1)

        return output

    def init_hidden(self, batch_size, device):
        """Return zero-filled (h0, c0) states for a batch of the given size."""
        return (torch.zeros(self.D*self.num_layers, batch_size, self.hidden_size, device=device),
                torch.zeros(self.D*self.num_layers, batch_size, self.hidden_size, device=device))

#PSFA

In [41]:
# Per-item scaling constants of the target columns only, laid out as
# (a/b/c, item, 1, target) so they broadcast against (item, day, target) predictions.
values_for_reverse_scaling = torch.zeros((3,15890,1,len(target_cols))).to(device)


for i in range(15890):
    for j,col in enumerate(target_cols):
        for k in range(3):
            # Pick the constants of target `col` out of the table built for all scaled columns.
            values_for_reverse_scaling[k,i,:,j] = values_for_scaling[k,i,scale_columns.index(col)]


print(values_for_reverse_scaling[0])
print(values_for_reverse_scaling[1])
print(values_for_reverse_scaling[2])

print(values_for_reverse_scaling.shape)

tensor([[[0., 0.]],

        [[0., 0.]],

        [[0., 0.]],

        ...,

        [[0., 0.]],

        [[0., 0.]],

        [[0., 0.]]], device='cuda:0')
tensor([[[ 28000.,  13500.]],

        [[163800.,  37800.]],

        [[112980.,  12900.]],

        ...,

        [[232600.,  14425.]],

        [[122500.,  14900.]],

        [[ 49800.,  49800.]]], device='cuda:0')
tensor([[[0., 0.]],

        [[0., 0.]],

        [[0., 0.]],

        ...,

        [[0., 0.]],

        [[0., 0.]],

        [[0., 0.]]], device='cuda:0')
torch.Size([3, 15890, 1, 2])


In [42]:
def PSFA(pred,true,values_for_reverse_scaling,group_item,with_price=True):
    """Score scaled predictions against scaled targets for one group of items.

    Args:
        pred: Scaled predictions, shape (items, days, targets).
        true: Scaled ground truth, same shape as `pred`.
        values_for_reverse_scaling: Scaling constants (a, b, c) stacked on the
            first axis; values are restored as ``x * (b - c) + a``.
        group_item: Not used by the computation.
        with_price: If true, target 0 is divided by target 1 (amount / average
            price) before scoring; non-finite quotients count as 0.

    Returns:
        0-dim tensor ``1 - sum(|true - pred| * true / (max(true, pred) * daily_total)) / days``,
        where `daily_total` is `true` summed over the items of the group.
    """
    _, horizon, _ = pred.shape

    offset = values_for_reverse_scaling[0]
    span = values_for_reverse_scaling[1]-values_for_reverse_scaling[2]

    def restore(scaled):
        # Undo the scaling, then floor everything at zero.
        raw = scaled*span+offset
        return torch.where(raw <= 0, torch.zeros_like(raw), raw)

    def to_quantity(values):
        # amount / average price; 0/0 and x/0 are mapped to 0.
        return torch.nan_to_num(values[:,:,0]/values[:,:,1], nan=0.0, posinf=0, neginf=0)

    pred = restore(pred)
    true = restore(true)

    if with_price:
        pred = to_quantity(pred)
        true = to_quantity(true)

    daily_total = torch.sum(true,0)
    weighted_error = torch.abs(true-pred)*true
    scale = 1e-8+torch.maximum(true,pred)*daily_total

    return 1- torch.sum(weighted_error/scale)/horizon

In [43]:
def get_psfa(input,true,with_price=True):
    """Accumulate the PSFA score per top-level category over every forecast window.

    The notebook-level `model` is evaluated on each window of `window_size` steps
    taken from `input`, and its forecast is scored against the next `output_size`
    steps of `true`, separately for the items of each '대분류' category.

    Args:
        input: Feature tensor of shape (items, days, features).
        true: Target tensor of shape (items, days, targets).
        with_price: Passed through to `PSFA`.

    Returns:
        1-D float array with one entry per '대분류' category, holding the score
        summed over all windows. Divide by the number of windows for the mean.

    Note:
        Reads the notebook globals `model`, `device`, `original_df`, `window_size`,
        `output_size` and `values_for_reverse_scaling`, and leaves `model` in eval mode.
    """
    # Item IDs of every top-level category, in order of first appearance.
    group_item = [original_df[original_df['대분류']==item]['ID'].unique()
                  for item in original_df['대분류'].unique()]
    psfa = np.zeros(len(group_item))
    n_items = input.shape[0]

    # Switching to eval mode and disabling autograd is needed once, not per window.
    model.eval()
    with torch.no_grad():
        for i in tqdm.tqdm(range(window_size,input.shape[1]-output_size+1)):
            x = input[:, i-window_size: i].to(device)
            y = true[:,  i:i+output_size].to(device)

            pred = torch.zeros((n_items,output_size,true.shape[-1]), device=device)

            # Predict in chunks to bound memory use.
            for j in range(0,n_items,1024):
                pred[j:j+1024] = model(x[j:j+1024])

            for j, ids in enumerate(group_item):
                psfa[j] += PSFA(pred[ids],y[ids],values_for_reverse_scaling[:,ids],ids,with_price).item()
    return psfa

#학습

In [63]:
class RMSELoss(nn.Module):
    """Root-mean-square error taken over every element of the batch."""

    def __init__(self):
        super().__init__()

    def forward(self, y_pred, y_true):
        """Return sqrt(mean((y_pred - y_true) ** 2)) as a 0-dim tensor."""
        residual = y_pred - y_true
        return torch.sqrt(torch.mean(residual ** 2))

In [69]:
# Release cached GPU memory left over from earlier runs.
torch.cuda.empty_cache()

# Model under training: 4-layer bidirectional LSTM without dropout.
model = Custom_LSTM( num_layers=4, dropout=0, bidirectional=True )

# Disabled alternative (string literal, never executed): embedding model
# initialised from a pre-trained embedding table.
'''model = Embedding_LSTM( num_layers=1, dropout=0.5, bidirectional=False, Embedding_size = 4 )
loaded_state_dict = torch.load('/content/drive/MyDrive/lgaimers/Brand_Embedding.pt') # nn.Embedding(15890,8)의 결과를 제품,대분류,중분류,소분류,브랜드를 분류할 수 있도록 1000 에폭동안 학습 시킨 것
model.Embedding.weight.data = loaded_state_dict['Embedding.weight']
model.Embedding.requires_grad = True'''

# Per-epoch history; the training loop appends to these lists and saves them.
train_losses = []
valid_losses = []

train_psfas = []
valid_psfas = []

lrs = []
# Index of the epoch with the lowest validation loss so far.
min_epoch = 0
# Initial learning rate handed to the optimizer.
max_lr = 1e-4

# Target directory for checkpoints and histories. The training loop appends file
# names directly to this string, so the trailing '/' is required.
model_save_path = '/content/drive/MyDrive/lgaimers/Uni-LSTM,4week/'

In [70]:
gc.collect()
batch_size = 1024

model = model.to(device)
# Loss alternatives that were tried:
#criterion = nn.MSELoss()
#criterion = nn.L1Loss()
criterion = RMSELoss()

optimizer = torch.optim.Adam(model.parameters(),lr=max_lr)

# Exponential decay: after k scheduler steps the learning rate is max_lr * 0.95**k.
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda epoch: 0.95 ** epoch)

In [71]:
# Total number of epochs the training loop counts up to.
epochs = 50
# Training windows are shuffled every epoch; validation windows keep their order.
train_loader = DataLoader(train_set, batch_size=batch_size,pin_memory=True, num_workers=1, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size,pin_memory=True, num_workers=1, shuffle=False)

In [67]:

def train_begin(training,loader,running_loss):
    """Run one pass over `loader`, updating the model only when `training` is true.

    Args:
        training: True runs backward pass and optimizer step for every batch;
            False only evaluates the loss.
        loader: Iterable yielding ``(x, y)`` batches.
        running_loss: One-element float array; the loss of each batch is added
            to it in place, so the caller reads the epoch total from it afterwards.

    Note:
        Uses the notebook globals `model`, `criterion`, `optimizer`, `device`,
        `epoch` and `epochs`. Switching train/eval mode and disabling gradients
        for validation is left to the caller.
    """
    desc = "Train" if training else "Valid"

    progress = tqdm.tqdm(loader,desc=f'Epoch:{epoch+1}/{epochs}')
    for batch_no, (x, y) in enumerate(progress, start=1):
        x, y = x.to(device), y.to(device)

        if training:
            optimizer.zero_grad()

        loss = criterion(model(x), y)

        if training:
            loss.backward()
            optimizer.step()

        # In-place add keeps the caller's array object updated.
        running_loss += [loss.detach().cpu().numpy()]

        # Show the mean loss over the batches seen so far.
        progress.set_description(f'Epoch:{epoch+1}/{epochs} | {desc}_Loss:{np.round(running_loss/batch_no,6)}')

In [72]:

fit_time = time.time()
# Continue after the epochs already recorded (0 on a fresh run).
start_epoch = len(lrs)

# Re-print the history of the epochs that were already completed.
for i in range(len(valid_losses)):
    print(f'Epoch:{i+1}/{epochs} | Train_Loss:{np.round(train_losses[i],4)} | Train_PSFA:{np.round(train_psfas[i],4)}')
    print(f'Epoch:{i+1}/{epochs} | Valid_Loss:{np.round(valid_losses[i],4)} | Valid_PSFA:{np.round(valid_psfas[i],4)}')
    print()

for epoch in range(start_epoch,epochs):

    # Decay the learning rate before every epoch except the very first one.
    if epoch != 0 :
        scheduler.step()
        print("lr이 변경되었습니다.",optimizer.param_groups[0]['lr'])


    # One-element arrays so train_begin can accumulate the loss in place.
    running_train_loss = np.array([0.0])
    running_valid_loss = np.array([0.0])

    model.train()
    train_begin(True,train_loader,running_train_loss)
    model.eval()
    with torch.no_grad():
        train_begin(False,valid_loader,running_valid_loss)


    # Mean loss per batch for this epoch.
    train_losses.append((running_train_loss/len(train_loader)))
    valid_losses.append((running_valid_loss/len(valid_loader)))
    lrs.append(optimizer.param_groups[0]['lr'])

    if (os.path.exists(model_save_path)==False):
        os.makedirs(model_save_path,exist_ok=True)

    # PSFA on both splits. The third argument enables the amount / average-price
    # conversion when the first target is the sales amount.
    # len(set)//15890 is the number of windows per item; /5 averages the per-category
    # scores (assumes five '대분류' categories).
    train_psfa = get_psfa(train_x,train_y,target_cols[0]=='판매금액')
    train_psfas.append(np.sum(train_psfa/(len(train_set)//15890))/5)
    print(train_psfas[-1])

    valid_psfa = get_psfa(valid_x,valid_y,target_cols[0]=='판매금액')
    valid_psfas.append(np.sum(valid_psfa/(len(valid_set)//15890))/5)
    print(valid_psfas[-1])

    # Checkpoint the whole model object plus the histories after every epoch.
    # File names are appended directly to model_save_path, which therefore has to
    # end with a path separator.
    torch.save(model, f'{model_save_path}Epoch({epoch+1}).pt')
    np.save(model_save_path+'tl',train_losses)
    np.save(model_save_path+'vl',valid_losses)
    np.save(model_save_path+'tp',train_psfas)
    np.save(model_save_path+'vp',valid_psfas)
    np.save(model_save_path+'lrs',lrs)



    # Track the best epoch by validation loss (ties count as an improvement) and
    # keep a copy of that model as Best.pt.
    if sum(valid_losses[min_epoch]) >= sum(valid_losses[-1]):
        print(f'Valid Loss가 최소가 됐습니다. ({sum(valid_losses[min_epoch]):.4f}({min_epoch+1}) -> {sum(valid_losses[-1]):.4f}({len(valid_losses)}))')
        print(f'해당 모델이 {model_save_path}Best.pt 경로에 저장됩니다.')
        min_epoch = len(valid_losses)-1
        torch.save(model, model_save_path+'Best.pt')
    else:
        print(f'Valid_Loss가 최소가 되지 못했습니다.(최소 Epoch:{min_epoch+1} : {sum(valid_losses[min_epoch]):.4f})')
    print('')

print('학습 최종 시간: {:.2f} 분\n' .format((time.time()- fit_time)/60))

Epoch:1/50 | Train_Loss:[0.238385]: 100%|██████████| 2933/2933 [07:47<00:00,  6.28it/s]
Epoch:1/50 | Valid_Loss:[0.224185]: 100%|██████████| 776/776 [00:44<00:00, 17.59it/s]
100%|██████████| 189/189 [03:05<00:00,  1.02it/s]


0.5936715100808119


100%|██████████| 50/50 [00:48<00:00,  1.04it/s]


0.5575101318359375
Valid Loss가 최소가 됐습니다. (0.2242(1) -> 0.2242(1))
해당 모델이 /content/drive/MyDrive/lgaimers/Uni-LSTM,4week/Best.pt 경로에 저장됩니다.

lr이 변경되었습니다. 9.5e-05


Epoch:2/50 | Train_Loss:[0.217927]: 100%|██████████| 2933/2933 [07:47<00:00,  6.27it/s]
Epoch:2/50 | Valid_Loss:[0.236735]: 100%|██████████| 776/776 [00:44<00:00, 17.63it/s]
100%|██████████| 189/189 [03:05<00:00,  1.02it/s]


0.642717537047371


100%|██████████| 50/50 [00:48<00:00,  1.04it/s]


0.6088264694213866
Valid_Loss가 최소가 되지 못했습니다.(최소 Epoch:1 : 0.2242)

lr이 변경되었습니다. 9.025e-05


Epoch:3/50 | Train_Loss:[0.215283]: 100%|██████████| 2933/2933 [07:47<00:00,  6.27it/s]
Epoch:3/50 | Valid_Loss:[0.223114]: 100%|██████████| 776/776 [00:43<00:00, 17.64it/s]
100%|██████████| 189/189 [03:04<00:00,  1.02it/s]


0.6491068764338418


100%|██████████| 50/50 [00:48<00:00,  1.03it/s]


0.6192983016967772
Valid Loss가 최소가 됐습니다. (0.2242(1) -> 0.2231(3))
해당 모델이 /content/drive/MyDrive/lgaimers/Uni-LSTM,4week/Best.pt 경로에 저장됩니다.

lr이 변경되었습니다. 8.573749999999999e-05


Epoch:4/50 | Train_Loss:[0.212322]: 100%|██████████| 2933/2933 [07:47<00:00,  6.27it/s]
Epoch:4/50 | Valid_Loss:[0.220637]: 100%|██████████| 776/776 [00:43<00:00, 17.72it/s]
100%|██████████| 189/189 [03:05<00:00,  1.02it/s]


0.6646840978551793


100%|██████████| 50/50 [00:48<00:00,  1.04it/s]


0.637713981628418
Valid Loss가 최소가 됐습니다. (0.2231(3) -> 0.2206(4))
해당 모델이 /content/drive/MyDrive/lgaimers/Uni-LSTM,4week/Best.pt 경로에 저장됩니다.

lr이 변경되었습니다. 8.1450625e-05


Epoch:5/50 | Train_Loss:[0.211375]: 100%|██████████| 2933/2933 [07:47<00:00,  6.28it/s]
Epoch:5/50 | Valid_Loss:[0.218832]: 100%|██████████| 776/776 [00:44<00:00, 17.60it/s]
100%|██████████| 189/189 [03:05<00:00,  1.02it/s]


0.666559015506159


100%|██████████| 50/50 [00:48<00:00,  1.04it/s]


0.636822135925293
Valid Loss가 최소가 됐습니다. (0.2206(4) -> 0.2188(5))
해당 모델이 /content/drive/MyDrive/lgaimers/Uni-LSTM,4week/Best.pt 경로에 저장됩니다.

lr이 변경되었습니다. 7.737809374999998e-05


Epoch:6/50 | Train_Loss:[0.21046]: 100%|██████████| 2933/2933 [07:47<00:00,  6.27it/s]
Epoch:6/50 | Valid_Loss:[0.225248]: 100%|██████████| 776/776 [00:43<00:00, 17.66it/s]
100%|██████████| 189/189 [03:05<00:00,  1.02it/s]


0.6713683516890915


100%|██████████| 50/50 [00:48<00:00,  1.04it/s]


0.6381230163574219
Valid_Loss가 최소가 되지 못했습니다.(최소 Epoch:5 : 0.2188)

lr이 변경되었습니다. 7.350918906249998e-05


Epoch:7/50 | Train_Loss:[0.20884]: 100%|██████████| 2933/2933 [07:47<00:00,  6.27it/s]
Epoch:7/50 | Valid_Loss:[0.232493]: 100%|██████████| 776/776 [00:44<00:00, 17.62it/s]
100%|██████████| 189/189 [03:05<00:00,  1.02it/s]


0.6638642326233879


100%|██████████| 50/50 [00:47<00:00,  1.04it/s]


0.6401108779907226
Valid_Loss가 최소가 되지 못했습니다.(최소 Epoch:5 : 0.2188)

lr이 변경되었습니다. 6.983372960937497e-05


Epoch:8/50 | Train_Loss:[0.206253]: 100%|██████████| 2933/2933 [07:47<00:00,  6.27it/s]
Epoch:8/50 | Valid_Loss:[0.232059]: 100%|██████████| 776/776 [00:44<00:00, 17.55it/s]
100%|██████████| 189/189 [03:05<00:00,  1.02it/s]


0.6742771451435392


100%|██████████| 50/50 [00:48<00:00,  1.04it/s]


0.6407309036254883
Valid_Loss가 최소가 되지 못했습니다.(최소 Epoch:5 : 0.2188)

lr이 변경되었습니다. 6.634204312890623e-05


Epoch:9/50 | Train_Loss:[0.202372]: 100%|██████████| 2933/2933 [07:47<00:00,  6.27it/s]
Epoch:9/50 | Valid_Loss:[0.237213]: 100%|██████████| 776/776 [00:44<00:00, 17.62it/s]
100%|██████████| 189/189 [03:05<00:00,  1.02it/s]


0.6743968903072297


100%|██████████| 50/50 [00:48<00:00,  1.04it/s]


0.6454145584106445
Valid_Loss가 최소가 되지 못했습니다.(최소 Epoch:5 : 0.2188)

lr이 변경되었습니다. 6.30249409724609e-05


Epoch:10/50 | Train_Loss:[0.197831]: 100%|██████████| 2933/2933 [07:47<00:00,  6.27it/s]
Epoch:10/50 | Valid_Loss:[0.245243]: 100%|██████████| 776/776 [00:44<00:00, 17.63it/s]
100%|██████████| 189/189 [03:04<00:00,  1.02it/s]


0.6730194414734211


 88%|████████▊ | 44/50 [00:42<00:05,  1.02it/s]


KeyboardInterrupt: ignored

#과거 학습 이어서 하기

In [153]:
# Reload the saved histories and print the state of a previous training run.
# The trailing '/' is required: the training loop builds its output names by
# appending directly to model_save_path (e.g. model_save_path+'tl'), so without it
# a resumed run would write next to this directory instead of into it.
model_save_path = '/content/drive/MyDrive/lgaimers/Uni-LSTM,4weeks/'

train_losses = np.load(f'{model_save_path}tl.npy').tolist()
valid_losses = np.load(f'{model_save_path}vl.npy').tolist()

train_psfas = np.load(f'{model_save_path}tp.npy').tolist()
valid_psfas = np.load(f'{model_save_path}vp.npy').tolist()

lrs = np.load(f'{model_save_path}lrs.npy').tolist()
for i in range(len(train_losses)):
    print(i+1,train_losses[i],valid_losses[i])
    print(i+1,train_psfas[i],valid_psfas[i])

# Resume from the last recorded learning rate and the last saved epoch.
max_lr = lrs[-1]
# The checkpoint is a fully pickled model object: only load files you trust.
# NOTE(review): PyTorch releases that default torch.load to weights_only=True
# need weights_only=False for such a file - confirm against the installed version.
model = torch.load(f'{model_save_path}Epoch({len(valid_losses)}).pt').to(device)
min_epoch = np.argmin(valid_losses)

print(np.argmin(valid_losses)+1,np.min(valid_losses))

1 [0.23499313737095887] [0.22410208496658765]
1 0.6520223223973833 0.6147996444702148
2 [0.21723560702361794] [0.21974136574030614]
2 0.6560108729771205 0.6179961166381835
3 [0.21609456736074956] [0.2304151751648289]
3 0.6564433990962921 0.6230817413330078
4 [0.21550128112321335] [0.2227449581451887]
4 0.6533810570126488 0.6126895523071288
5 [0.21488464513696423] [0.2305770234358449]
5 0.6407948004505621 0.6147012405395509
6 [0.21429061531471874] [0.22909434375919632]
6 0.6545616271003843 0.6249274520874024
7 [0.21375936587292546] [0.2320450030977732]
7 0.6550140703796709 0.6279135055541992
8 [0.21323235524501405] [0.2317490018544162]
8 0.6598802435335027 0.6246457443237304
9 [0.21282346628446283] [0.22699835895531878]
9 0.6564229652364417 0.6301397018432617
10 [0.21256517205594386] [0.21794303916586735]
10 0.666011636723917 0.633115135192871
11 [0.21228673223989067] [0.2270991581986591]
11 0.6429368841585028 0.6200881271362304
12 [0.21197230035316567] [0.22495613687614113]
12 0.658202

#제출용 파일 생성

In [44]:
# Disabled alternative test-input builder, kept inside a string literal so it never runs.
# The quoted code would delete the 2023-02-23..2023-03-28 positions from the middle axis
# of the reshaped `train` values and keep the last `window_size` steps.
# Because the string is the cell's last expression, the notebook echoes it as output.
'''def test_with_outliar():

    date = original_df.columns[6:]
    start = np.where(date == '2023-02-23')[0][0]
    end = np.where(date == '2023-03-28')[0][0]
    without_outliar = np.delete( np.reshape(train.values,(15890,-1,in_features)),list(range(start,end+1)),1)[:,-window_size:]
    return without_outliar
test = torch.from_numpy(test_with_outliar()).float().to(device)'''


"def test_with_outliar():\n\n    date = original_df.columns[6:]\n    start = np.where(date == '2023-02-23')[0][0]\n    end = np.where(date == '2023-03-28')[0][0]\n    without_outliar = np.delete( np.reshape(train.values,(15890,-1,in_features)),list(range(start,end+1)),1)[:,-window_size:]\n    return without_outliar\ntest = torch.from_numpy(test_with_outliar()).float().to(device)"

In [45]:
# Build the inference input in one step: view the `train` values as
# (15890, <inferred>, in_features), convert to a float32 tensor and move it to `device`.
test = torch.from_numpy(
    np.reshape(train.values, (15890, -1, in_features))
).float().to(device)
# Reclaim host memory; the collected-object count is echoed as the cell output.
gc.collect()

0

In [46]:
# Directory holding the per-epoch checkpoints (`Epoch(N).pt`) loaded by the cells below.
# NOTE(review): this ends in "4week" while `model_save_path` in the resume cell ends in
# "4weeks" — confirm which directory is intended.
path = '/content/drive/MyDrive/lgaimers/Uni-LSTM,4week'

In [47]:
# Sanity-check checkpoints 1..15: run each one over the whole `test` tensor and
# print how many raw (still scaled) predictions are negative in output channel 0,
# in channel 1, and overall.
num_test_rows = test.shape[0]  # taken from the input instead of a hard-coded row count
for i in range(1,16):
    # map_location lets a GPU-saved checkpoint load on a CPU-only runtime as well.
    model = torch.load(f'{path}/Epoch({i}).pt', map_location=device).to(device)
    model.eval()
    # Output buffer stays on the host; each batch result is copied into it.
    pred = torch.zeros((num_test_rows,21,2))
    with torch.no_grad():
        # Predict in slices of 1024 rows; the last slice may be shorter.
        for j in range(0,num_test_rows,1024):
            pred[j:j+1024] = model(test[j:j+1024])
    print(i,(pred[:,:,0]<0).sum(),(pred[:,:,1]<0).sum(),(pred<0).sum())

1 tensor(157) tensor(187) tensor(344)
2 tensor(101586) tensor(85795) tensor(187381)
3 tensor(3782) tensor(26226) tensor(30008)
4 tensor(1579) tensor(620) tensor(2199)
5 tensor(50317) tensor(38421) tensor(88738)
6 tensor(14532) tensor(2187) tensor(16719)
7 tensor(16352) tensor(8035) tensor(24387)
8 tensor(8427) tensor(1476) tensor(9903)
9 tensor(5139) tensor(390) tensor(5529)
10 tensor(2839) tensor(351) tensor(3190)
11 tensor(3321) tensor(198) tensor(3519)
12 tensor(7293) tensor(95) tensor(7388)
13 tensor(5278) tensor(284) tensor(5562)
14 tensor(11658) tensor(1215) tensor(12873)
15 tensor(10198) tensor(1389) tensor(11587)


In [48]:
# Load the epoch-15 checkpoint used for the submission. map_location remaps the
# stored tensors onto `device`, so the load also works on a CPU-only runtime.
model = torch.load(f'{path}/Epoch(15).pt', map_location=device).to(device)

In [49]:
# Batched inference with the selected checkpoint: results are written into a
# pre-allocated (15890, 21, 2) buffer that lives on `device`.
model.eval()
pred = torch.zeros((15890, 21, 2)).to(device)
with torch.no_grad():
    for lo in range(0, 15890, 1024):
        hi = lo + 1024  # slicing past the end is clipped automatically
        pred[lo:hi] = model(test[lo:hi])

In [50]:
# Map the raw model outputs back with the stored constants:
# multiply by (entry 1 - entry 2) and add entry 0 of `values_for_reverse_scaling`.
# NOTE: `pred` is reassigned, so re-running this cell without re-running the
# inference cell first applies the transform twice.
scale_span = values_for_reverse_scaling[1] - values_for_reverse_scaling[2]
scale_offset = values_for_reverse_scaling[0]
pred = pred * scale_span + scale_offset
print(pred)

tensor([[[ -129.3579,   537.5431],
         [  763.2554,   548.3760],
         [  373.3102,   461.6502],
         ...,
         [ 1369.2937,   628.9977],
         [ 1091.9342,   682.3336],
         [  935.2530,   518.1526]],

        [[19816.3809, 10956.9336],
         [30863.5684, 13618.1328],
         [37765.4883, 14698.5020],
         ...,
         [25299.5488, 11586.1680],
         [18887.3223, 11194.4775],
         [19294.1465, 11309.2822]],

        [[ 2025.1229,   201.0066],
         [ 7230.8330,   832.3928],
         [ 6137.5400,  1206.9567],
         ...,
         [14350.9658,  3839.9336],
         [15636.6914,  3978.4128],
         [15833.2529,  4433.5601]],

        ...,

        [[ 6505.7988,   498.0061],
         [ 6907.1616,   843.6465],
         [11022.1807,   941.9076],
         ...,
         [16559.6230,  2384.4045],
         [15768.7910,  2202.7373],
         [16579.0625,  2322.1321]],

        [[34343.4883, 10612.0596],
         [36521.7070,  9761.5977],
         [28

In [51]:
# Zero out every non-positive prediction in place.
pred.masked_fill_(pred <= 0, 0)
if target_cols[0] != '판매금액':
    # The first output channel is used directly.
    pred = pred[:, :, 0]
else:
    # The first target is '판매금액': divide output channel 0 by channel 1.
    pred = torch.div(pred[:, :, 0], pred[:, :, 1])
    print('a')
    # Alternative denominator, kept disabled:
    #pred = pred[:,:,0]/torch.from_numpy(median_mean_sales).to(device)
# Division by a zeroed denominator yields NaN/inf; replace all of those with 0.
pred = torch.nan_to_num(pred, nan=0.0, posinf=0, neginf=0)
print(pred)

a
tensor([[ 0.0000,  1.3918,  0.8086,  ...,  2.1769,  1.6003,  1.8050],
        [ 1.8086,  2.2664,  2.5693,  ...,  2.1836,  1.6872,  1.7060],
        [10.0749,  8.6868,  5.0851,  ...,  3.7373,  3.9304,  3.5712],
        ...,
        [13.0637,  8.1873, 11.7020,  ...,  6.9450,  7.1587,  7.1396],
        [ 3.2363,  3.7414,  3.5048,  ...,  3.5151,  3.3504,  3.1396],
        [ 0.9966,  0.5945,  0.7647,  ...,  0.4796,  0.5334,  0.5720]],
       device='cuda:0')


In [52]:
# Convert the final predictions to a host-side NumPy array.
# Rounding to whole numbers is deliberately left disabled:
#result = np.round(pred.detach().cpu().numpy(),0)
result = pred.detach().to('cpu').numpy()
print(result)

[[ 0.          1.3918467   0.80864304 ...  2.1769454   1.6002939
   1.804976  ]
 [ 1.8085699   2.2663584   2.5693426  ...  2.1835992   1.6872
   1.7060452 ]
 [10.074907    8.686804    5.085137   ...  3.7372954   3.9303844
   3.5712278 ]
 ...
 [13.063694    8.187269   11.701977   ...  6.944972    7.1587253
   7.1395864 ]
 [ 3.23627     3.7413657   3.504811   ...  3.5151274   3.350436
   3.1395714 ]
 [ 0.996571    0.5945166   0.7647255  ...  0.47960475  0.5333855
   0.5719916 ]]


In [54]:
# Load the submission template and overwrite every column after the first one
# with the predictions, then preview the first 50 rows.
sample_submission_path = f'{root}/sample_submission.csv'
submit = pd.read_csv(sample_submission_path)
submit.iloc[:, 1:] = result
submit.head(50)

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0.0,1.391847,0.808643,1.249671,0.945171,0.0,0.145079,0.420211,0.367943,...,2.325213,4.027974,0.98015,3.6694,0.811478,2.66774,2.187782,2.176945,1.600294,1.804976
1,1,1.80857,2.266358,2.569343,2.829521,2.370828,2.245547,2.246221,2.257312,2.711797,...,2.434697,2.259407,2.032149,2.073261,2.327963,2.868234,2.472914,2.183599,1.6872,1.706045
2,2,10.074907,8.686804,5.085137,6.046232,4.679664,4.905943,4.423695,5.859787,5.497613,...,4.238907,4.580894,4.495837,3.643729,3.936167,4.321255,4.347471,3.737295,3.930384,3.571228
3,3,8.375004,8.055541,5.257615,6.083681,5.57605,5.878366,5.513646,7.048862,6.178334,...,5.127876,5.389464,5.669465,3.878528,4.418717,5.228044,4.910802,4.097291,4.012803,3.436016
4,4,0.0,0.0,0.0,0.0,0.0,7.854422,10.990368,6.896644,7.321042,...,9.365767,9.292787,8.977776,2.644585,2.217104,2.204863,7.007628,8.492375,8.087751,8.988188
5,5,0.0,0.0,0.0,0.0,147.997421,4.59214,7.558278,4.347155,5.124327,...,6.031748,6.074632,5.828967,1.625724,1.300075,1.268376,4.673478,5.457235,5.371216,5.994731
6,6,4.734375,0.0,0.0,0.0,174.066727,5.380468,8.860186,5.100146,6.008591,...,7.071372,7.123971,6.832649,1.903946,1.524264,1.488241,5.481586,6.398734,6.29418,7.028578
7,7,0.0,0.0,0.0,0.0,1246.355713,37.915283,62.456459,35.95583,42.352539,...,49.842701,50.22068,48.164921,13.410765,10.736161,10.483322,38.634163,45.10532,44.361961,49.547806
8,8,1.124406,1.356149,1.328984,1.256871,1.308071,1.179506,1.260524,1.064171,1.316831,...,1.321012,1.346227,1.239484,1.256882,1.332023,1.362762,1.386261,1.323773,1.391168,1.286687
9,9,0.0,0.0,0.0,0.0,87.626587,4.142594,7.220232,3.975522,4.874536,...,5.706547,5.749309,5.494081,1.52082,1.255172,1.16575,4.477444,5.151771,5.163364,5.739284


In [55]:
# Write the filled-in submission table to a CSV in the current working directory,
# omitting the DataFrame index.
submit.to_csv('./baseline_submit.csv', index=False)

In [56]:
# Plot, for each of the 15890 series, the historical '판매량' values (transformed
# with the stored scaling constants) followed by the 21 predicted values.

# Position of '판매량' among the scaled columns; looked up once instead of per element.
sales_col = scale_columns.index('판매량')

# Keep only the '판매량' scaling constants, shaped (3, 15890, 1) so that each
# entry broadcasts across the time axis below. A single slice assignment
# replaces the former 15890 x 3 element-by-element Python loop.
values_for_ = np.zeros((3,15890,1))
values_for_[:, :, 0] = values_for_scaling[:3, :15890, sales_col]
gc.collect()


# (15890, time) array: '판매량' feature multiplied by (entry 1 - entry 2) plus entry 0.
visualization = np.reshape(train.values,(15890,-1,in_features))[:,:,scale_columns_idx[sales_col]]*(values_for_[1]-values_for_[2])+values_for_[0]

# The x positions are identical for every series, so build them once.
history_x = list(range(visualization.shape[1]))
forecast_x = list(range(visualization.shape[1],visualization.shape[1]+21))

for i in range(15890):
    sns.lineplot(x=history_x,y=visualization[i])
    sns.lineplot(x=forecast_x,y=result[i])
    # Green marker at x = 459 - window_size
    # NOTE(review): presumably the start of the model's input window — confirm.
    plt.vlines(459-window_size,-3,0,color='green', linestyle='solid', linewidth=3)

    plt.grid(True,linestyle='--')
    plt.title(f'{i}')
    plt.show()

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Be sure to run this cell if you continue with other work after the visualization:
# it deletes the `visualization` array and triggers a garbage-collection pass.
del visualization
gc.collect()