---

# Forecasting Fruit Price using StemGNN
### Revision 23.04.05

---

## 1 Environment

In [6]:
import torch
print(torch.__version__, torch.cuda.is_available())
# 1.7.1+cu110 True

1.7.1+cu110 True


In [7]:
#import torch
import torch.nn as nn
import torch.utils.data as torch_data
import torch.nn.functional as F

from datetime import datetime
import pandas as pd
import numpy as np
import argparse, json, time, os, sys, importlib, itertools, shutil
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

import warnings
warnings.filterwarnings(action='ignore')

#from models.handler import train, test
sys.path.insert(0, './models')
import handler
importlib.reload(handler)

import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')
%matplotlib inline

## 2 Data Check

In [29]:
#with Price Data
price_dataset = pd.read_csv('./dataset/price_dataset.csv',index_col=[0])
price_dataset.index = pd.to_datetime(price_dataset.index)
price_dataset.to_csv("./dataset/pre_price.csv", header=True, index=False)

## 3 Data Smoothing

In [30]:
# Data Smoothing using Rolling Average
plot_result = [price_dataset]
for i in [7, 14, 21, 28]: # 7d, 14d, 21d, 28d
    rolling_tmp = pd.DataFrame(price_dataset.rolling(i).mean()).dropna()
    for j in range(rolling_tmp.shape[1]):
        if j in range(0,5):
            rolling_tmp.iloc[:,j] = rolling_tmp.iloc[:,j].round(decimals = 0).astype('int')
        if j in range(4,10):
            rolling_tmp.iloc[:,j] = rolling_tmp.iloc[:,j].round(decimals = 1).astype('float64')
    rolling_tmp.iloc[:,10] = rolling_tmp.iloc[:,10].apply(np.ceil).astype('int')
    rolling_tmp.to_csv("./dataset/pre_price_s_"+str(i)+"d.csv", header=True, index=False)
    plot_result.append(rolling_tmp)

## 4 Train

In [31]:
def stemgnn (data, window_size, horizon, train_length, valid_length, test_length) :
    print('Dataset:', data, '| Window_size:',window_size,'| Horizon: ',horizon)
    
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', type=bool, default=True)
    parser.add_argument('--evaluate', type=bool, default=True)
    parser.add_argument('--dataset', type=str, default=data) #ECG_data
    parser.add_argument('--window_size', type=int, default=window_size)
    parser.add_argument('--horizon', type=int, default=horizon)
    parser.add_argument('--train_length', type=float, default=train_length)
    parser.add_argument('--valid_length', type=float, default=valid_length)
    parser.add_argument('--test_length', type=float, default=test_length)
    parser.add_argument('--epoch', type=int, default=200)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--multi_layer', type=int, default=5)
    #parser.add_argument('--device', type=str, default='cpu')
    parser.add_argument('--device', type=str, default='cuda')
    parser.add_argument('--validate_freq', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--norm_method', type=str, default='z_score')
    parser.add_argument('--optimizer', type=str, default='RMSProp')
    parser.add_argument('--early_stop', type=bool, default=True)
    parser.add_argument('--early_stop_step', type=int, default=20)
    parser.add_argument('--exponential_decay_step', type=int, default=5)
    parser.add_argument('--decay_rate', type=float, default=0.5)
    parser.add_argument('--dropout_rate', type=float, default=0.5)
    parser.add_argument('--leakyrelu_rate', type=int, default=0.2)

    #args = parser.parse_args()
    args = parser.parse_args(args=[])
    data_file = os.path.join('dataset', args.dataset + '.csv')
    data = pd.read_csv(data_file).values
    
    # make dirs
    args.dataset = args.dataset + '_w_'+str(args.window_size)+'_h_'+str(args.horizon)
    result_train_file = os.path.join('output', args.dataset, 'train')
    result_test_file = os.path.join('output', args.dataset, 'test')
    if not os.path.exists(result_train_file):
        os.makedirs(result_train_file)
    if not os.path.exists(result_test_file):
        os.makedirs(result_test_file)

    # split data
    #train_ratio = args.train_length / (args.train_length + args.valid_length + args.test_length)
    #valid_ratio = args.valid_length / (args.train_length + args.valid_length + args.test_length)
    #test_ratio = 1 - train_ratio - valid_ratio
    #train_data = data[:int(train_ratio * len(data))]
    #valid_data = data[int(train_ratio * len(data)):int((train_ratio + valid_ratio) * len(data))]
    #test_data = data[int((train_ratio + valid_ratio) * len(data)):]
    train_data = price_dataset.loc['2000-01-30':'2015-12-31'].values
    valid_data = price_dataset.loc['2016.01.01':'2019.12.31'].values
    test_data = price_dataset.loc['2020.01.01':'2022.04.01'].values
    
    # Train
    torch.manual_seed(0)
    if args.train:
        try:
            before_train = datetime.now().timestamp()
            _, normalize_statistic = handler.train(train_data, valid_data, args, result_train_file)
            after_train = datetime.now().timestamp()
            print(f'\nTraining took {(after_train - before_train) / 60} minutes')
        except KeyboardInterrupt:
            print('-' * 99)
            print('Exiting from training early')
            
    # Evaluate
    if args.evaluate:
        before_evaluation = datetime.now().timestamp()
        handler.test(test_data, args, result_train_file, result_test_file)
        after_evaluation = datetime.now().timestamp()
        #print(f'Evaluation took {(after_evaluation - before_evaluation) / 60} minutes')
    print('-'*30)
    
    # Post-Processing
    dir_list = os.listdir('./output')
    if '.ipynb_checkpoints' in dir_list:
        dir_list.remove('.ipynb_checkpoints')
    for i in dir_list :
        source = './output/'+i
        try:
            if os.path.exists(source+'/train'):
                shutil.rmtree(source+'/train')
        except:
            print(f'{i} <- remove exception')
    if os.path.exists('./output/.ipynb_checkpoints'):
        shutil.rmtree('./output/.ipynb_checkpoints')
    
    return args

In [32]:
csv_cases = ['pre_price', 'pre_price_s_7d','pre_price_s_14d', 'pre_price_s_21d', 'pre_price_s_28d']
window_cases = [a for a in range(15,61)]
horizon_cases = [a+1 for a in range(7)] + [14]
all_cases = list(itertools.product(*[csv_cases,window_cases,horizon_cases]))
print("경우의 수 : %s개 | ex:" %len(all_cases), all_cases[0])

경우의 수 : 1840개 | ex: ('pre_price', 15, 1)


In [28]:
for i in all_cases :
    args = stemgnn(data = i[0],
        window_size = i[1],
        horizon = i[2],
        train_length = 7,
        valid_length = 2,
        test_length = 1)

Dataset: pre_price | Window_size: 15 | Horizon:  1
Epoch:0 | 
Training took 0.021498699982961018 minutes

Performance on test set: MAPE:  0.72 | MAE: 2184.54 | RMSE: 4988.7357|
MAPE of potato : 0.1552 , lettuce : 0.3146 , onion : 0.1955 , cucumber : 0.2080
------------------------------
Dataset: pre_price | Window_size: 15 | Horizon:  2
Epoch:0 | 
Training took 0.021732719739278157 minutes

Performance on test set: MAPE:  0.73 | MAE: 2183.91 | RMSE: 5399.9561|
MAPE of potato : 0.1461 , lettuce : 0.2905 , onion : 0.1810 , cucumber : 0.2079
------------------------------
Dataset: pre_price | Window_size: 15 | Horizon:  3
Epoch:0 | 
Training took 0.021477270126342773 minutes

Performance on test set: MAPE:  0.71 | MAE: 2364.42 | RMSE: 5879.5493|
MAPE of potato : 0.1557 , lettuce : 0.2991 , onion : 0.1830 , cucumber : 0.2306
------------------------------
Dataset: pre_price | Window_size: 15 | Horizon:  4
Epoch:0 | 
Training took 0.021739999453226726 minutes

Performance on test set: MAPE:

## 5 Post-Processing
(skip)

## 6 Result
(skip)

---