## Import

In [5]:
import random
import pandas as pd
import numpy as np
import os
import glob

import tensorflow as tf


from tqdm.auto import tqdm

# import warnings
# warnings.filterwarnings(action='ignore') 

  from .autonotebook import tqdm as notebook_tqdm


## Hyperparameter Setting

In [6]:
# Training hyperparameters shared by the cells below.
CFG = dict(
    EPOCHS=30,             # number of passes given to model.fit
    LEARNING_RATE=1e-3,    # step size handed to the Adam optimizer
    BATCH_SIZE=16,         # windows per batch when building the datasets
    SEED=41,               # value passed to seed_everything
)

## Fix Random Seed

In [7]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    TensorFlow's global generator, and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to all generators.
    """
    # NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here
    # only influences child processes, not the hashing of the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)


seed_everything(CFG['SEED'])  # fix the seed

## Data Pre-processing

In [8]:
# Sorted so the i-th input file lines up with the i-th target file
# (presumably both folders use matching file names -- TODO confirm).
all_input_list, all_target_list = (
    sorted(glob.glob(pattern))
    for pattern in ('./data/train_input/*.csv', './data/train_target/*.csv')
)

In [9]:
# The first 25 files train the model; whatever remains is held out for validation.
train_input_list, val_input_list = all_input_list[:25], all_input_list[25:]
train_target_list, val_target_list = all_target_list[:25], all_target_list[25:]

In [10]:
# Stack the per-file validation CSVs into one frame each.
df_val_input = pd.concat([pd.read_csv(csv_path) for csv_path in val_input_list])
df_val_target = pd.concat([pd.read_csv(csv_path) for csv_path in val_target_list])

## DataGenerator

In [58]:
# Preview the raw training inputs as an array. With the string column
# 'obs_time' still present the array falls back to dtype=object, which is why
# load_datasets drops that column before building windows.
df_train_input = pd.concat(map(pd.read_csv, train_input_list))
df_train_input.to_numpy()

array([[0, '00:00', 25.30000013, ..., 0.0, 0.0, 0.0],
       [0, '01:00', 25.68035718, ..., 0.0, 0.0, 0.0],
       [0, '02:00', 25.27333349, ..., 0.0, 0.0, 0.0],
       ...,
       [27, '21:00', 27.3949999491373, ..., 21155.5824999999, 0.0,
        159289.1798999999],
       [27, '22:00', 27.9399998346964, ..., 21155.5824999999, 0.0,
        159289.1798999999],
       [27, '23:00', 28.1700001398722, ..., 21155.5824999999, 0.0,
        159289.1798999999]], dtype=object)

In [59]:
def load_datasets(input_paths, target_paths, sequence_length=24, batch_size=None):
    """Build a batched dataset of non-overlapping input windows and their targets.

    All input CSVs are concatenated (without the 'obs_time' column) and cut
    into consecutive windows of ``sequence_length`` rows. All target CSVs are
    concatenated (without the 'DAT' column) and paired with those windows.

    Args:
        input_paths: Iterable of input CSV paths.
        target_paths: Iterable of target CSV paths, in the same order.
        sequence_length: Rows per window; also used as the stride so windows
            do not overlap. Defaults to 24.
        batch_size: Windows per batch. Defaults to ``CFG['BATCH_SIZE']``.

    Returns:
        The ``tf.data.Dataset`` produced by
        ``tf.keras.utils.timeseries_dataset_from_array``.

    Raises:
        KeyError: If 'obs_time' / 'DAT' is missing from the respective frames.
    """
    if batch_size is None:
        batch_size = CFG['BATCH_SIZE']

    inputs = pd.concat(map(pd.read_csv, input_paths)).drop(columns='obs_time').to_numpy()
    targets = pd.concat(map(pd.read_csv, target_paths)).drop(columns='DAT').to_numpy()

    # timeseries_dataset_from_array pairs the window that starts at input row i
    # with targets[i]. If the target table holds one row per window instead of
    # one row per input row (presumably one row per day for hourly inputs --
    # TODO confirm), expand it so that targets[k * sequence_length] is the
    # target of window k. Otherwise the windows would be matched with the wrong
    # rows and the dataset silently truncated to len(targets) input rows.
    if len(targets) * sequence_length == len(inputs):
        targets = np.repeat(targets, sequence_length, axis=0)

    return tf.keras.utils.timeseries_dataset_from_array(
        data=inputs,
        targets=targets,
        sequence_length=sequence_length,
        sequence_stride=sequence_length,
        sampling_rate=1,
        batch_size=batch_size,
    )

In [60]:
# Windowed, batched datasets for fitting and for validation.
train_dataset = load_datasets(input_paths=train_input_list, target_paths=train_target_list)
val_dataset = load_datasets(input_paths=val_input_list, target_paths=val_target_list)

## Model Define

In [61]:
# LSTM regressor over windows of 24 rows x 15 features (presumably the input
# columns left after dropping 'obs_time' -- TODO confirm).
# The head emits ONE value per time step: the recorded summary for this cell
# (output shape (None, 24, 1), 257 parameters) corresponds to Dense(1), and the
# regression target is a single column, so Dense(128) was out of sync.
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(256, input_shape=[24, 15], return_sequences=True),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)),
])
model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_8 (LSTM)               (None, 24, 256)           278528    
                                                                 
 time_distributed_8 (TimeDis  (None, 24, 1)            257       
 tributed)                                                       
                                                                 
Total params: 278,785
Trainable params: 278,785
Non-trainable params: 0
_________________________________________________________________


## Train

In [63]:
# Adam optimizer with mean-absolute-error loss; validation loss is reported
# after every epoch.
opt = tf.keras.optimizers.Adam(learning_rate=CFG['LEARNING_RATE'])
mae_loss = tf.keras.losses.MeanAbsoluteError()
model.compile(loss=mae_loss, optimizer=opt)
model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=CFG['EPOCHS'],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1ee155ff340>

## Run!!

Train Loss : [34.78066] Valid Loss : [27.56516]
Train Loss : [32.57272] Valid Loss : [26.10837]
Train Loss : [31.45994] Valid Loss : [25.48141]
Train Loss : [31.04932] Valid Loss : [25.18256]
Train Loss : [30.56838] Valid Loss : [24.92300]
Train Loss : [30.50989] Valid Loss : [24.88433]
Train Loss : [30.34756] Valid Loss : [24.77411]
Train Loss : [30.34128] Valid Loss : [24.76932]
Train Loss : [30.35129] Valid Loss : [24.75595]
Train Loss : [30.35481] Valid Loss : [24.74844]
Train Loss : [30.17720] Valid Loss : [24.74417]
Train Loss : [30.21044] Valid Loss : [24.74626]
Train Loss : [30.24884] Valid Loss : [24.74541]
Train Loss : [30.28647] Valid Loss : [24.74356]
Train Loss : [30.27907] Valid Loss : [24.74664]
Train Loss : [30.25056] Valid Loss : [24.75266]
Train Loss : [30.30515] Valid Loss : [24.75043]
Epoch    17: reducing learning rate of group 0 to 5.0000e-04.
Train Loss : [30.22714] Valid Loss : [24.75100]
Train Loss : [30.25020] Valid Loss : [24.74855]
Train Loss : [30.24410] Va

## Inference

In [13]:
# Sorted so the i-th test input file lines up with the i-th test target file.
# NOTE(review): the training CSVs are read from ./data/..., while these
# patterns have no ./data prefix -- confirm the intended location.
test_input_list, test_target_list = (
    sorted(glob.glob(pattern))
    for pattern in ('./test_input/*.csv', './test_target/*.csv')
)

In [14]:
def inference_per_case(model, test_loader, test_path, device):
    """Predict one test case with a PyTorch model and write the result into its CSV.

    NOTE(review): this helper uses the PyTorch module API (``.to``, ``.eval``,
    ``torch.no_grad``). The ``model`` built earlier in this notebook is a Keras
    model and does not provide that API, so it cannot be passed here as is.

    Args:
        model: PyTorch module that maps a batch tensor to predictions.
        test_loader: Iterable yielding input batches as tensors.
        test_path: CSV file that is read, extended with a
            'predicted_weight_g' column and overwritten in place.
        device: Device the model and the batches are moved to.

    Raises:
        ValueError: Raised by pandas if the number of predictions does not
            match the number of rows in ``test_path``.
    """
    # Imported locally because the notebook's import cell does not import
    # torch; without this the function fails with NameError on a fresh kernel.
    import torch

    model.to(device)
    model.eval()
    pred_list = []
    with torch.no_grad():
        for X in test_loader:
            model_pred = model(X.float().to(device))
            # Flatten each batch so the predictions form one flat list.
            pred_list.extend(model_pred.cpu().numpy().reshape(-1).tolist())

    submit_df = pd.read_csv(test_path)
    submit_df['predicted_weight_g'] = pred_list
    submit_df.to_csv(test_path, index=False)

In [15]:
# Run inference case by case: each (input, target) file pair gets its own
# dataset and loader, and inference_per_case writes the predictions back into
# the target CSV.
# NOTE(review): CustomDataset, DataLoader, best_model and device are not
# defined in the visible part of this notebook -- confirm they exist before
# running this cell on a fresh kernel.
for test_input_path, test_target_path in zip(test_input_list, test_target_list):
    test_dataset = CustomDataset([test_input_path], [test_target_path], True)
    # shuffle=False keeps the batch order fixed while predictions are collected.
    test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)
    inference_per_case(best_model, test_loader, test_target_path, device)

Data Pre-processing..


0it [00:00, ?it/s]

Done.
Data Pre-processing..


0it [00:00, ?it/s]

Done.
Data Pre-processing..


0it [00:00, ?it/s]

Done.
Data Pre-processing..


0it [00:00, ?it/s]

Done.
Data Pre-processing..


0it [00:00, ?it/s]

Done.


## Submission

In [16]:
import zipfile

# Bundle every test target CSV into ./submission.zip.
# The working directory is left untouched (no os.chdir), so the cell can be
# re-run, and the context manager closes the archive even if a write fails.
with zipfile.ZipFile('./submission.zip', 'w') as submission:
    for path in test_target_list:
        # Store each file under its bare file name; os.path.basename copes with
        # both '/' and OS-specific separators returned by glob.
        submission.write(path, arcname=os.path.basename(path))