In [1]:
import os

os.chdir("/mnt/d/데이터분석/Dacon/Lettuce_growing_AI")
project_path = os.getcwd()

project_path

'/mnt/d/데이터분석/Dacon/Lettuce_growing_AI'

In [4]:
import random
import pandas as pd
import numpy as np
import math
import glob

import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from tqdm import tqdm
import warnings
warnings.filterwarnings(action='ignore')

## Hyper Parameter Setting

In [5]:
CFG = {
    'EPOCHS' : 100,
    'LEARNING_RATE': 1e-3,
    'BATCH_SIZE': 16,
    'SEED':2358
}

In [6]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

seed_everything(CFG['SEED'])

In [7]:
all_input_list = sorted(glob.glob('dataset/train_input/*.csv'))
all_target_list = sorted(glob.glob('dataset/train_target/*.csv'))

In [9]:
train_input_list = all_input_list[:25]
train_target_list = all_target_list[:25]

val_input_list = all_input_list[25:]
val_target_list = all_target_list[25:]

print(f'학습: {len(train_input_list)}, 검증:{len(val_input_list)}')

학습: 25, 검증:3


## CustomDataset

In [12]:
class Dataloader(tf.keras.utils.Sequence):
    def __init__(self, input_paths, target_paths, batch_size, infer_mode, shuffle=False):
        self.input_paths = input_paths
        self.target_paths = target_paths
        self.batch_size = batch_size
        self.infer_mode = infer_mode
        self.shuffle = shuffle

        self.data_list = []
        self.label_list = []
        print('Data Pre-processing..')
        for input_path, target_path in tqdm(zip(self.input_paths, self.target_paths)):
            input_df = pd.read_csv(input_path)
            target_df = pd.read_csv(target_path)

            input_df = input_df.drop(columns=['obs_time'])
            input_df = input_df.fillna(0)


            target_length = int(len(target_df))

            for idx in range(target_length):
                time_series = input_df[24*idx:24*(idx+1)].values
                self.data_list.append(time_series)

            for label in target_df['predicted_weight_g']:
                self.label_list.append(label)
        print('Done. \n')
        self.on_epoch_end()

    def __len__(self):
        return math.ceil(len(self.data_list)/self.batch_size)

    def __getitem__(self, idx):
        indices = self.indices[idx*self.batch_size:(idx+1)*self.batch_size]

        data = [self.data_list[i] for i in indices]
        label = [self.label_list[i] for i in indices]

        if self.infer_mode == False:
            return tf.convert_to_tensor(data), tf.convert_to_tensor(label)
        else:
            return tf.convert_to_tensor(data)

    def on_epoch_end(self):
        self.indices = np.arange(len(self.data_list))

        if self.shuffle == True:
            np.random.shuffle(self.indices)

In [13]:
train_loader = Dataloader(train_input_list, train_target_list, CFG['BATCH_SIZE'], False, shuffle=True)
val_loader = Dataloader(val_input_list, val_target_list, CFG['BATCH_SIZE'], False, shuffle=True)

Data Pre-processing..


25it [00:00, 37.19it/s]


Done. 

Data Pre-processing..


3it [00:00, 54.55it/s]

Done. 






## Model Define

In [14]:
class BaseModel(tf.keras.Model):
    def __init__(self):
        super(BaseModel, self).__init__()
        self.lstm = tf.keras.layers.LSTM(256)
        self.classifier = tf.keras.layers.Dense(1)

    def call(self, inputs):
        h = self.lstm(inputs)
        return self.classifier(h)

## Train

In [15]:
model = BaseModel()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=CFG['LEARNING_RATE']),
    loss=tf.keras.losses.MeanAbsoluteError()
)

model.fit(
    train_loader, validation_data=val_loader,
    epochs=CFG['EPOCHS'],
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-8, verbose=1)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 12: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 31: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 50: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/10

<keras.callbacks.History at 0x7fc769f6f610>

In [16]:
test_input_list = sorted(glob.glob('dataset/test_input/*.csv'))
test_target_list = sorted(glob.glob('dataset/test_target/*.csv'))

In [17]:
for test_input_path, test_target_path in zip(test_input_list, test_target_list):
    print(test_target_path)
    test_loader = Dataloader([test_input_path], [test_target_path], CFG['BATCH_SIZE'], True, shuffle=False)
    model_pred = model.predict(test_loader)

    submit_df = pd.read_csv(test_target_path)
    submit_df['predicted_weight_g'] = model_pred
    submit_df.to_csv(test_target_path, index=False)

dataset/test_target/TEST_01.csv
Data Pre-processing..


1it [00:00, 15.33it/s]

Done. 






dataset/test_target/TEST_02.csv
Data Pre-processing..


1it [00:00, 34.69it/s]

Done. 






dataset/test_target/TEST_03.csv
Data Pre-processing..


1it [00:00, 71.35it/s]

Done. 






dataset/test_target/TEST_04.csv
Data Pre-processing..


1it [00:00, 36.83it/s]

Done. 






dataset/test_target/TEST_05.csv
Data Pre-processing..


1it [00:00, 37.52it/s]

Done. 







In [23]:
import zipfile
# os.chdir("dataset/test_target/")
submission = zipfile.ZipFile("submission.zip", 'w')
for path in test_target_list:
    path = path.split('/')[-1]
    submission.write(path)
submission.close()

In [24]:
os.getcwd()

'/mnt/d/데이터분석/Dacon/Lettuce_growing_AI/dataset/test_target'