In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def set_file_path(input_file):
    """Locate the raw/prepared CSV for one label file and load the raw data.

    Files numbered 11 through 17 are looked up in the ``training``
    directories; every other number is looked up in ``validation``.

    Args:
        input_file: Numeric id of the label file; the raw data is read from
            ``<input_file>.csv``.

    Returns:
        A tuple ``(output_file_path, labels_df, vegetableTotalDays)``:
        the path the prepared CSV should be written to
        (``<input_file>_<vegetableTotalDays>.csv``), the raw CSV loaded as a
        DataFrame (decoded as EUC-KR), and the number of days recorded for
        that file (0 when the id is not in the table).
    """
    # Total number of days per known label file.
    total_days_by_file = {
        11: 37,
        12: 58,
        13: 58,
        14: 54,
        15: 56,
        16: 58,
        17: 56,
        18: 58,
        19: 58,
        20: 56,
    }

    is_training_file = 11 <= input_file <= 17
    raw_data_dir = (
        r"../dataset/raw_data/lettuce/labels/training"
        if is_training_file
        else r"../dataset/raw_data/lettuce/labels/validation"
    )
    prep_data_dir = (
        r"../dataset/prep_data/lettuce/labels/training"
        if is_training_file
        else r"../dataset/prep_data/lettuce/labels/validation"
    )

    # Unknown ids fall back to 0 days.
    vegetableTotalDays = total_days_by_file.get(input_file, 0)

    raw_csv_name = f"{input_file}.csv"
    prepared_csv_name = f"{input_file}_{vegetableTotalDays}.csv"
    output_file_path = os.path.join(prep_data_dir, prepared_csv_name)

    labels_df = pd.read_csv(os.path.join(raw_data_dir, raw_csv_name), encoding='euc-kr')

    return output_file_path, labels_df, vegetableTotalDays

In [3]:
def save_labels(output_file_path, labels_df):
    """Print a summary of ``labels_df`` and write it to ``output_file_path``.

    The shape and the per-column dtypes are printed first; the frame is then
    written as an EUC-KR encoded CSV without the DataFrame index.

    Args:
        output_file_path: Destination path of the CSV file.
        labels_df: DataFrame to summarise and save.
    """
    for summary in (labels_df.shape, labels_df.dtypes):
        print(summary)

    labels_df.to_csv(output_file_path, encoding='euc-kr', index=False)

In [4]:
def test_labels(labels_df):
    """Print a summary of ``labels_df`` and plot its label columns.

    The shape and dtypes are printed, then one 10x6 figure is shown per
    plotted column, each drawn against the ``'index'`` column.

    Args:
        labels_df: DataFrame containing ``'index'`` plus the plotted columns.
    """
    print(labels_df.shape)
    print(labels_df.dtypes)

    x_column = 'index'
    plotted_columns = (
        'gardenTemp',
        'gardenHumid',
        'gardenLight',
        'gardenCO2',
        'vegetableAge',
        'vegetableGrowth',
    )

    for name in plotted_columns:
        # One standalone figure per column.
        _, ax = plt.subplots(figsize=(10, 6))
        ax.plot(labels_df[x_column], labels_df[name], label=name)
        ax.set_title(f'{name} (10mins Interval)')
        ax.set_xlabel('Time')
        ax.legend()
        plt.show()

In [5]:
# Output rows per day: the series is resampled to vegetableTotalDays * 144 rows.
_SAMPLES_PER_DAY = 144

# Raw CSV header -> column name used in the prepared file.
# NOTE(review): 'gardenHumid' is taken from the *external* humidity column
# ('외부습도관측치') while the internal one ('내부습도관측치') is dropped, whereas
# 'gardenTemp' uses the internal temperature - confirm this is intended.
_COLUMN_RENAMES = {
    '시간': 'index',
    '내부온도관측치': 'gardenTemp',
    '외부습도관측치': 'gardenHumid',
    '총추정광량': 'gardenLight',
    'CO2관측치': 'gardenCO2'
}

# Raw columns that are not carried over into the prepared file.
_UNUSED_COLUMNS = [
    'EC관측치', '내부습도관측치', '외부온도관측치', '펌프상태', '펌프작동남은시간', '최근분무량',
    '일간누적분무량', '냉방상태', '냉방작동남은시간', '난방상태', '난방작동남은시간', '내부유동팬상태',
    '내부유동팬작동남은시간', '외부환기팬상태', '외부환기팬작동남은시간', '화이트 LED상태', '화이트 LED작동남은시간',
    '화이트 LED동작강도', '레드 LED상태', '레드 LED작동남은시간', '레드 LED동작강도', '블루 LED상태',
    '블루 LED작동남은시간', '블루 LED동작강도', '카메라상태', '냉방온도', '난방온도', '기준온도', '내외부온도차',
    '난방부하', '냉방부하', '백색광추정광량', '적색광추정광량', '청색광추정광량'
]

# (column, inclusive lower bound, exclusive upper bound); rows outside any
# of these ranges are discarded before resampling.
_VALID_RANGES = (
    ('gardenTemp', 13, 30),
    ('gardenHumid', 20, 65),
    ('gardenLight', 0, 420),
    ('gardenCO2', 330, 650),
)

# End of each growth phase, in 24ths of the series length; rows past the
# last boundary belong to the final phase.
_GROWTH_PHASE_ENDS = (1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20)

# (low, high) bounds of the uniformly drawn per-row growth increment for each
# phase. The first phase has no growth at all.
_GROWTH_STEP_RANGES = (
    (0.000000000000, 0.000000000000),
    (0.080808080808, 0.121212121212),
    (0.010101010101, 0.585858585858),
    (0.343434343434, 0.858585858585),
    (0.686868686868, 1.212121212121),
    (0.959595959595, 1.868686868686),
    (1.323232323232, 2.525252525252),
    (1.565656565656, 2.878787878787),
    (1.494949494949, 2.343434343434),
    (1.121212121212, 1.535353535353),
    (0.989898989898, 1.393939393939),
    (0.626262626262, 1.212121212121),
)


def _simulate_growth_steps(num_rows, rng):
    """Draw one random growth increment per row according to its growth phase."""
    phase_bounds = np.array([ends * num_rows // 24 for ends in _GROWTH_PHASE_ENDS])
    # Number of boundaries <= row position == index of the phase the row is in.
    phase = np.searchsorted(phase_bounds, np.arange(num_rows), side='right')
    lows, highs = np.array(_GROWTH_STEP_RANGES).T

    steps = np.zeros(num_rows, dtype=float)
    # Rows of the first phase stay at 0 and consume no random numbers.
    growing = phase > 0
    steps[growing] = rng.uniform(lows[phase[growing]], highs[phase[growing]])
    return steps


def prep_labels(input_file, seed=None):
    """Build and save the prepared label file for one raw sensor CSV.

    Steps:
      1. Load the raw CSV through ``set_file_path`` and keep only the time,
         temperature, humidity, light and CO2 columns (renamed).
      2. Subtract 5 from ``gardenTemp`` and drop every row where one of the
         four sensor values lies outside its range in ``_VALID_RANGES``.
      3. Average consecutive rows in equally sized chunks and keep the first
         ``vegetableTotalDays * 144`` averaged rows.
      4. Number the rows (``index``, starting at 1) and assign the day number
         (``vegetableAge``, starting at 1, 144 rows per day).
      5. Generate a synthetic cumulative growth curve (``vegetableGrowth``)
         from phase-wise uniform random increments, plus its row-to-row
         difference (``vegetableGrowthRate``).
      6. Write the result with ``save_labels``.

    Args:
        input_file: Numeric id of the label file, forwarded to
            ``set_file_path``.
        seed: Optional seed for the random growth increments. ``None`` (the
            default) draws from NumPy's global random state; any other value
            makes the generated growth curve reproducible.

    Raises:
        ValueError: If no growing period is known for ``input_file`` or if
            fewer than ``vegetableTotalDays * 144`` rows survive the range
            filter, so the series cannot be resampled.
    """
    output_file_path, raw_df, vegetableTotalDays = set_file_path(input_file)

    target_rows = vegetableTotalDays * _SAMPLES_PER_DAY
    if target_rows <= 0:
        raise ValueError(
            f"no growing period is configured for input file {input_file!r}"
        )

    sensors_df = (
        raw_df
        .rename(columns=_COLUMN_RENAMES)
        .drop(columns=_UNUSED_COLUMNS)
        # NOTE(review): the reason for the fixed 5-unit temperature offset is
        # not visible here - confirm against the data description.
        .assign(gardenTemp=lambda frame: frame['gardenTemp'] - 5)
    )

    in_range = np.ones(len(sensors_df), dtype=bool)
    for column, lower, upper in _VALID_RANGES:
        values = sensors_df[column]
        in_range &= ((values >= lower) & (values < upper)).values
    sensors_df = sensors_df[in_range].reset_index(drop=True)

    chunk_size = len(sensors_df) // target_rows
    if chunk_size < 1:
        raise ValueError(
            f"only {len(sensors_df)} rows remain after filtering, but {target_rows} "
            f"are required for the {vegetableTotalDays} days of input file {input_file!r}"
        )

    # Average each run of `chunk_size` consecutive rows. The raw time column
    # is left out because it is replaced by a running row number below.
    chunk_ids = np.arange(len(sensors_df)) // chunk_size
    labels_df = (
        sensors_df
        .drop(columns='index')
        .groupby(chunk_ids)
        .mean()
        .iloc[:target_rows]
        .reset_index(drop=True)
    )

    labels_df.insert(0, 'index', np.arange(1, target_rows + 1, dtype=np.int64))
    labels_df['vegetableAge'] = np.arange(target_rows, dtype=np.int64) // _SAMPLES_PER_DAY + 1

    rng = np.random if seed is None else np.random.RandomState(seed)
    labels_df['vegetableGrowth'] = np.cumsum(_simulate_growth_steps(target_rows, rng))
    labels_df['vegetableGrowthRate'] = np.diff(labels_df['vegetableGrowth'].values, prepend=0)

    save_labels(output_file_path, labels_df)

In [6]:
# Batch run over label files 12 through 20, currently disabled; uncomment to
# process them in addition to file 11 below.
# for input_file in range(12, 21):
#     print(input_file)
#     prep_labels(input_file)

# Prepare label file 11; prep_labels prints the resulting shape and dtypes
# (via save_labels) and writes the prepared CSV.
prep_labels(11)

(5328, 8)
index                    int64
gardenTemp             float64
gardenCO2              float64
gardenHumid            float64
gardenLight            float64
vegetableAge             int64
vegetableGrowth        float64
vegetableGrowthRate    float64
dtype: object
