In [None]:
# Mount Google Drive at /content/drive so later cells can read and write files there.
# The google.colab module is used here, so this cell is expected to run inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Import libraries and apply notebook-wide settings.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's dark grid theme for every figure drawn after this point.
sns.set_style('darkgrid')

from dateutil.relativedelta import relativedelta

import warnings
# NOTE(review): this silences ALL warnings for the whole session, including
# pandas deprecation / SettingWithCopy warnings raised by the cells below.
warnings.filterwarnings('ignore')

In [None]:
def make_pivot_table(df , index , values):
    """Sum one value column per distinct key of an index column.

    If the ``GetDateTime`` column is one of the two selected columns it is
    parsed from its ``YYYYMMDDHHMM`` encoding into datetimes before pivoting.
    The parsing is done on a private copy, so the caller's frame is left
    untouched (the previous version overwrote ``df['GetDateTime']`` in place).

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; must contain the ``index`` and ``values`` columns.
    index : str
        Column whose distinct values become the pivot table's index.
    values : str
        Column that is summed for each index key.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``index`` key, with a single ``values`` column
        holding the per-key sum.
    """
    date_format = '%Y%m%d%H%M'

    # Copy only the two needed columns so the conversion below never leaks
    # back into the caller's frame (which may itself be a slice of another frame).
    data = df[[index , values]].copy()

    if 'GetDateTime' in data.columns:
        data['GetDateTime'] = pd.to_datetime(data['GetDateTime'] , format = date_format)

    pivot = data.pivot_table(
        index = index ,
        values = values ,
        aggfunc = 'sum'
    )

    return pivot

In [None]:
def sliding_window(idx , p_table , condition , col , columns):
    """Compute 1- to 4-week window statistics for successive start rows.

    Starting at row ``idx`` of ``p_table``, and then at every following row,
    four windows are taken that begin at that row's index value and end 1, 2,
    3 and 4 weeks later (both ends inclusive). One statistic of ``col`` is
    computed per window, giving one output row of four values per start row.
    Start rows are consumed for as long as ``start + 4 weeks`` does not pass
    the last index value of the table.

    Parameters
    ----------
    idx : int
        Position of the first start row in ``p_table``.
    p_table : pandas.DataFrame
        Table whose index supports ``+ pd.DateOffset`` and ordering
        comparisons. Assumes the index is sorted ascending -- TODO confirm
        for callers that do not build it with ``pivot_table``.
    condition : str
        ``'mean'``, ``'max'``, ``'min'`` or ``'dif'`` (max minus min). Any
        other value falls back to the standard deviation, as before.
    col : str
        Column of ``p_table`` the statistic is computed on.
    columns : list of str
        Names for the four output columns.

    Returns
    -------
    pandas.DataFrame
        Float frame with one row per start row and the given ``columns``;
        empty when the table is empty or spans less than four weeks.
    """
    index = p_table.index
    n_rows = len(index)

    # An empty table (or a start position past its end) has no windows at all;
    # return the same empty float frame a too-short table produces.
    if n_rows == 0 or idx >= n_rows:
        return pd.DataFrame([] , columns = columns).astype('float')

    # Every statistic is taken on the requested column only, so each result is
    # a scalar (the old 'mean'/'std' branches aggregated the whole frame and
    # appended one-element Series that had to be coerced to float afterwards).
    series = p_table[col]
    end_date = index[-1]
    four_weeks = pd.DateOffset(weeks = 4)
    result = []

    # `idx < n_rows` guards the lookup so an unsorted index cannot run off the end.
    while idx < n_rows and (index[idx] + four_weeks) <= end_date:
        start_date = index[idx]
        from_start = index >= start_date  # shared by the four windows below
        arr = []
        for i in range(1 , 5):
            end = start_date + pd.DateOffset(weeks = i)
            window = series[from_start & (index <= end)]

            if condition == 'mean':
                arr.append(window.mean())
            elif condition == 'max':
                # Series.max/min skip NaN; builtin max/min were order-dependent with NaN.
                arr.append(window.max())
            elif condition == 'min':
                arr.append(window.min())
            elif condition == 'dif':
                arr.append(window.max() - window.min())
            else:
                arr.append(window.std())
        result.append(arr)
        idx += 1

    return pd.DataFrame(result , columns = columns).astype('float')

In [None]:
def make_col(opt , condi):
    """Return the four weekly feature names ``W1_<opt>_<condi>`` .. ``W4_<opt>_<condi>``."""
    suffix = f'{opt}_{condi}'
    names = []
    for week in (1 , 2 , 3 , 4):
        names.append(f'W{week}_{suffix}')
    return names

In [None]:
def make_sliding_data(id , df):
    """Assemble the sliding-window feature table for one frame of readings.

    ``HighSoilHumi`` and ``LowSoilHumi`` are each pivoted by ``GetDateTime``
    and run through ``sliding_window`` for the mean, max, min, dif and std
    statistics. The resulting blocks are laid out side by side: all "High"
    statistics first, then all "Low" ones. Two leading columns, ``ClassId``
    and ``PlantName``, carry the first unique value of those columns in ``df``.

    Parameters
    ----------
    id : object
        Not used by this function; kept so existing calls keep working.
    df : pandas.DataFrame
        Must contain ``GetDateTime``, ``HighSoilHumi``, ``LowSoilHumi``,
        ``PlantName`` and ``ClassId``.

    Returns
    -------
    pandas.DataFrame
        ``ClassId``, ``PlantName`` and then the 40 window-feature columns.
    """
    stats = ('mean' , 'max' , 'min' , 'dif' , 'std')
    sensors = (("High" , 'HighSoilHumi') , ("Low" , 'LowSoilHumi'))

    # One pivot table per humidity column, built in High -> Low order.
    tables = {
        label: make_pivot_table(df , 'GetDateTime' , column)
        for label , column in sensors
    }

    # Collect the per-statistic frames for each sensor, statistic by statistic.
    blocks = {label: [] for label , _ in sensors}
    for stat in stats:
        for label , column in sensors:
            blocks[label].append(
                sliding_window(0 , tables[label] , stat , column , make_col(label , stat))
            )

    per_sensor = [pd.concat(blocks[label] , axis = 1) for label , _ in sensors]
    sliding_data = pd.concat(per_sensor , axis = 1)

    # Inserting at position 0 twice leaves ClassId first and PlantName second.
    for meta in ('PlantName' , 'ClassId'):
        sliding_data.insert(0 , column = meta , value = df[meta].unique()[0])

    return sliding_data

In [None]:
# Directory on the mounted Drive that the per-plant CSV files are read from.
path = '/content/drive/MyDrive/Lab실 Project/files/plant_data/'

In [122]:
# Dieffenbachia: load this plant's CSV into `df`.

df = pd.read_csv(path + '디펜바키아.csv')

In [123]:
# Preview the first three rows of the loaded table.
df.head(3)

Unnamed: 0,GetDateTime,ClassId,ImageCaption,PlantName,PlantClass,PlantStem,SelfTopping,RootLiength,SoilState,GrowthStage,...,ImageType,PhotographerID,ImageTakeDT,ImageSize,ImageName,ImagePath,FilmingLocation,ShootingDistance,ImageResolution,Quantum
0,202308240801,B-3-15-B-3,2023년 08월 24일 08시 01분 01초에 다세대 주택 배란다에 있는 관상수 ...,디펜바키아,습생식물,0.0,0.0,0.0,과습한흙,생육상태,...,JPG,USERID,202308240801,959315,N50-B-3-15-B-3-V-230824-000048.jpg,"37.499035,127.025597",26,1080 * 1920,,
1,202308240800,B-3-15-B-1,2023년 08월 24일 08시 00분 57초에 다세대 주택 배란다에 있는 관상수 ...,디펜바키아,습생식물,0.0,0.0,0.0,건조한흙,생육상태,...,JPG,USERID,202308240800,3499070,N50-B-3-15-B-1-V-230824-000093.jpg,"37.499035,127.025597",26,2160 * 3840,,
2,202308240802,B-3-15-B-2,2023년 08월 24일 08시 02분 35초에 다세대 주택 배란다에 있는 관상수 ...,디펜바키아,습생식물,0.0,0.0,0.0,입반흙,생육상태,...,JPG,USERID,202308240802,3149930,N50-B-3-15-B-2-V-230824-000110.jpg,"37.499035,127.025597",26,2160 * 3840,,


In [None]:
# List the distinct ClassId values present in the table.
df['ClassId'].unique()

array(['B-3-15-B-3', 'B-3-15-B-1', 'B-3-15-B-2', 'B-3-15-L-2',
       'B-3-15-L-3', 'B-3-15-L-1'], dtype=object)

In [None]:
# Split the full table into one sub-frame per distinct ClassId value.
id_arr = df['ClassId'].unique()
df_list = [pd.DataFrame(df[df['ClassId'] == class_id]) for class_id in id_arr]

In [None]:
# Sanity check: each sub-frame should hold exactly one ClassId.
# The loop variable is deliberately NOT named `df`: reusing that name would
# overwrite the full table with the last sub-frame, so re-running the cell
# that builds `df_list` would then split only that sub-frame.
for class_df in df_list:
    print(class_df['ClassId'].unique())

['B-3-15-B-3']
['B-3-15-B-1']
['B-3-15-B-2']
['B-3-15-L-2']
['B-3-15-L-3']
['B-3-15-L-1']


In [None]:
# Dieffenbachia data: build one sliding-window feature table per ClassId sub-frame.
# The first argument is passed for make_sliding_data's `id` parameter, which the
# function does not read; only the second argument affects the result.

a = make_sliding_data(df_list[0]['ClassId'] , df_list[0])
b = make_sliding_data(df_list[1]['ClassId'] , df_list[1])
c = make_sliding_data(df_list[2]['ClassId'] , df_list[2])
d = make_sliding_data(df_list[3]['ClassId'] , df_list[3])
e = make_sliding_data(df_list[4]['ClassId'] , df_list[4])
f = make_sliding_data(df_list[5]['ClassId'] , df_list[5])
# Disabled slots -- presumably for data sets with more than six ClassId groups; verify before enabling.
#g = make_sliding_data(df_list[6]['ClassId'] , df_list[6])
#h = make_sliding_data(df_list[7]['ClassId'] , df_list[7])
#i = make_sliding_data(df_list[8]['ClassId'] , df_list[8])

In [None]:
# Report the size of each feature table and write it to Drive as <ClassId>_data.csv.
# The loop variable is not named `df`, so the notebook-level `df` is left alone.
for sliding_df in [a , b , c , d , e , f]:
    # An empty table has no ClassId to read; this is the case the old bare
    # `except:` was reporting as "no data".
    if sliding_df.empty:
        print('데이터 없슴다.')
        continue

    class_id = sliding_df['ClassId'].unique()[0]
    print(f"Data size ({class_id}) : {len(sliding_df)}")

    try:
        # Don't forget to change the plant folder in this path when switching plants!
        sliding_df.to_csv("/content/drive/MyDrive/Lab실 Project/files/sliding_data/디펜바키아/{}_data.csv".format(class_id) , index = False)
    except OSError as err:
        # A failed write (missing folder, Drive not mounted, ...) is reported as
        # such instead of being mislabelled as missing data; keep going with the rest.
        print(f"Failed to save {class_id}: {err}")

Data size (B-3-15-B-3) : 2707
Data size (B-3-15-B-1) : 2639
Data size (B-3-15-B-2) : 920
Data size (B-3-15-L-2) : 2159
Data size (B-3-15-L-3) : 2060
Data size (B-3-15-L-1) : 2968
