In [15]:
import os
import pandas as pd
import sys
sys.path.append('../')
import utils.csv as csv

In [16]:
# Root folder of the data set; every other location below is derived from it.
# NOTE(review): absolute, machine-specific Windows path - adjust before running elsewhere.
PATH='D:\\Deutschland\\FUB\\master_thesis\\data\\gee'
# Folder holding the input CSV files that the later cells iterate over.
INPUT_DIR = os.path.join(PATH,'extract_cloud10')
# Folder for generated results.
OUTPUT_DIR = os.path.join(PATH, 'output')
# Sub-folder of OUTPUT_DIR that merge_data_frame() writes its per-file CSVs to.
DATA_DIR = os.path.join(OUTPUT_DIR, 'daily')

# File names (inside OUTPUT_DIR) of the date-occurrence table and the merged table.
DATE_CSV = 'occurrence_10.csv'
MERGE_CSV = 'merged_10.csv'

# date_path is written by count_dates() and read back by merge_data_frame().
date_path = os.path.join(OUTPUT_DIR, DATE_CSV)
# merge_path is currently referenced only from commented-out code.
merge_path = os.path.join(OUTPUT_DIR, MERGE_CSV)

# Directory listing taken once, when this cell runs; later cells filter it by the
# ".csv" suffix. Re-run this cell if the content of INPUT_DIR changes.
files = os.listdir(INPUT_DIR)

1 Functions for Pandas DataFrame

In [17]:
def shuffle(df:pd.DataFrame, ref:pd.DataFrame) -> pd.DataFrame:
    """Align one input time series with the reference dates.

    Rows that share an index value are collapsed to their last occurrence.
    The result is right-joined onto ``ref`` via ``date``, so every entry of
    ``ref`` produces a row and entries without a match in ``df`` are NaN
    (they are kept, not dropped). Finally the ``count``, ``B9``,
    ``spacecraft_id`` and ``id`` columns are removed.

    Args:
        df: Input time series; must expose ``date`` as a join key.
        ref: Reference frame exposing ``date`` and contributing ``count``.

    Returns:
        A new frame; neither argument is modified.

    Raises:
        KeyError: If one of the four dropped columns is absent after the join.
    """
    is_repeat = df.index.duplicated(keep='last')
    unique_rows = df[~is_repeat]
    aligned = unique_rows.merge(ref, how='right', on='date')
    return aligned.drop(columns=['count', 'B9', 'spacecraft_id', 'id'])

In [18]:
def monthly_mean_interpolate(df:pd.DataFrame) -> pd.DataFrame:
    """Monthly means of the time-interpolated series, May through September.

    Missing values are first filled by time-weighted linear interpolation,
    then the series is averaged per calendar month (labelled by month end)
    and only the months 5..9 of every year are kept. Months that contain no
    observations stay NaN; they are not back-filled.

    Args:
        df: Time series indexed by a ``DatetimeIndex`` (required by both
            ``interpolate(method='time')`` and ``resample``).

    Returns:
        A new frame with one row per kept month. ``df`` itself is left
        untouched (interpolation is no longer done in place).
    """
    filled = df.interpolate(method='time')
    # An offset object instead of the 'M' string alias: the alias is
    # deprecated in recent pandas, the object works on old and new versions.
    monthly = filled.resample(pd.offsets.MonthEnd()).mean()
    months = monthly.index.month
    return monthly[(months >= 5) & (months <= 9)]

In [19]:
def zero_padding(df:pd.DataFrame) -> pd.DataFrame:
    """Replace every missing value in ``df`` with 0.

    The fill happens in place: the frame passed in is modified and the very
    same object is handed back, so callers may use either reference.

    Args:
        df: Frame whose NaN entries are to be zeroed.

    Returns:
        ``df`` itself, after filling.
    """
    df.fillna(value=0, inplace=True)
    return df

In [20]:
def reshape(df:pd.DataFrame) -> pd.DataFrame:
    """Flatten a time series into a single row.

    The last column of ``df`` is taken as the identifier: its value in the
    first row becomes the ``id`` column of the result. Every other column is
    emitted once per row of ``df`` under the name ``"<YYYYMMDD> <column>"``.
    If two rows format to the same date, the later one overwrites the earlier.

    Args:
        df: Frame whose index entries support ``strftime`` and whose last
            column holds the identifier.

    Returns:
        A one-row frame with ``id`` first, followed by the date/column
        combinations in row order.

    Raises:
        IndexError: If ``df`` has no rows or no columns (no id to read).
    """
    value_columns = list(df.keys())[:-1]
    # id is wrapped in a list like every other entry; with a bare scalar the
    # DataFrame constructor fails when there are no value columns at all.
    record = {'id': [int(df.iat[0, -1])]}
    for stamp, row in df.iterrows():
        day = stamp.strftime('%Y%m%d')
        record.update({f'{day} {name}': [row[name]] for name in value_columns})
    return pd.DataFrame(record)

2 Count all available dates among all polygons

In [9]:
def count_dates() -> pd.DataFrame:
    """Count in how many input rows each date occurs, across all input CSVs.

    Every ``.csv`` entry of ``files`` is loaded from ``INPUT_DIR``. A file
    that loads as an empty frame is passed to ``csv.delete`` (presumably
    removing it from disk - confirm in ``utils.csv``) and skipped. A file
    that raises while being processed is skipped as well, but is now
    reported on stdout instead of being ignored silently.

    Returns:
        A frame with the columns ``date`` (``YYYYMMDD`` string) and ``count``,
        sorted by date. The same table is exported to ``date_path``.
    """
    occurrences = {}
    for file in files:
        if not file.endswith(".csv"):
            continue
        in_path = os.path.join(INPUT_DIR, file)
        try:
            df = csv.load(in_path, 'date', True)
            if df.empty:
                csv.delete(in_path)
                continue
            # Only the index is needed, so walk it directly rather than
            # materialising a Series per row with iterrows().
            for stamp in df.index:
                date = stamp.strftime('%Y%m%d')
                occurrences[date] = occurrences.get(date, 0) + 1
        except Exception as err:
            # Best effort: one unreadable file must not abort the whole count.
            print(f'skip file {in_path}: {err!r}')
            continue
    # export output as csv
    output = pd.DataFrame({
        'date': list(occurrences.keys()),
        'count': list(occurrences.values()),
    })
    output = output.sort_values(by='date', ascending=True)
    csv.export(output, date_path, index=False)
    return output

In [13]:
# Export every input CSV, restricted to observations before 2022, as
# tmp/plot_<id>.csv.
tmp_dir = os.path.join(PATH,'tmp')
os.makedirs(tmp_dir, exist_ok=True)
for file in files:
    if not file.endswith(".csv"):
        continue
    in_path = os.path.join(INPUT_DIR, file)
    df = csv.load(in_path, 'date', True)
    df = df[df.index.year < 2022]
    if df.empty:
        # Nothing left after the year filter: there is no first row to read
        # the identifier from, so skip instead of failing with IndexError.
        continue
    # NOTE(review): positional column 12 is assumed to hold the plot id -
    # confirm against the CSV layout (selecting by column name would be safer).
    plot_id = df.iloc[0, 12]
    out_path = os.path.join(tmp_dir, f'plot_{plot_id}.csv')
    csv.export(df, out_path, True)

export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_17.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_18.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_612.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_5933.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_2598.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_2758.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_3064.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_3765.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_5603.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_6213.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_7402.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_7532.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_8019.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_8622.c

3 Merge all data frames to one csv file

In [21]:
def merge_data_frame() -> None:
    """Pre-process every input CSV and export it as its own daily CSV.

    The date table is loaded from ``date_path``. Each ``.csv`` entry of
    ``files`` is then loaded from ``INPUT_DIR``, aligned to those dates with
    ``shuffle``, has its gaps zero-filled with ``zero_padding`` and is
    exported to ``DATA_DIR``.

    Despite the name, nothing is merged at the moment: the variant that
    condenses each series (``monthly_mean_interpolate`` + ``reshape``) and
    concatenates all of them into ``merge_path`` is not active, so the
    function returns nothing and does not keep the frames in memory.

    Returns:
        None.
    """
    dates = csv.load(date_path, 'date', True)
    for file in files:
        if not file.endswith(".csv"):
            continue
        in_path = os.path.join(INPUT_DIR, file)
        df = csv.load(in_path, 'date', True)
        # pre-process
        df = shuffle(df, dates)
        df = zero_padding(df)
        # The first five characters of the input name are stripped
        # (presumably a fixed prefix - confirm against the files in INPUT_DIR).
        out_path = os.path.join(DATA_DIR, file[5:])
        csv.export(df, out_path, True)

In [22]:
# Entry point: runs the per-file export when this cell/script is executed.
if __name__ == "__main__":
    # count_dates() is disabled here. merge_data_frame() loads date_path,
    # which count_dates() writes, so that file has to exist already.
    # count_dates()
    merge_data_frame()

export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\1.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\10.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\100.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\1000.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\10000.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\10002.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\10003.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\10004.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\10005.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\10006.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\10007.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\10008.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\daily\10009.csv
export fi