In [1]:
import os
import pandas as pd
import sys
sys.path.append('../')
import utils.csv as csv

In [9]:
PATH='D:\\Deutschland\\FUB\\master_thesis\\data\\gee'
INPUT_DIR = os.path.join(PATH,'bw_polygons_cloud10')
OUTPUT_DIR = os.path.join(PATH, 'output')
DATA_DIR = os.path.join(OUTPUT_DIR, 'bw_daily')

DATE_CSV = 'occurrence_10.csv'
MERGE_CSV = 'merged_10.csv'

date_path = os.path.join(OUTPUT_DIR, DATE_CSV)
merge_path = os.path.join(OUTPUT_DIR, MERGE_CSV)

files = os.listdir(INPUT_DIR)

1 Functions for Pandas DataFrame

In [3]:
def shuffle(df:pd.DataFrame, ref:pd.DataFrame) -> pd.DataFrame:
    """Pre-processing of each input time series data"""
    df = df[~df.index.duplicated(keep='last')]
    df = pd.merge(df, ref, how='right', on='date')
    df.drop(columns=['count','spacecraft_id', 'id'], inplace=True)
    return df

In [4]:
def zero_padding(df:pd.DataFrame) -> pd.DataFrame:
    """Padding all NaN with 0"""
    df.fillna(0,inplace=True)
    return df

In [5]:
def monthly_mean_interpolate(df:pd.DataFrame) -> pd.DataFrame:
    """Calculate mean band value in vegetation months for each year"""
    df.interpolate(method='time', inplace=True)
    mean_df = df.resample('M').mean()
    mean_df = mean_df[((mean_df.index.month >= 5) & (mean_df.index.month <=9))]
    # cols = list(mean_df.keys())
    # for col in cols:
    #     mean_df[col] = mean_df[col].fillna(mean_df.groupby(mean_df.index.month)[col].transform('mean'))
    return mean_df

In [6]:
def reshape(df:pd.DataFrame) -> pd.DataFrame:
    """Turn all combination of reflectance value and date into one row"""
    # new keys and values
    keys = list(df.keys())
    data = {'id':int(df.iat[0, -1])}
    for index, row in df.iterrows():
        date = index.strftime('%Y%m%d')
        for key in keys[:-1]:
            column = f'{date} {key}'
            data[column] = [row[key]]
    # reshape data
    return pd.DataFrame(data)

2 Count all available dates among all polygons

In [7]:
def rename_file() -> None:
    tmp_dir = os.path.join(PATH,'tmp')
    for file in files:
        if file.endswith(".csv"):
            in_path = os.path.join(INPUT_DIR, file)
            try:
                df = csv.load(in_path, 'date', True)
                if df.empty:
                    continue
            except Exception:
                # csv.delete(in_path)
                continue
            # df = df[df.index.year < 2022]
            id = df.iloc[0, 11]
            out_path = os.path.join(tmp_dir, f'plot_{id}.csv')
            csv.export(df, out_path, True)
rename_file()

export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_163165.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_412396.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_395429.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_397501.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_397546.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_223122.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_223158.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_239029.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_242415.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_375867.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_248881.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_248915.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\tmp\plot_248916.csv
export file D:\Deutschland\FUB\master_

In [10]:
def count_dates() -> pd.DataFrame:
    """Count all available dates among all polygons"""
    map = {}
    # read each csv file
    for file in files:
        if file.endswith(".csv"):
            in_path = os.path.join(INPUT_DIR, file)
            df = csv.load(in_path, 'date', True)
            # count date occurrence
            for index, row in df.iterrows():
                date = index.strftime('%Y%m%d')
                map[date] = map.get(date, 0) + 1
    # export output as csv
    dates = list(map.keys())
    counts = list(map.values())
    output = pd.DataFrame({'date':dates, 'count':counts})
    output.sort_values(by='date', ascending=True, inplace=True)
    csv.export(output, date_path, index=False)
    return output
count_dates()

export file D:\Deutschland\FUB\master_thesis\data\gee\output\occurrence_10.csv


Unnamed: 0,date,count
0,20170410,6780
1,20170430,7118
2,20170510,7129
3,20170527,5057
4,20170616,3343
...,...,...
120,20210908,1497
82,20210913,3968
83,20210923,3442
121,20211001,2273


3 Merge all data frames to one csv file

In [12]:
def merge_data_frame() -> pd.DataFrame:
    """Merge all data frames to one csv file"""
    dates = csv.load(date_path, 'date', True)
    merged = pd.DataFrame(columns=['id'])
    # add each csv file to input list as data frame
    for file in files:
        if file.endswith(".csv"):
            in_path = os.path.join(INPUT_DIR, file)
            df = csv.load(in_path, 'date', True)
            id = df.iloc[0, 11]
            merged.loc[len(merged.index)] = [id]
            # pre-process
            df = shuffle(df, dates)
            df = zero_padding(df)
            # export each new csv file
            out_path = os.path.join(DATA_DIR, file[5:])
            csv.export(df, out_path, True)
    csv.export(merged, merge_path, False)
    return merged
merge_data_frame()

export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\101046.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\101879.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\101938.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\102746.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\102880.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\103017.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\103031.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\103032.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\103034.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\103116.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\103257.csv
export file D:\Deutschland\FUB\master_thesis\data\gee\output\bw_daily\103408.csv
export file D:\Deutschland\F

Unnamed: 0,id
0,101046
1,101879
2,101938
3,102746
4,102880
...,...
6322,98382
6323,98383
6324,99621
6325,99650
