# Convert Excel to CSV

In [3]:
import pandas as pd

# One-off conversion: read the Excel workbook and write it back out as CSV.
f = pd.read_excel(r'data/17766_12_data.xlsx')
# `index` is documented as a bool; pass False explicitly (instead of relying on
# None being falsy) so the row index is not written as an extra column.
f.to_csv(r'data/17766_12_data.csv', index=False)

# Process
### Load, Group, Rename, Filter, Rebase, Save

In [75]:
import pandas as pd

# Original author's size note: 400 MB -> 716 KB
# (presumably the input CSV vs. the Feather file written at the end -- TODO confirm).

# Load only the columns used below; '.' is treated as a missing value in
# addition to pandas' default NA markers.
df = pd.read_csv(r"data/17766_12_data.csv",
                 usecols=['year', 'DOY', 'dendroNr', 'Art', 'Plot', 'RAH', 'RAZ_kum'],
                 na_values=['.'],
                 # 'year' is pinned to str so the slicing below does not depend
                 # on what type inference makes of the column.
                 dtype={'year': str, 'RAH': float, 'RAZ_kum': float}
                 )
# Fix year from Excel str: keep the first four characters and convert to int.
df['year'] = df['year'].str[:4].astype(int)

# Generated by GPT-3.5 (provenance note kept from the original).
# Collapse the table to one row per (dendroNr, year, DOY): keep the first
# 'Art' and 'Plot' value of each group and average 'RAZ_kum' and 'RAH'.
# Named aggregation gives the result columns their final names directly
# ('Art' -> 'species', 'RAZ_kum' -> 'growth', 'RAH' -> 'dbh').
df = df.groupby(['dendroNr', 'year', 'DOY'], as_index=False).agg(
    species=('Art', 'first'),
    Plot=('Plot', 'first'),
    growth=('RAZ_kum', 'mean'),
    dbh=('RAH', 'mean'),
)

# Keep a row only if it has a growth value, carries one of the three species
# codes of interest, and lies inside the day-of-year window for its year.
has_growth = ~df['growth'].isna()
wanted_species = df['species'].isin(['Es', 'Bu', 'BAh'])

# Bounds are inclusive. If DOY is a 1-based day of year, both windows would
# span 1 Apr - 30 Sep (2012 is a leap year, hence the one-day shift) -- TODO confirm.
in_2012_window = (df['year'] == 2012) & (df['DOY'] >= 92) & (df['DOY'] <= 274)
in_2013_window = (df['year'] == 2013) & (df['DOY'] >= 91) & (df['DOY'] <= 273)

df = df[has_growth & wanted_species & (in_2012_window | in_2013_window)]

# Re-base every (dendroNr, year) series so it starts at zero: subtract the
# group's first DOY and first growth value from each of its rows.
#
# This uses groupby(...).transform('first') rather than groupby(...).apply(...).
# Calling apply() on a frame that still contains the grouping columns is
# deprecated since pandas 2.2, and newer pandas no longer passes those columns
# to the applied function; the following reset_index(drop=True) would then
# discard 'dendroNr' and 'year' altogether. transform() returns values aligned
# to the original rows, so every column and the row order are preserved.
#
# NOTE: 'first' is the first non-null value per group in current row order.
# Rows reach this point ordered by (dendroNr, year, DOY) from the earlier
# groupby, and rows with missing growth have already been filtered out.
per_series = df.groupby(['dendroNr', 'year'])
df = df.assign(
    DOY=df['DOY'] - per_series['DOY'].transform('first'),
    growth=df['growth'] - per_series['growth'].transform('first'),
).reset_index(drop=True)

# Final dtypes before saving: 'year' becomes a category, 'DOY' an int, and
# 'species' an ordered categorical whose codes are replaced by English names.
species_names = {'Bu': "Beech", 'Es': "Ash", 'BAh': "Sycamore"}

# Build the ordered categorical on the original codes first; the category
# order is the dict's insertion order (Bu, Es, BAh).
species_codes = pd.Categorical(
    df['species'], categories=list(species_names), ordered=True
)

df = df.assign(
    year=df['year'].astype('category'),
    DOY=df['DOY'].astype('int'),
    species=species_codes.rename_categories(list(species_names.values())),
)

# Persist the processed table in Feather format.
df.to_feather(r'data/17766_12.feather')