In [None]:
import pandas as pd

In [None]:
# Load the raw leaf-thermistor readings and preview the first rows.
all_data = pd.read_csv(filepath_or_buffer="all.leaf.thermistor.data.csv")
all_data.head(5)

In [None]:
# Exclude every reading from leaf 1 of tree 67 in the JLA population
# (reportedly this leaf's data were discarded upstream).
# A row is kept when at least one of the three identifying fields differs.
all_data = all_data.loc[
    lambda frame: (frame['population'] != "JLA")
    | (frame['leaf'] != 1)
    | (frame['tree.id'] != 67)
]

In [None]:
# Display the column index of the filtered frame.
all_data.columns

In [None]:
# Remove the separate date/time component columns and the tree/leaf
# identifiers; the combined 'timestamp' column is kept.
all_data = all_data.drop(
    columns=['minute', 'time', 'hour', 'date', 'tree.id', 'leaf'],
)

In [None]:
# Parse the 'timestamp' strings (month/day/2-digit-year hour:minute) into datetimes.
all_data = all_data.assign(
    timestamp=pd.to_datetime(all_data['timestamp'], format="%m/%d/%y %H:%M"),
)

In [None]:
# Keep only readings taken exactly on the hour (minute component == 0).
all_data = all_data.loc[lambda frame: frame['timestamp'].dt.minute.eq(0)]

In [None]:
# Rename the measurement column from 'temp' to 'leaftemp'.
all_data = all_data.rename({'temp': 'leaftemp'}, axis='columns')

In [None]:
# Population codes to process; each is matched against the 'population'
# column and, lower-cased, used as a file-name prefix under ./ground/.
populations = [
    'CCR',
    'JLA',
    'NRV',
    'TSZ',
]

In [None]:
def timestamp_to_cols(df):
    """Add integer date/time component columns derived from ``df['timestamp']``.

    The three columns are written onto ``df`` itself (the input is modified
    in place) and the same object is returned so the call can be chained.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``timestamp`` column holding datetimes (or values that
        ``pd.to_datetime`` can convert).

    Returns
    -------
    pandas.DataFrame
        ``df``, with these columns added or overwritten:

        * ``year`` -- four-digit calendar year
        * ``julian-day`` -- day of the year (1-366)
        * ``standard-time`` -- hour of the day (0-23)
    """
    # Vectorised datetime accessors replace the per-row strftime -> int round trip.
    # pd.to_datetime is a no-op for datetime64 input and also lets empty or
    # object-dtype columns through.
    stamps = pd.to_datetime(df['timestamp']).dt

    # .astype(int) keeps the integer dtype the strftime-based version produced.
    df["year"] = stamps.year.astype(int)
    df["julian-day"] = stamps.dayofyear.astype(int)
    df['standard-time'] = stamps.hour.astype(int)

    return df

In [None]:
# For each population:
#   1. write ./ground/<pop>_leaftemp.csv -- every leaf reading paired with the
#      air temperature (Tair_C) recorded for the same year / day / hour;
#   2. add the per-timestamp mean leaf temperature to ./ground/<pop>_hourly_data.csv;
#   3. add the per-timestamp spread of leaf temperature to ./ground/<pop>_std_error.csv.
# Files 2 and 3 are read and rewritten in place.
key_cols = ['year', 'julian-day', 'standard-time']


def _with_leaftemp(frame, leaf_summary):
    """Left-join the ``leaftemp`` column of ``leaf_summary`` onto ``frame``.

    Rows are matched on year, julian-day and standard-time. Any ``leaftemp``
    column already present in ``frame`` is replaced rather than duplicated.
    """
    # The target CSVs are rewritten in place, so on a second run they already
    # contain 'leaftemp'. Dropping it first stops merge() from emitting
    # leaftemp_x / leaftemp_y and makes this cell safe to re-run.
    frame = frame.drop(columns=['leaftemp'], errors='ignore')
    return frame.merge(
        leaf_summary[key_cols + ['leaftemp']],
        on=key_cols,
        how='left'
    )


for pop in populations:

    # Filter once and reuse for the raw copy and both aggregations.
    pop_data = all_data.loc[all_data['population'] == pop]
    by_timestamp = pop_data.groupby(['timestamp'])

    leaft = pop_data.copy()
    mean_leaft = by_timestamp.mean(numeric_only=True).reset_index()
    # NOTE(review): this is the sample standard deviation, not the standard
    # error (.sem()), although it is written to "<pop>_std_error.csv" --
    # confirm which statistic the other columns of that file hold.
    stderr_leaft = by_timestamp.std(numeric_only=True).reset_index()

    leaft = timestamp_to_cols(leaft).drop(columns=['timestamp']).reindex(columns=key_cols + ['leaftemp', 'logger.id'])
    mean_leaft = timestamp_to_cols(mean_leaft).drop(columns=['timestamp']).reindex(columns=key_cols + ['leaftemp'])
    stderr_leaft = timestamp_to_cols(stderr_leaft).drop(columns=['timestamp']).reindex(columns=key_cols + ['leaftemp'])

    # File names use the lower-cased population code; the loop variable itself
    # is left untouched.
    site = pop.lower()
    hourly_path = f"./ground/{site}_hourly_data.csv"
    stderr_path = f"./ground/{site}_std_error.csv"

    hourly = pd.read_csv(hourly_path)

    # Match Tair_C from hourly to leaft by year, julian-day, and standard-time;
    # the inner join keeps only leaf readings that have an hourly record.
    leaft = leaft.merge(
        hourly[key_cols + ['Tair_C']],
        on=key_cols,
        how='inner'
    )
    leaft.to_csv(f"./ground/{site}_leaftemp.csv", index=False)

    # Mean leaf temperature aligned to the hourly rows (left join keeps every hourly row).
    hourly = _with_leaftemp(hourly, mean_leaft)
    hourly.to_csv(hourly_path, index=False)

    # Spread of leaf temperature aligned to the rows of the std-error file.
    stderr_hourly = _with_leaftemp(pd.read_csv(stderr_path), stderr_leaft)
    stderr_hourly.to_csv(stderr_path, index=False)
