In [201]:
import pandas as pd

In [202]:
# Read the leaf-thermistor export into a DataFrame and preview its first rows.
raw_csv_path = "all.leaf.thermistor.data.csv"
all_data = pd.read_csv(raw_csv_path)
all_data.head()

Unnamed: 0,timestamp,date,time,hour,minute,logger.id,tree.id,leaf,population,temp
0,7/24/23 17:00,7/24/23,17:00:00,17,0,1,67,1,JLA,49.92
1,7/24/23 17:30,7/24/23,17:30:00,17,30,1,67,1,JLA,48.14
2,7/24/23 18:00,7/24/23,18:00:00,18,0,1,67,1,JLA,47.26
3,7/24/23 18:30,7/24/23,18:30:00,18,30,1,67,1,JLA,44.34
4,7/24/23 19:00,7/24/23,19:00:00,19,0,1,67,1,JLA,42.06


In [203]:
# Display the column labels of the loaded frame.
all_data.columns

Index(['timestamp', 'date', 'time', 'hour', 'minute', 'logger.id', 'tree.id',
       'leaf', 'population', 'temp'],
      dtype='object')

In [204]:
# Drop the pre-split time parts ('date', 'time', 'hour', 'minute' repeat what
# 'timestamp' already carries) together with the tree and leaf identifiers.
# errors='ignore' keeps this cell re-runnable: `all_data` is rebound here, so on
# a second execution the labels are already gone and a plain drop would raise
# KeyError.
all_data = all_data.drop(
    columns=['minute', 'time', 'hour', 'date', 'tree.id', 'leaf'],
    errors='ignore',
)

In [205]:
# Parse the timestamp strings (e.g. "7/24/23 17:00") into datetime values so the
# `.dt` accessor can be used on the column.
all_data = all_data.assign(
    timestamp=pd.to_datetime(all_data['timestamp'], format="%m/%d/%y %H:%M")
)

In [206]:
# Keep only the readings taken exactly on the hour (minute == 0); rows such as
# the 17:30 and 18:30 samples are discarded.
on_the_hour = all_data['timestamp'].dt.minute.eq(0)
all_data = all_data.loc[on_the_hour]

In [207]:
# Give the thermistor reading an explicit name; the per-population loop selects
# and merges this column as 'leaftemp'.
all_data = all_data.rename({'temp': 'leaftemp'}, axis='columns')

In [208]:
# Population codes to process. Each code is compared against the 'population'
# column and, lower-cased, used as the prefix of the ./ground/<code>_*.csv files.
populations = ['CCR', 'JLA', 'NRV', 'TSZ']

In [209]:
def timestamp_to_cols(df):
    """Add integer date/time key columns derived from ``df['timestamp']``.

    Three columns are written onto ``df`` itself (the frame is modified in
    place and the same object is returned):

    * ``year``          -- calendar year
    * ``julian-day``    -- day of the year, 1-based (what ``%j`` produces)
    * ``standard-time`` -- hour of the day, 0-23

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``timestamp`` column holding datetime values (or values
        ``pd.to_datetime`` can convert).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the three columns added or overwritten.

    Raises
    ------
    ValueError
        If a timestamp is missing (NaT), because the result cannot be cast to
        an integer column.
    """
    # Vectorised field access replaces formatting every value to text with
    # strftime and parsing it back to int. to_datetime is a pass-through for
    # columns that are already datetime64.
    stamps = pd.to_datetime(df['timestamp']).dt

    # astype(int) keeps the same integer dtype the strftime-based version produced.
    df["year"] = stamps.year.astype(int)
    df["julian-day"] = stamps.dayofyear.astype(int)
    df['standard-time'] = stamps.hour.astype(int)

    return df

In [210]:
# Key columns shared by the leaf-temperature frames and the ./ground CSVs.
merge_keys = ['year', 'julian-day', 'standard-time']

for pop in populations:

    # Filter this population's readings once; the per-reading table and both
    # per-timestamp aggregates are all built from the same rows.
    pop_rows = all_data.loc[all_data['population'] == pop]
    by_timestamp = pop_rows.groupby(['timestamp'])

    leaft = pop_rows.copy()
    mean_leaft = by_timestamp.mean(numeric_only=True).reset_index()
    # NOTE(review): despite the `stderr_` name and the `_std_error.csv`
    # destination, this is the per-timestamp standard deviation (`.std()`),
    # not the standard error (`.sem()`). Left unchanged -- confirm which
    # statistic the std_error file is meant to hold.
    stderr_leaft = by_timestamp.std(numeric_only=True).reset_index()

    # Replace the timestamp with year / julian-day / standard-time keys and
    # keep only the columns needed downstream (reindex also discards
    # 'timestamp' and any other leftover column).
    leaft = timestamp_to_cols(leaft).reindex(columns=merge_keys + ['leaftemp', 'logger.id'])
    mean_leaft = timestamp_to_cols(mean_leaft).reindex(columns=merge_keys + ['leaftemp'])
    stderr_leaft = timestamp_to_cols(stderr_leaft).reindex(columns=merge_keys + ['leaftemp'])

    # The ./ground file names use the lower-cased population code.
    pop = pop.lower()

    hourly = pd.read_csv(f"./ground/{pop}_hourly_data.csv")

    # Attach Tair_C from the hourly table to every individual leaf reading;
    # the inner join keeps only readings whose hour exists in the hourly table.
    leaft = leaft.merge(
        hourly[merge_keys + ['Tair_C']],
        on=merge_keys,
        how='inner'
    )
    leaft.to_csv(f"./ground/{pop}_leaftemp.csv", index=False)

    # Add the mean leaf temperature to the hourly table and write it back to
    # the same file. A 'leaftemp' column left by a previous run is dropped
    # first; otherwise re-running this cell would produce 'leaftemp_x' /
    # 'leaftemp_y' instead of refreshing the column.
    hourly = hourly.drop(columns=['leaftemp'], errors='ignore').merge(
        mean_leaft[merge_keys + ['leaftemp']],
        on=merge_keys,
        how='left'
    )
    hourly.to_csv(f"./ground/{pop}_hourly_data.csv", index=False)

    # Same treatment for the spread table: drop any stale 'leaftemp', merge the
    # per-timestamp spread, write back to the same file.
    stderr_hourly = pd.read_csv(f"./ground/{pop}_std_error.csv")
    stderr_hourly = stderr_hourly.drop(columns=['leaftemp'], errors='ignore').merge(
        stderr_leaft[merge_keys + ['leaftemp']],
        on=merge_keys,
        how='left'
    )
    stderr_hourly.to_csv(f"./ground/{pop}_std_error.csv", index=False)
