In [None]:
import os
import sys
import glob
import pandas as pd
import numpy as np
import xarray as xr
import datetime
from datetime import datetime
import matplotlib.pyplot as plt
from itertools import groupby
import statistics

In [None]:
lat = 44.0646
lon = -71.28808




fname = "data/AmeriFlux/AMF_US-Bar_BASE-BADM_5-5/AMF_US-Bar_BASE_HH_5-5.csv"
df = pd.read_csv(fname,comment='#',na_values=-9999)

df = df.rename(columns={'TIMESTAMP_START':'dates',
                        'TA_PI_F_1_1_1':'tair',
                        'RH_PI_F_1_1_1':'rh',
                        'SW_IN_1_1_1':'swdown',
                        'WS_1_1_1':'wind',
                        'P_1_1_1':'rainf',
                        'VPD_PI_1_1_1':'vpd',
                        'CO2_1_1_1':'co2'})
"""
df = df.drop(['sunshine_duration_min',
              'vapor_presure_hPa'], axis=1)
"""
# Clean up the dates
df['dates'] = df['dates'].astype(str)
new_dates = []
for i in range(len(df)):
    year = df['dates'][i][0:4]
    month = df['dates'][i][4:6]
    day = df['dates'][i][6:8]
    hour = df['dates'][i][8:10]
    minute = df['dates'][i][10:12]
    if day.startswith("0"):
        day = day[1:]
    if hour.startswith("0"):
        hour = hour[1:]
    date = "%s/%s/%s %s:%s" % (year, month, day, hour, minute)
    new_dates.append(date)

df['dates'] = new_dates
df = df.set_index('dates')
df.index = pd.to_datetime(df.index)

# fix units
#hpa_2_kpa = 0.1
kpa_2_pa = 1000.
deg_2_kelvin = 273.15
#df.vpd *= hpa_2_kpa
df.tair += deg_2_kelvin
#df.air_temp_C_2100 += deg_2_kelvin
df.rainf /= 1800. # kg m-2 s-1

# sort out bad values
df.swdown = np.where(df.swdown < 0.0, 0.0, df.swdown)
df.vpd = np.where(df.vpd <= 0.05, 0.05, df.vpd)
df.rainf = np.where(df.rainf <= 0, 0, df.rainf)

In [None]:
meandf = df.groupby([df.index.month, df.index.day]).mean()

In [None]:
new_dates = []
for i in range(len(meandf.index)):
    month = meandf.index[i][0]
    day = meandf.index[i][1]
    date = "2000-%s-%s" % (month, day)
    date = datetime.strptime(date, "%Y-%m-%d")
    new_dates.append(date)
    
meandf['date'] = new_dates
meandf = meandf.set_index('date')

In [None]:
vars_to_keep = ["swdown","vpd","rainf"]
for var in vars_to_keep:
    print("Checking NaN locations for variable ", var)
    for k,g in groupby(meandf[var].isnull().values):
       print((k, sum(1 for i in g)))

In [44]:
fakedates = pd.date_range(start="2020-12-01",end="2020-12-13",freq='1H', closed='left')

In [45]:
newdf = pd.DataFrame({"time" : fakedates,
                     "tair" : np.NaN,
                     "rh" : np.NaN,
                     "swdown" : np.NaN,
                     "wind" : np.NaN,
                     "rainf" : np.NaN,
                     "vpd" : np.NaN,
                     "co2" : np.NaN})

Unnamed: 0,time,tair,rh,swdown,wind,rainf,vpd,co2
0,2020-12-01 00:00:00,,,,,,,
1,2020-12-01 01:00:00,,,,,,,
2,2020-12-01 02:00:00,,,,,,,
3,2020-12-01 03:00:00,,,,,,,
4,2020-12-01 04:00:00,,,,,,,
...,...,...,...,...,...,...,...,...
283,2020-12-12 19:00:00,,,,,,,
284,2020-12-12 20:00:00,,,,,,,
285,2020-12-12 21:00:00,,,,,,,
286,2020-12-12 22:00:00,,,,,,,


In [52]:
for var in ['swdown','tair','rh', 'wind', 'rainf', 'vpd', 'co2']:
    for i in range(len(newdf)):
        month = newdf['time'][i].month
        day = newdf['time'][i].day
        raw = meandf[var][np.logical_and(meandf.index.month == month , meandf.index.day == day)]
        value = raw.values
        newdf.loc[i, var] = value

1.3809873555382428

409.59213628209716

0      46.480853
1      46.480853
2      46.480853
3      46.480853
4      46.480853
         ...    
283    58.491547
284    58.491547
285    58.491547
286    58.491547
287    58.491547
Name: swdown, Length: 288, dtype: float64

In [68]:
    forecast_date = "2020-12-13"
    siteID_list = ["BART","KONZ","OSBS","SRER"]


    for siteID in siteID_list:
        lat = 44.0639*(siteID=="BART")+31.91068*(siteID=="KONZ")+39.10077*(siteID=="OSBS")+31.91068*(siteID=="SRER")
        lon = -71.2874*(siteID=="BART")+-81.99343*(siteID=="KONZ")+-96.56309*(siteID=="OSBS")+-110.83549*(siteID=="SRER")
        fname_list = os.listdir("data/forecastcsv/"+forecast_date+"/"+siteID)
        for fname in fname_list:
            inputcsv = "data/forecastcsv/"+forecast_date+"/"+siteID+"/"+fname
            print(inputcsv)
            df = pd.read_csv(inputcsv,comment='#',na_values=-9999)

            df = df.rename(columns={'time':'dates',
                                    'Tair':'tair',
                                    'SWdown':'swdown',
                                    'Wind':'wind',
                                    'Rainf':'rainf',
                                    'PSurf':'psurf',
                                    'LWdown':'lwdown',
                                    'Qair':'qair'})

            # Clean up the dates
            df['dates'] = df['dates'].astype(str)
            new_dates = []
            for i in range(len(df)):
                year = df['dates'][i][0:4]
                month = df['dates'][i][4:6]
                day = df['dates'][i][6:8]
                hour = df['dates'][i][8:10]
                minute = df['dates'][i][10:12]
                if day.startswith("0"):
                    day = day[1:]
                if hour.startswith("0"):
                    hour = hour[1:]
                date = "%s/%s/%s %s:%s" % (year, month, day, hour, minute)
                new_dates.append(date)

            df['dates'] = new_dates
            df = df.set_index('dates')
            df.index = pd.to_datetime(df.index)

            # Replace remaining NaNs (i.e. at the start) with mean
            df = df.fillna(df.mean())

            # Open the average met file
            averagemetcsv = "data/averagemet/"+siteID+"_"+forecast_date+".csv"
            meandf = pd.read_csv(averagemetcsv,comment='#',na_values=-9999,index_col="time")
            co2 = meandf['co2'].mean()
            # Add CO2
            df['co2'] = co2

            # Combine dataframes
            df = df.join(meandf)


data/forecastcsv/2020-12-13/BART/CABLEInput_BART_2020-12-13_2021-01-17_ens25.csv


ValueError: columns overlap but no suffix specified: Index(['tair', 'psurf', 'rainf', 'qair', 'lwdown', 'swdown', 'wind', 'co2'], dtype='object')

In [57]:
df

Unnamed: 0_level_0,tair,psurf,rainf,qair,lwdown,swdown,wind,co2
dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-12-13 00:00:00,272.116089,95524.742188,0.000074,0.003563,318.000000,0.000000,3.346879,397.329122
2020-12-13 01:00:00,272.494232,95395.867188,0.000074,0.003677,318.000000,0.000000,3.139083,397.329122
2020-12-13 02:00:00,272.900635,95285.039062,0.000074,0.003810,318.000000,0.000000,2.913510,397.329122
2020-12-13 03:00:00,273.327362,95190.882812,0.000074,0.003957,318.000000,0.000000,2.676824,397.329122
2020-12-13 04:00:00,273.766663,95112.015625,0.000074,0.004115,318.000000,0.000000,2.435687,397.329122
...,...,...,...,...,...,...,...,...
2021-01-16 20:00:00,268.445190,96194.132812,0.000000,0.001338,178.169708,175.737915,1.233682,397.329122
2021-01-16 21:00:00,268.104858,96205.382812,0.000000,0.001382,178.169708,0.000000,1.389222,397.329122
2021-01-16 22:00:00,267.105316,96225.976562,0.000000,0.001428,178.169708,0.000000,1.590060,397.329122
2021-01-16 23:00:00,265.353210,96257.343750,0.000000,0.001475,178.169708,0.000000,1.840859,397.329122


In [69]:
meandf

Unnamed: 0_level_0,Unnamed: 0,tair,rh,swdown,wind,rainf,vpd,co2,psurf,lwdown,qair
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2015-01-01 00:00:00,0,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325,252.797864,0.002093
2015-01-01 01:00:00,1,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325,252.797864,0.002093
2015-01-01 02:00:00,2,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325,252.797864,0.002093
2015-01-01 03:00:00,3,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325,252.797864,0.002093
2015-01-01 04:00:00,4,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325,252.797864,0.002093
...,...,...,...,...,...,...,...,...,...,...,...
2020-12-12 19:00:00,52147,270.694994,75.243857,58.491547,1.342477,0.000079,1.378684,409.592136,101325,256.091789,0.002291
2020-12-12 20:00:00,52148,270.694994,75.243857,58.491547,1.342477,0.000079,1.378684,409.592136,101325,256.091789,0.002291
2020-12-12 21:00:00,52149,270.694994,75.243857,58.491547,1.342477,0.000079,1.378684,409.592136,101325,256.091789,0.002291
2020-12-12 22:00:00,52150,270.694994,75.243857,58.491547,1.342477,0.000079,1.378684,409.592136,101325,256.091789,0.002291


In [70]:
pd.concat([meandf,df])

Unnamed: 0.1,Unnamed: 0,tair,rh,swdown,wind,rainf,vpd,co2,psurf,lwdown,qair
2015-01-01 00:00:00,0.0,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325.000000,252.797864,0.002093
2015-01-01 01:00:00,1.0,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325.000000,252.797864,0.002093
2015-01-01 02:00:00,2.0,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325.000000,252.797864,0.002093
2015-01-01 03:00:00,3.0,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325.000000,252.797864,0.002093
2015-01-01 04:00:00,4.0,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325.000000,252.797864,0.002093
...,...,...,...,...,...,...,...,...,...,...,...
2021-01-16 20:00:00,,268.445190,,175.737915,1.233682,0.000000,,397.329122,96194.132812,178.169708,0.001338
2021-01-16 21:00:00,,268.104858,,0.000000,1.389222,0.000000,,397.329122,96205.382812,178.169708,0.001382
2021-01-16 22:00:00,,267.105316,,0.000000,1.590060,0.000000,,397.329122,96225.976562,178.169708,0.001428
2021-01-16 23:00:00,,265.353210,,0.000000,1.840859,0.000000,,397.329122,96257.343750,178.169708,0.001475


In [66]:
meandf

Unnamed: 0.1,Unnamed: 0,time,tair,rh,swdown,wind,rainf,vpd,co2,psurf,lwdown,qair
0,0,2015-01-01 00:00:00,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325,252.797864,0.002093
1,1,2015-01-01 01:00:00,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325,252.797864,0.002093
2,2,2015-01-01 02:00:00,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325,252.797864,0.002093
3,3,2015-01-01 03:00:00,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325,252.797864,0.002093
4,4,2015-01-01 04:00:00,269.955154,71.511195,54.678515,1.957188,0.000005,1.427713,389.827418,101325,252.797864,0.002093
...,...,...,...,...,...,...,...,...,...,...,...,...
52147,52147,2020-12-12 19:00:00,270.694994,75.243857,58.491547,1.342477,0.000079,1.378684,409.592136,101325,256.091789,0.002291
52148,52148,2020-12-12 20:00:00,270.694994,75.243857,58.491547,1.342477,0.000079,1.378684,409.592136,101325,256.091789,0.002291
52149,52149,2020-12-12 21:00:00,270.694994,75.243857,58.491547,1.342477,0.000079,1.378684,409.592136,101325,256.091789,0.002291
52150,52150,2020-12-12 22:00:00,270.694994,75.243857,58.491547,1.342477,0.000079,1.378684,409.592136,101325,256.091789,0.002291
