### Code used to clean and reformat weather data from
- MAC Season 4
- MAC Season 6
- KSU (Ashland Bottoms)
- Clemson (Pee Dee Research and Education Center)

### Weather parameters included in all processed data sets
- Date 
- Day of year
- Temperature minimum
- Temperature maximum
- Temperature mean
- Accumulated growing degree days (gdd)
- Relative humidity minimum
- Relative humidity maximum
- Relative humidity mean
- Vapor pressure deficit
- Precipitation
- Cumulative precipitation
- First water deficit treatment (for MAC season 4)
- Second water deficit treatment (for MAC season 4)

### Season dates
- MAC Season 4
    - Planting: 2017-04-20, Day 110
    - Last Day of Harvest: 2017-09-16, Day 259 
- MAC Season 6
    - Planting: 2018-04-25, Day 115
    - Harvest: 2018-08-01, Day 213
- KSU
    - Planting: 2016-06-17, Day 169
    - Harvest: 2016-10-21, Day 295
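- Clemson
    - Planting and harvest dates are not listed here (TODO); no Clemson data are processed in the cells below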

#### See README for information on units, accessing raw data, and downloading processed data

In [1]:
import datetime
import numpy as np
import pandas as pd

##### Functions

In [60]:
def calculate_vpd(temp_avg, rh_avg):
    """Return the vapor pressure deficit for a mean temperature and mean relative humidity.

    Parameters
    ----------
    temp_avg : scalar or array-like
        Mean air temperature. 273 is added to it, so degrees Celsius is assumed.
    rh_avg : scalar or array-like
        Mean relative humidity in percent (0-100).

    Returns
    -------
    Same shape as the inputs: ``(100 - rh_avg) / 1000`` times the saturation
    vapor pressure.

    Notes
    -----
    The saturation vapor pressure uses an exponential (Clausius-Clapeyron style)
    form with the constants 6.11, 2500000 and 461.
    NOTE(review): 6.11 looks like hPa at 273 K, which would make the result kPa
    after the /1000 -- confirm against the README units.
    """
    saturation_vp = 6.11 * np.exp((2500000/461) * (1/273 - 1/(273 + temp_avg)))
    deficit_fraction = (100 - rh_avg)/1000
    return deficit_fraction * saturation_vp

In [45]:
def save_to_csv(list_of_dfs, list_of_output_filenames):
    """Write each DataFrame to the matching output path as CSV, without the index.

    Parameters
    ----------
    list_of_dfs : iterable of pandas.DataFrame
        Frames to write.
    list_of_output_filenames : iterable
        One destination per frame, in the same order. Missing parent
        directories of string / path-like destinations are created.

    Raises
    ------
    ValueError
        If the number of frames and destinations differ (previously the
        unmatched items were silently skipped by ``zip``).
    """
    import os

    frames = list(list_of_dfs)
    destinations = list(list_of_output_filenames)
    if len(frames) != len(destinations):
        raise ValueError(
            'save_to_csv got {} dataframes but {} output filenames'.format(
                len(frames), len(destinations)))

    for frame, destination in zip(frames, destinations):
        # Only filesystem paths have a parent directory; buffers are passed straight through.
        if isinstance(destination, (str, os.PathLike)):
            parent_dir = os.path.dirname(os.fspath(destination))
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
        frame.to_csv(destination, index=False)

##### URLs for downloading raw weather data

In [73]:
# Direct-download links (de.cyverse.org) for the raw weather files read below:
# two MAC daily station files (2017, 2018) and the KSU hourly and daily files.
s4_url = 'https://de.cyverse.org/dl/d/7D6C8FD6-EF77-437C-89E6-412EA8C3EEC6/mac_weather_station_raw_daily_2017.csv'
s6_url = 'https://de.cyverse.org/dl/d/233C21D5-1306-4028-9CF9-FF4AF0EAC405/mac_weather_station_raw_daily_2018.csv'
ksu_hourly_url = 'https://de.cyverse.org/dl/d/D80C07D7-5F68-4C86-B15A-9BAAF472D3A4/ksu_hourly_weather.csv'
ksu_daily_url = 'https://de.cyverse.org/dl/d/64805E3B-0460-4AA1-8D8A-2D7246E05B35/ashland_bottoms_daily_weather_2016.csv' 

### A. MAC Season 4

In [5]:
# Load the raw 2017 MAC daily station file (one row per day of the year);
# the season subset is taken in the next step.
s4_0 = pd.read_csv(s4_url)
print(s4_0.shape)
s4_0.head(3)

(365, 28)


Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_speed_mean,wind_vector_magnitude,wind_vector_direction,wind_direction_std,max_wind_speed,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean
0,2017,1,6,13.6,9.3,11.8,92.7,69.2,83.5,0.23,...,3.5,2.6,188,43,10.9,0.2,1.0,1.2,1.16,9.0
1,2017,2,6,14.9,7.2,10.5,87.7,44.7,71.4,0.39,...,2.2,1.5,129,44,5.8,0.5,1.0,1.6,0.89,5.3
2,2017,3,6,13.9,3.2,9.0,97.0,60.6,81.9,0.24,...,1.0,0.1,349,78,3.3,0.2,0.6,0.9,0.93,5.8
3,2017,4,6,20.4,3.0,11.0,97.8,31.4,73.2,0.46,...,0.9,0.3,76,68,3.6,2.3,1.8,1.5,0.92,5.7
4,2017,5,6,20.9,4.0,12.3,95.5,39.8,71.0,0.53,...,1.5,0.8,253,54,7.1,2.5,2.2,1.8,0.98,6.6


##### Slice for season dates only and add date column

In [10]:
# Keep only the season-4 rows: day of year 110 (planting) through 259 (last harvest day), inclusive.
in_season_4 = s4_0['day_of_year'].between(110, 259)
s4_1 = s4_0.loc[in_season_4]
# Calendar dates covering the same window.
season_4_date_range = pd.date_range('2017-04-20', '2017-09-16')

In [11]:
s4_2 = s4_1.copy()
# Build each row's date from its own year and day_of_year instead of assigning a
# hard-coded range by position, so a missing, duplicated or out-of-order day in
# the raw file cannot shift the date labels.
s4_2['date'] = (pd.to_datetime(s4_2['year'].astype(str), format='%Y')
                + pd.to_timedelta(s4_2['day_of_year'] - 1, unit='D'))
# The derived dates must be exactly the documented season window, one row per day.
assert s4_2['date'].tolist() == list(season_4_date_range), \
    'season 4 rows do not cover 2017-04-20..2017-09-16 exactly once each, in order'
# s4_2.tail(3)

Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_vector_magnitude,wind_vector_direction,wind_direction_std,max_wind_speed,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean,date
256,2017,257,6,39.5,22.8,31.4,50.6,17.8,32.9,3.29,...,3.6,203,34,13.6,15.1,8.6,9.2,1.45,12.5,2017-09-14
257,2017,258,6,36.2,21.4,28.5,63.7,14.2,33.7,2.82,...,2.1,192,42,9.9,14.2,7.7,7.4,1.2,9.3,2017-09-15
258,2017,259,6,36.3,18.2,27.6,51.4,16.7,29.9,2.8,...,1.4,168,47,8.0,12.8,7.0,6.5,1.07,7.8,2017-09-16


##### Add growing degree days

In [15]:
s4_3 = s4_2.copy()
# Daily growing degree days: mean of the daily max and min temperature minus the
# base temperature of 10. Floored at 0 so a day cooler than the base adds nothing
# instead of subtracting from the accumulated total.
s4_3['daily_gdd'] = ((s4_3['air_temp_max'] + s4_3['air_temp_min']) / 2 - 10).clip(lower=0)

In [17]:
# Accumulate the daily values over the season and round to the nearest whole degree day.
s4_4 = s4_3.assign(gdd=s4_3['daily_gdd'].cumsum().round())

In [19]:
# The per-day helper column is no longer needed once the cumulative gdd exists.
s4_5 = s4_4.drop(columns='daily_gdd')
# print(s4_5.shape)
# s4_5.head()

(150, 30)


Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_vector_direction,wind_direction_std,max_wind_speed,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean,date,gdd
109,2017,110,6,33.3,14.1,23.5,45.0,5.1,18.2,2.63,...,233,60,8.2,10.3,8.0,6.8,0.47,-3.7,2017-04-20,14.0
110,2017,111,6,34.4,11.1,24.0,46.5,5.5,17.2,2.82,...,274,52,8.5,9.4,8.5,7.4,0.43,-4.9,2017-04-21,26.0
111,2017,112,6,35.5,14.5,25.0,32.5,6.4,15.6,2.95,...,178,66,5.2,11.0,8.0,6.7,0.45,-4.2,2017-04-22,41.0
112,2017,113,6,37.0,12.6,26.5,48.0,6.8,17.5,3.25,...,221,57,8.2,10.3,8.3,7.5,0.52,-2.2,2017-04-23,56.0
113,2017,114,6,33.7,14.9,25.7,33.1,8.3,17.5,2.89,...,214,46,9.9,10.8,8.0,7.8,0.55,-1.7,2017-04-24,71.0


In [21]:
# Running total of daily precipitation from the first day of the season.
s4_6 = s4_5.assign(cum_precip=s4_5['precip_total'].cumsum())

##### Add columns for water-deficit stress treatment 
- First water-deficit stress treatment: 2017-08-01 through 2017-08-14
- Second water-deficit stress treatment: 2017-08-15 through 2017-08-30

In [24]:
# Inclusive date windows of the two water-deficit treatments.
first_treatment_dates = pd.date_range('2017-08-01', '2017-08-14')
second_treatment_dates = pd.date_range('2017-08-15', '2017-08-30')
# Season dates as a plain array, one entry per row of s4_6.
season_dates = s4_6['date'].values

In [25]:
# One boolean per season date: True when the date falls inside the first
# water-deficit treatment window, False otherwise.
first_treatment_col = [d in first_treatment_dates for d in season_dates]

In [26]:
# One boolean per season date: True when the date falls inside the second
# water-deficit treatment window, False otherwise.
second_treatment_col = [d in second_treatment_dates for d in season_dates]

In [27]:
# Attach the two treatment flags; the lists are in the same row order as s4_6.
s4_7 = s4_6.assign(
    first_water_deficit_treatment=first_treatment_col,
    second_water_deficit_treatment=second_treatment_col,
)

##### Drop columns

In [28]:
# s4_7.columns

Index(['year', 'day_of_year', 'station_number', 'air_temp_max', 'air_temp_min',
       'air_temp_mean', 'rh_max', 'rh_min', 'rh_mean', 'vpd_mean',
       'solar_rad_total', 'precip_total', '4_in_soil_temp_max',
       '4_in_soil_temp_min', '4_in_soil_temp_mean', '20_in_soil_temp_max',
       '20_in_soil_temp_min', '20_in_soil_temp_mean', 'wind_speed_mean',
       'wind_vector_magnitude', 'wind_vector_direction', 'wind_direction_std',
       'max_wind_speed', 'heat_units', 'eto_azmet', 'eto_p_m',
       'vapor_pressure_mean', 'dewpoint_mean', 'date', 'gdd', 'cum_precip',
       'first_water_deficit_treatment', 'second_water_deficit_treatment'],
      dtype='object')

In [29]:
s4_cols_to_keep = ['day_of_year', 'air_temp_max', 'air_temp_min', 'air_temp_mean', 'rh_max', 'rh_min', 'rh_mean', 'vpd_mean',
                'precip_total', 'date', 'gdd', 'cum_precip', 'first_water_deficit_treatment',
                'second_water_deficit_treatment']

# Select by label so a missing or renamed raw column raises KeyError;
# pd.DataFrame(data=..., columns=...) would silently add it as an all-NaN column.
s4_8 = s4_7[s4_cols_to_keep].copy()
# print(s4_8.shape)
# s4_8.head()

(150, 14)


Unnamed: 0,day_of_year,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,precip_total,date,gdd,cum_precip,first_water_deficit_treatment,second_water_deficit_treatment
109,110,33.3,14.1,23.5,45.0,5.1,18.2,2.63,0.0,2017-04-20,14.0,0.0,False,False
110,111,34.4,11.1,24.0,46.5,5.5,17.2,2.82,0.0,2017-04-21,26.0,0.0,False,False
111,112,35.5,14.5,25.0,32.5,6.4,15.6,2.95,0.0,2017-04-22,41.0,0.0,False,False
112,113,37.0,12.6,26.5,48.0,6.8,17.5,3.25,0.0,2017-04-23,56.0,0.0,False,False
113,114,33.7,14.9,25.7,33.1,8.3,17.5,2.89,0.0,2017-04-24,71.0,0.0,False,False


##### Rename some columns

In [30]:
# New names, listed in the same order as the columns of s4_8 (renaming is positional).
s4_new_col_names = [
    'day_of_year', 'temp_max', 'temp_min', 'temp_mean', 'rh_max', 'rh_min', 'rh_mean', 'vpd_mean',
    'precip', 'date', 'gdd', 'precip_cumulative', 'first_water_deficit_treatment', 'second_water_deficit_treatment',
]

s4_9 = s4_8.set_axis(s4_new_col_names, axis=1)
# print(s4_9.shape)
# s4_9.tail()

(150, 14)


Unnamed: 0,day_of_year,temp_max,temp_min,temp_mean,rh_max,rh_min,rh_mean,vpd_mean,precip,date,gdd,precip_cumulative,first_water_deficit_treatment,second_water_deficit_treatment
254,255,42.8,24.2,34.0,58.0,13.3,28.1,4.14,0.0,2017-09-12,2955.0,50.79,False,False
255,256,41.3,24.3,33.9,54.1,14.3,27.3,4.08,0.0,2017-09-13,2978.0,50.79,False,False
256,257,39.5,22.8,31.4,50.6,17.8,32.9,3.29,0.0,2017-09-14,2999.0,50.79,False,False
257,258,36.2,21.4,28.5,63.7,14.2,33.7,2.82,0.0,2017-09-15,3018.0,50.79,False,False
258,259,36.3,18.2,27.6,51.4,16.7,29.9,2.8,0.0,2017-09-16,3035.0,50.79,False,False


##### Reorder columns

In [34]:
# Final column layout shared by all processed weather files:
# date and day first, then temperature, gdd, humidity, vpd, precipitation, treatment flags.
s4_new_col_order = ['date', 'day_of_year', 'temp_min', 'temp_max', 'temp_mean', 'gdd', 'rh_min', 'rh_max', 'rh_mean', 
                    'vpd_mean', 'precip', 'precip_cumulative', 'first_water_deficit_treatment', 'second_water_deficit_treatment']

In [36]:
# Rearrange the columns into the final layout (all rows kept).
s4_10 = s4_9.loc[:, s4_new_col_order]
# print(s4_10.shape)
# s4_10.tail(3)

(150, 14)


Unnamed: 0,date,day_of_year,temp_min,temp_max,temp_mean,gdd,rh_min,rh_max,rh_mean,vpd_mean,precip,precip_cumulative,first_water_deficit_treatment,second_water_deficit_treatment
256,2017-09-14,257,22.8,39.5,31.4,2999.0,17.8,50.6,32.9,3.29,0.0,50.79,False,False
257,2017-09-15,258,21.4,36.2,28.5,3018.0,14.2,63.7,33.7,2.82,0.0,50.79,False,False
258,2017-09-16,259,18.2,36.3,27.6,3035.0,16.7,51.4,29.9,2.8,0.0,50.79,False,False


##### Round numeric columns to two decimal places, except for `gdd`, which was already rounded to the nearest integer

In [39]:
s4_cols_to_round = [
    'temp_min', 'temp_max', 'temp_mean', 'rh_min', 'rh_max', 'rh_mean', 'vpd_mean', 'precip',
    'precip_cumulative',
]

# Round only the listed columns to 2 decimals; every other column is left as is.
s4_11 = s4_10.round(dict.fromkeys(s4_cols_to_round, 2))
print(s4_11.shape)
s4_11.head(3)

(150, 14)


Unnamed: 0,date,day_of_year,temp_min,temp_max,temp_mean,gdd,rh_min,rh_max,rh_mean,vpd_mean,precip,precip_cumulative,first_water_deficit_treatment,second_water_deficit_treatment
109,2017-04-20,110,14.1,33.3,23.5,14.0,5.1,45.0,18.2,2.63,0.0,0.0,False,False
110,2017-04-21,111,11.1,34.4,24.0,26.0,5.5,46.5,17.2,2.82,0.0,0.0,False,False
111,2017-04-22,112,14.5,35.5,25.0,41.0,6.4,32.5,15.6,2.95,0.0,0.0,False,False


### B. MAC Season 6

In [6]:
# Load the raw 2018 MAC daily station file (one row per day of the year).
# Note that some raw column names differ from the 2017 file (e.g. station_num, eto).
s6_0 = pd.read_csv(s6_url)
print(s6_0.shape)
s6_0.head(3)

(365, 28)


Unnamed: 0,year,day_of_year,station_num,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_speed_mean,wind_vec_mag,wind_vec_dir,wind_dir_std,max_wind_speed,heat_units,eto,eto_pm,vapor_pressure_mean,dewpoint_mean
0,2018,1,6,23.6,0.4,10.4,75.9,11.7,41.5,0.94,...,0.9,0.1,159,74,3.8,3.4,2.3,1.8,0.46,-4.0
1,2018,2,6,23.0,3.6,11.6,70.4,11.7,37.7,1.01,...,0.8,0.3,302,67,3.8,3.4,2.5,1.7,0.47,-3.7
2,2018,3,6,24.2,1.8,12.9,73.9,11.8,35.7,1.17,...,2.0,1.2,50,50,8.1,3.7,2.8,3.1,0.47,-3.6
3,2018,4,6,25.5,9.6,16.6,52.6,15.9,30.8,1.41,...,1.0,0.2,108,72,4.2,5.4,3.0,2.1,0.57,-1.2
4,2018,5,6,24.6,3.4,12.9,76.6,21.0,46.4,0.96,...,0.8,0.1,126,75,3.3,4.0,2.5,1.8,0.64,0.7


##### Slice for season dates only and add date column

In [12]:
# Keep only the season-6 rows: day of year 115 (planting) through 213 (harvest), inclusive.
in_season_6 = s6_0['day_of_year'].between(115, 213)
s6_1 = s6_0.loc[in_season_6]
# Calendar dates covering the same window.
season_6_date_range = pd.date_range('2018-04-25', '2018-08-01')

In [13]:
s6_2 = s6_1.copy()
# Build each row's date from its own year and day_of_year instead of assigning a
# hard-coded range by position, so a missing, duplicated or out-of-order day in
# the raw file cannot shift the date labels.
s6_2['date'] = (pd.to_datetime(s6_2['year'].astype(str), format='%Y')
                + pd.to_timedelta(s6_2['day_of_year'] - 1, unit='D'))
# The derived dates must be exactly the documented season window, one row per day.
assert s6_2['date'].tolist() == list(season_6_date_range), \
    'season 6 rows do not cover 2018-04-25..2018-08-01 exactly once each, in order'
# s6_2.tail(3)

Unnamed: 0,year,day_of_year,station_num,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_vec_mag,wind_vec_dir,wind_dir_std,max_wind_speed,heat_units,eto,eto_pm,vapor_pressure_mean,dewpoint_mean,date
210,2018,211,6,41.9,27.2,34.2,64.9,22.3,41.4,3.44,...,1.0,215,66,12.5,16.7,8.7,9.1,2.12,18.3,2018-07-30
211,2018,212,6,40.9,26.0,33.7,79.8,18.8,42.1,3.35,...,0.6,154,65,8.4,16.3,7.6,7.4,2.03,17.6,2018-07-31
212,2018,213,6,42.9,28.9,36.0,64.7,14.9,36.3,4.05,...,1.5,154,52,14.7,17.1,8.6,8.9,2.02,17.4,2018-08-01


##### Add growing degree days

In [16]:
s6_3 = s6_2.copy()
# Daily growing degree days: mean of the daily max and min temperature minus the
# base temperature of 10. Floored at 0 so a day cooler than the base adds nothing
# instead of subtracting from the accumulated total.
s6_3['daily_gdd'] = ((s6_3['air_temp_max'] + s6_3['air_temp_min']) / 2 - 10).clip(lower=0)

In [18]:
# Accumulate the daily values over the season and round to the nearest whole degree day.
s6_4 = s6_3.assign(gdd=s6_3['daily_gdd'].cumsum().round())

In [20]:
# The per-day helper column is no longer needed once the cumulative gdd exists.
s6_5 = s6_4.drop(columns='daily_gdd')
# print(s6_5.shape)
# s6_5.head()

(99, 30)


Unnamed: 0,year,day_of_year,station_num,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_vec_dir,wind_dir_std,max_wind_speed,heat_units,eto,eto_pm,vapor_pressure_mean,dewpoint_mean,date,gdd
114,2018,115,6,36.6,17.1,27.2,31.0,6.5,16.8,3.26,...,185,57,5.6,12.4,7.7,6.7,0.56,-1.4,2018-04-25,17.0
115,2018,116,6,34.5,18.2,26.8,33.3,9.9,18.7,3.04,...,208,53,5.9,12.6,6.1,6.7,0.63,0.3,2018-04-26,33.0
116,2018,117,6,36.4,17.6,28.6,48.7,9.6,20.5,3.35,...,215,59,7.2,12.6,7.9,7.1,0.74,2.5,2018-04-27,50.0
117,2018,118,6,35.9,19.1,28.1,41.0,8.3,17.8,3.32,...,234,48,8.1,13.2,8.6,8.1,0.63,0.0,2018-04-28,68.0
118,2018,119,6,33.8,15.6,24.8,44.3,7.3,18.9,2.75,...,201,44,11.1,11.1,8.6,8.3,0.55,-1.7,2018-04-29,82.0


In [22]:
# Running total of daily precipitation from the first day of the season.
s6_6 = s6_5.assign(cum_precip=s6_5['precip_total'].cumsum())

##### Add first and second water deficit treatment columns
- All values will be `False`

In [43]:
# Both treatment flags are constant False for this data set; the columns exist
# so the output has the same layout as the season-4 file.
s6_7 = s6_6.assign(
    first_water_deficit_treatment=False,
    second_water_deficit_treatment=False,
)
# s6_7.head()

Unnamed: 0,year,day_of_year,station_num,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,heat_units,eto,eto_pm,vapor_pressure_mean,dewpoint_mean,date,gdd,cum_precip,first_water_deficit_treatment,second_water_deficit_treatment
114,2018,115,6,36.6,17.1,27.2,31.0,6.5,16.8,3.26,...,12.4,7.7,6.7,0.56,-1.4,2018-04-25,17.0,0.0,False,False
115,2018,116,6,34.5,18.2,26.8,33.3,9.9,18.7,3.04,...,12.6,6.1,6.7,0.63,0.3,2018-04-26,33.0,0.0,False,False
116,2018,117,6,36.4,17.6,28.6,48.7,9.6,20.5,3.35,...,12.6,7.9,7.1,0.74,2.5,2018-04-27,50.0,0.0,False,False
117,2018,118,6,35.9,19.1,28.1,41.0,8.3,17.8,3.32,...,13.2,8.6,8.1,0.63,0.0,2018-04-28,68.0,0.0,False,False
118,2018,119,6,33.8,15.6,24.8,44.3,7.3,18.9,2.75,...,11.1,8.6,8.3,0.55,-1.7,2018-04-29,82.0,0.0,False,False


##### Drop some columns

In [47]:
s6_cols_to_keep = ['day_of_year', 'air_temp_max', 'air_temp_min', 'air_temp_mean', 'rh_max', 'rh_min', 'rh_mean', 'vpd_mean',
                   'precip_total', 'date', 'gdd', 'cum_precip', 'first_water_deficit_treatment',
                   'second_water_deficit_treatment']
# Select by label so a missing or renamed raw column raises KeyError;
# pd.DataFrame(data=..., columns=...) would silently add it as an all-NaN column.
s6_8 = s6_7[s6_cols_to_keep].copy()
# print(s6_8.shape)
# s6_8.head()

(99, 14)


Unnamed: 0,day_of_year,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,precip_total,date,gdd,cum_precip,first_water_deficit_treatment,second_water_deficit_treatment
114,115,36.6,17.1,27.2,31.0,6.5,16.8,3.26,0.0,2018-04-25,17.0,0.0,False,False
115,116,34.5,18.2,26.8,33.3,9.9,18.7,3.04,0.0,2018-04-26,33.0,0.0,False,False
116,117,36.4,17.6,28.6,48.7,9.6,20.5,3.35,0.0,2018-04-27,50.0,0.0,False,False
117,118,35.9,19.1,28.1,41.0,8.3,17.8,3.32,0.0,2018-04-28,68.0,0.0,False,False
118,119,33.8,15.6,24.8,44.3,7.3,18.9,2.75,0.0,2018-04-29,82.0,0.0,False,False


##### Rename columns

In [48]:
# New names, listed in the same order as the columns of s6_8 (renaming is positional).
s6_new_col_names = [
    'day_of_year', 'temp_max', 'temp_min', 'temp_mean', 'rh_max', 'rh_min', 'rh_mean', 'vpd_mean',
    'precip', 'date', 'gdd', 'precip_cumulative', 'first_water_deficit_treatment', 'second_water_deficit_treatment',
]

s6_9 = s6_8.set_axis(s6_new_col_names, axis=1)
# print(s6_9.shape)
# s6_9.tail()

(99, 14)


Unnamed: 0,day_of_year,temp_max,temp_min,temp_mean,rh_max,rh_min,rh_mean,vpd_mean,precip,date,gdd,precip_cumulative,first_water_deficit_treatment,second_water_deficit_treatment
208,209,41.6,28.5,34.8,64.4,22.1,37.2,3.7,0.0,2018-07-28,1877.0,5.08,False,False
209,210,38.2,25.6,31.8,85.2,26.5,50.6,2.54,0.0,2018-07-29,1899.0,5.08,False,False
210,211,41.9,27.2,34.2,64.9,22.3,41.4,3.44,0.0,2018-07-30,1923.0,5.08,False,False
211,212,40.9,26.0,33.7,79.8,18.8,42.1,3.35,0.0,2018-07-31,1947.0,5.08,False,False
212,213,42.9,28.9,36.0,64.7,14.9,36.3,4.05,0.0,2018-08-01,1973.0,5.08,False,False


##### Reorder columns

In [50]:
# Final column layout, identical to the one used for the season-4 output.
s6_new_col_order = ['date', 'day_of_year', 'temp_min', 'temp_max', 'temp_mean', 'gdd', 'rh_min', 'rh_max', 'rh_mean', 
                    'vpd_mean', 'precip', 'precip_cumulative', 'first_water_deficit_treatment', 'second_water_deficit_treatment']

In [51]:
# Rearrange the columns into the final layout (all rows kept).
s6_10 = s6_9.loc[:, s6_new_col_order]
# print(s6_10.shape)
# s6_10.tail(3)

(99, 14)


Unnamed: 0,date,day_of_year,temp_min,temp_max,temp_mean,gdd,rh_min,rh_max,rh_mean,vpd_mean,precip,precip_cumulative,first_water_deficit_treatment,second_water_deficit_treatment
210,2018-07-30,211,27.2,41.9,34.2,1923.0,22.3,64.9,41.4,3.44,0.0,5.08,False,False
211,2018-07-31,212,26.0,40.9,33.7,1947.0,18.8,79.8,42.1,3.35,0.0,5.08,False,False
212,2018-08-01,213,28.9,42.9,36.0,1973.0,14.9,64.7,36.3,4.05,0.0,5.08,False,False


##### Round numeric columns to two decimal places, except for `gdd`, which was already rounded to the nearest integer

In [52]:
s6_cols_to_round = [
    'temp_min', 'temp_max', 'temp_mean', 'rh_min', 'rh_max', 'rh_mean', 'vpd_mean', 'precip',
    'precip_cumulative',
]

# Round only the listed columns to 2 decimals; every other column is left as is.
s6_11 = s6_10.round(dict.fromkeys(s6_cols_to_round, 2))
print(s6_11.shape)
s6_11.head(3)

(99, 14)


Unnamed: 0,date,day_of_year,temp_min,temp_max,temp_mean,gdd,rh_min,rh_max,rh_mean,vpd_mean,precip,precip_cumulative,first_water_deficit_treatment,second_water_deficit_treatment
114,2018-04-25,115,17.1,36.6,27.2,17.0,6.5,31.0,16.8,3.26,0.0,0.0,False,False
115,2018-04-26,116,18.2,34.5,26.8,33.0,9.9,33.3,18.7,3.04,0.0,0.0,False,False
116,2018-04-27,117,17.6,36.4,28.6,50.0,9.6,48.7,20.5,3.35,0.0,0.0,False,False


### C. KSU Hourly
- to calculate daily vapor pressure deficit mean values

In [62]:
# Load the raw KSU hourly file. Its first two rows hold the aggregation type
# and the units of each column rather than observations (dropped in the next step).
kh_0 = pd.read_csv(ksu_hourly_url)
print(kh_0.shape)
kh_0.head(3)

(3056, 10)


Unnamed: 0,Timestamp,Station,AirTemperature,RelativeHumidity,Precipitation,WindDirection2m,WindSpeed2m,SoilTemperature5cm,SoilTemperature10cm,SolarRadiation
0,,,avg,avg,total,vector,avg,avg,avg,avg
1,,,°C,%,mm,degrees,m/s,°C,°C,W/m²
2,2016-06-17 00:00,Ashland Bottoms,27.0,84.6,0,55.4,1.2,26.0,24.8,0


In [63]:
# Skip the two leading rows (aggregation type and units); observations start at row 2.
kh_1 = kh_0.iloc[2:, :]
# kh_1.head(3)

In [64]:
# The column was read as text because of the header rows; convert it to numbers,
# turning anything unparseable into NaN.
kh_2 = kh_1.assign(AirTemperature=pd.to_numeric(kh_1['AirTemperature'], errors='coerce'))

In [65]:
# Same numeric conversion for relative humidity (unparseable values become NaN).
kh_3 = kh_2.assign(RelativeHumidity=pd.to_numeric(kh_2['RelativeHumidity'], errors='coerce'))
# print(kh_3.shape)
# print(kh_3.dtypes)
# kh_3.head()

(3054, 10)
Timestamp               object
Station                 object
AirTemperature         float64
RelativeHumidity       float64
Precipitation           object
WindDirection2m         object
WindSpeed2m             object
SoilTemperature5cm      object
SoilTemperature10cm     object
SolarRadiation          object
dtype: object


Unnamed: 0,Timestamp,Station,AirTemperature,RelativeHumidity,Precipitation,WindDirection2m,WindSpeed2m,SoilTemperature5cm,SoilTemperature10cm,SolarRadiation
2,2016-06-17 00:00,Ashland Bottoms,27.0,84.6,0,55.4,1.2,26.0,24.8,0
3,2016-06-17 01:00,Ashland Bottoms,26.7,85.7,0,47.0,0.8,26.0,24.7,0
4,2016-06-17 02:00,Ashland Bottoms,26.4,85.6,0,80.4,0.2,25.7,24.6,0
5,2016-06-17 03:00,Ashland Bottoms,26.0,85.7,0,227.5,0.3,25.5,24.5,0
6,2016-06-17 04:00,Ashland Bottoms,24.8,91.0,0,319.6,0.3,25.3,24.4,0


In [66]:
# Hourly vapor pressure deficit from each hour's air temperature and relative humidity.
hourly_vpd = calculate_vpd(kh_3['AirTemperature'], kh_3['RelativeHumidity'])
kh_4 = kh_3.assign(vpd_mean=hourly_vpd)
# print(kh_4.shape)
# kh_4.head()

(3054, 11)


Unnamed: 0,Timestamp,Station,AirTemperature,RelativeHumidity,Precipitation,WindDirection2m,WindSpeed2m,SoilTemperature5cm,SoilTemperature10cm,SolarRadiation,vpd_mean
2,2016-06-17 00:00,Ashland Bottoms,27.0,84.6,0,55.4,1.2,26.0,24.8,0,0.562333
3,2016-06-17 01:00,Ashland Bottoms,26.7,85.7,0,47.0,0.8,26.0,24.7,0,0.512803
4,2016-06-17 02:00,Ashland Bottoms,26.4,85.6,0,80.4,0.2,25.7,24.6,0,0.507111
5,2016-06-17 03:00,Ashland Bottoms,26.0,85.7,0,227.5,0.3,25.5,24.5,0,0.491533
6,2016-06-17 04:00,Ashland Bottoms,24.8,91.0,0,319.6,0.3,25.3,24.4,0,0.287554


In [67]:
# kh_4.isnull().sum()

Timestamp              6
Station                6
AirTemperature         6
RelativeHumidity       6
Precipitation          0
WindDirection2m        0
WindSpeed2m            0
SoilTemperature5cm     0
SoilTemperature10cm    0
SolarRadiation         0
vpd_mean               6
dtype: int64

In [68]:
# Discard every row that has a missing value in any column.
kh_5 = kh_4.dropna()
# print(kh_5.shape)

(3048, 11)


In [69]:
# The first 10 characters of each 'YYYY-MM-DD HH:MM' timestamp string are the date part.
just_dates = kh_5['Timestamp'].str[:10].tolist()

In [70]:
# Add the date string of each hourly row; just_dates is in the same row order as kh_5.
kh_6 = kh_5.assign(date=just_dates)

# kh_6.tail()

Unnamed: 0,Timestamp,Station,AirTemperature,RelativeHumidity,Precipitation,WindDirection2m,WindSpeed2m,SoilTemperature5cm,SoilTemperature10cm,SolarRadiation,vpd_mean,date
3051,2016-10-21 19:00,Ashland Bottoms,14.7,49.8,0,156.2,1.3,15.1,15.3,0,0.846333,2016-10-21
3052,2016-10-21 20:00,Ashland Bottoms,13.4,54.7,0,168.2,1.3,14.9,15.3,0,0.701097,2016-10-21
3053,2016-10-21 21:00,Ashland Bottoms,12.8,55.7,0,157.9,1.6,14.7,15.3,0,0.6589,2016-10-21
3054,2016-10-21 22:00,Ashland Bottoms,11.8,59.3,0,148.8,1.4,14.6,15.2,0,0.566337,2016-10-21
3055,2016-10-21 23:00,Ashland Bottoms,11.2,61.2,0,153.6,1.7,14.5,15.1,0,0.518626,2016-10-21


In [71]:
# Daily vpd: the mean of that day's hourly vpd values, one row per date.
kh_7 = kh_6.groupby('date')['vpd_mean'].mean().reset_index()
# print(kh_7.shape)
# kh_7.head(3)

(127, 2)


Unnamed: 0,date,vpd_mean
0,2016-06-17,1.795359
1,2016-06-18,1.359035
2,2016-06-19,1.637793


##### Add minimum and maximum relative humidity

In [108]:
# Lowest and highest hourly relative humidity of each day, one row per date.
kh_8 = (
    kh_6.groupby('date')['RelativeHumidity']
    .agg(['min', 'max'])
    .rename(columns={'min': 'rh_min', 'max': 'rh_max'})
    .reset_index()
)
# print(kh_8.shape)
# kh_8.head(3)

(127, 3)


Unnamed: 0,date,rh_min,rh_max
0,2016-06-17,37.2,93.5
1,2016-06-18,42.3,88.8
2,2016-06-19,41.6,80.3


In [109]:
# Combine the daily vpd and daily humidity extremes into one table keyed by date.
kh_9 = pd.merge(kh_7, kh_8, how='left', on='date')
print(kh_9.shape)
kh_9.tail(3)

(127, 4)


Unnamed: 0,date,vpd_mean,rh_min,rh_max
124,2016-10-19,0.694933,43.0,96.3
125,2016-10-20,0.416545,42.4,93.4
126,2016-10-21,0.569678,35.1,98.3


### D. KSU Daily

In [74]:
# Load the raw KSU daily file. As in the hourly file, the first two rows hold the
# aggregation type (max/min/avg/total) and the units of each column.
ksu_0 = pd.read_csv(ksu_daily_url)
print(ksu_0.shape)
ksu_0.head(3)

(129, 16)


Unnamed: 0.1,Unnamed: 0,Timestamp,Station,AirTemperature,AirTemperature.1,RelativeHumidity,Precipitation,WindSpeed2m,WindSpeed2m.1,SoilTemperature5cm,SoilTemperature5cm.1,SoilTemperature10cm,SoilTemperature10cm.1,SolarRadiation,ETo,ETo.1
0,,,,max,min,avg,total,avg,max,max,min,max,min,total,grass,alfalfa
1,,,,°C,°C,%,mm,m/s,m/s,°C,°C,°C,°C,MJ/m²,mm,mm
2,,2016-06-17,Ashland Bottoms,37.8,24,66.6,0,2.3,9,27.5,25,25.7,24,30.4,7.96,9.92


In [75]:
# Skip the two leading rows (aggregation type and units); observations start at row 2.
ksu_1 = ksu_0.iloc[2:, :]

In [110]:
# Attach the hourly-derived daily vpd and humidity extremes to each daily row,
# matching the daily Timestamp to the hourly date. All daily rows are kept.
ksu_2 = pd.merge(ksu_1, kh_9, how='left', left_on='Timestamp', right_on='date')
print(ksu_2.shape)
ksu_2.head()

(127, 20)


Unnamed: 0.1,Unnamed: 0,Timestamp,Station,AirTemperature,AirTemperature.1,RelativeHumidity,Precipitation,WindSpeed2m,WindSpeed2m.1,SoilTemperature5cm,SoilTemperature5cm.1,SoilTemperature10cm,SoilTemperature10cm.1,SolarRadiation,ETo,ETo.1,date,vpd_mean,rh_min,rh_max
0,,2016-06-17,Ashland Bottoms,37.8,24.0,66.6,0.0,2.3,9.0,27.5,25.0,25.7,24.0,30.4,7.96,9.92,2016-06-17,1.795359,37.2,93.5
1,,2016-06-18,Ashland Bottoms,33.1,21.7,66.4,5.33,2.8,12.1,26.7,24.5,25.4,24.3,22.7,6.33,8.26,2016-06-18,1.359035,42.3,88.8
2,,2016-06-19,Ashland Bottoms,35.3,21.9,62.5,0.0,2.9,7.4,26.7,24.0,25.2,23.7,29.3,7.7,9.9,2016-06-19,1.637793,41.6,80.3
3,,2016-06-20,Ashland Bottoms,37.3,23.7,59.5,0.0,2.6,8.9,28.2,24.9,26.3,24.1,30.2,8.12,10.33,2016-06-20,2.016626,39.3,81.4
4,,2016-06-21,Ashland Bottoms,38.5,22.6,53.7,0.0,2.4,9.1,28.0,25.2,26.2,24.5,30.7,8.35,10.73,2016-06-21,2.406509,29.6,86.1


##### Convert temperature, relative humidity, and precipitation values to numeric

In [117]:
# ksu_2.dtypes

In [116]:
# Columns needed as numbers downstream: max temperature, min temperature
# ('AirTemperature.1'), mean relative humidity and total precipitation.
ksu_numeric_cols = ['AirTemperature', 'AirTemperature.1', 'RelativeHumidity', 'Precipitation']
# Convert each one from text, turning anything unparseable into NaN.
ksu_3 = ksu_2.assign(**{
    col: pd.to_numeric(ksu_2[col], errors='coerce') for col in ksu_numeric_cols
})
# print(ksu_3.shape)
# print(ksu_3.dtypes)
# ksu_3.tail()

(127, 20)
Unnamed: 0               float64
Timestamp                 object
Station                   object
AirTemperature           float64
AirTemperature.1         float64
RelativeHumidity         float64
Precipitation            float64
WindSpeed2m               object
WindSpeed2m.1             object
SoilTemperature5cm        object
SoilTemperature5cm.1      object
SoilTemperature10cm       object
SoilTemperature10cm.1     object
SolarRadiation            object
ETo                       object
ETo.1                     object
date                      object
vpd_mean                 float64
rh_min                   float64
rh_max                   float64
dtype: object


Unnamed: 0.1,Unnamed: 0,Timestamp,Station,AirTemperature,AirTemperature.1,RelativeHumidity,Precipitation,WindSpeed2m,WindSpeed2m.1,SoilTemperature5cm,SoilTemperature5cm.1,SoilTemperature10cm,SoilTemperature10cm.1,SolarRadiation,ETo,ETo.1,date,vpd_mean,rh_min,rh_max
122,,2016-10-17,Ashland Bottoms,32.5,19.6,59.4,0.0,3.9,12.8,21.1,19.1,19.2,18.3,17.1,5.55,8.33,2016-10-17,1.429893,38.0,80.2
123,,2016-10-18,Ashland Bottoms,23.5,9.7,57.1,0.0,1.8,7.2,19.4,16.9,19.0,17.4,18.0,2.98,4.1,2016-10-18,0.958322,38.4,87.1
124,,2016-10-19,Ashland Bottoms,23.4,7.4,67.2,0.0,2.1,8.2,17.6,15.4,17.6,16.2,15.7,2.88,4.07,2016-10-19,0.694933,43.0,96.3
125,,2016-10-20,Ashland Bottoms,16.7,5.1,74.6,2.03,2.0,8.1,16.6,14.4,16.8,15.7,13.8,2.21,3.1,2016-10-20,0.416545,42.4,93.4
126,,2016-10-21,Ashland Bottoms,20.1,1.5,67.4,0.0,1.8,7.3,15.4,12.6,15.7,14.2,16.8,2.58,3.7,2016-10-21,0.569678,35.1,98.3


##### Add growing degree days

In [118]:
ksu_4 = ksu_3.copy()
# Daily growing degree days: mean of the daily max ('AirTemperature') and min
# ('AirTemperature.1') minus the base temperature of 10. Floored at 0 so a day
# cooler than the base adds nothing instead of subtracting from the accumulated total.
ksu_4['daily_gdd'] = ((ksu_4['AirTemperature'] + ksu_4['AirTemperature.1']) / 2 - 10).clip(lower=0)

In [119]:
# Accumulate the daily values over the season and round to the nearest whole degree day.
ksu_5 = ksu_4.assign(gdd=ksu_4['daily_gdd'].cumsum().round())

In [120]:
# The per-day helper column is no longer needed once the cumulative gdd exists.
ksu_6 = ksu_5.drop(columns='daily_gdd')
# print(ksu_6.shape)
# ksu_6.head()

(127, 21)


Unnamed: 0.1,Unnamed: 0,Timestamp,Station,AirTemperature,AirTemperature.1,RelativeHumidity,Precipitation,WindSpeed2m,WindSpeed2m.1,SoilTemperature5cm,...,SoilTemperature10cm,SoilTemperature10cm.1,SolarRadiation,ETo,ETo.1,date,vpd_mean,rh_min,rh_max,gdd
0,,2016-06-17,Ashland Bottoms,37.8,24.0,66.6,0.0,2.3,9.0,27.5,...,25.7,24.0,30.4,7.96,9.92,2016-06-17,1.795359,37.2,93.5,21.0
1,,2016-06-18,Ashland Bottoms,33.1,21.7,66.4,5.33,2.8,12.1,26.7,...,25.4,24.3,22.7,6.33,8.26,2016-06-18,1.359035,42.3,88.8,38.0
2,,2016-06-19,Ashland Bottoms,35.3,21.9,62.5,0.0,2.9,7.4,26.7,...,25.2,23.7,29.3,7.7,9.9,2016-06-19,1.637793,41.6,80.3,57.0
3,,2016-06-20,Ashland Bottoms,37.3,23.7,59.5,0.0,2.6,8.9,28.2,...,26.3,24.1,30.2,8.12,10.33,2016-06-20,2.016626,39.3,81.4,77.0
4,,2016-06-21,Ashland Bottoms,38.5,22.6,53.7,0.0,2.4,9.1,28.0,...,26.2,24.5,30.7,8.35,10.73,2016-06-21,2.406509,29.6,86.1,98.0


##### Add cumulative precipitation

In [121]:
# Running total of daily precipitation from the first day of the season.
ksu_7 = ksu_6.assign(precip_cumulative=ksu_6['Precipitation'].cumsum())

##### Add first and second water deficit treatment columns
- All values will be `False`

In [122]:
# Both treatment flags are constant False for this data set; the columns exist
# so the output has the same layout as the MAC season-4 file.
ksu_8 = ksu_7.assign(
    first_water_deficit_treatment=False,
    second_water_deficit_treatment=False,
)
ksu_8.head()

Unnamed: 0.1,Unnamed: 0,Timestamp,Station,AirTemperature,AirTemperature.1,RelativeHumidity,Precipitation,WindSpeed2m,WindSpeed2m.1,SoilTemperature5cm,...,ETo,ETo.1,date,vpd_mean,rh_min,rh_max,gdd,precip_cumulative,first_water_deficit_treatment,second_water_deficit_treatment
0,,2016-06-17,Ashland Bottoms,37.8,24.0,66.6,0.0,2.3,9.0,27.5,...,7.96,9.92,2016-06-17,1.795359,37.2,93.5,21.0,0.0,False,False
1,,2016-06-18,Ashland Bottoms,33.1,21.7,66.4,5.33,2.8,12.1,26.7,...,6.33,8.26,2016-06-18,1.359035,42.3,88.8,38.0,5.33,False,False
2,,2016-06-19,Ashland Bottoms,35.3,21.9,62.5,0.0,2.9,7.4,26.7,...,7.7,9.9,2016-06-19,1.637793,41.6,80.3,57.0,5.33,False,False
3,,2016-06-20,Ashland Bottoms,37.3,23.7,59.5,0.0,2.6,8.9,28.2,...,8.12,10.33,2016-06-20,2.016626,39.3,81.4,77.0,5.33,False,False
4,,2016-06-21,Ashland Bottoms,38.5,22.6,53.7,0.0,2.4,9.1,28.0,...,8.35,10.73,2016-06-21,2.406509,29.6,86.1,98.0,5.33,False,False


##### Drop some columns

In [123]:
# List every available column before choosing which ones to keep.
ksu_8.columns

Index(['Unnamed: 0', 'Timestamp', 'Station', 'AirTemperature',
       'AirTemperature.1', 'RelativeHumidity', 'Precipitation', 'WindSpeed2m',
       'WindSpeed2m.1', 'SoilTemperature5cm', 'SoilTemperature5cm.1',
       'SoilTemperature10cm', 'SoilTemperature10cm.1', 'SolarRadiation', 'ETo',
       'ETo.1', 'date', 'vpd_mean', 'rh_min', 'rh_max', 'gdd',
       'precip_cumulative', 'first_water_deficit_treatment',
       'second_water_deficit_treatment'],
      dtype='object')

In [124]:
ksu_cols_to_keep = ['AirTemperature', 'AirTemperature.1', 'RelativeHumidity', 'Precipitation', 'date', 'vpd_mean',
                    'rh_min', 'rh_max', 'gdd', 'precip_cumulative', 'first_water_deficit_treatment',
                    'second_water_deficit_treatment']
# Select by label so a missing or renamed column raises KeyError;
# pd.DataFrame(data=..., columns=...) would silently add it as an all-NaN column.
ksu_9 = ksu_8[ksu_cols_to_keep].copy()
print(ksu_9.shape)
ksu_9.head()

(127, 12)


Unnamed: 0,AirTemperature,AirTemperature.1,RelativeHumidity,Precipitation,date,vpd_mean,rh_min,rh_max,gdd,precip_cumulative,first_water_deficit_treatment,second_water_deficit_treatment
0,37.8,24.0,66.6,0.0,2016-06-17,1.795359,37.2,93.5,21.0,0.0,False,False
1,33.1,21.7,66.4,5.33,2016-06-18,1.359035,42.3,88.8,38.0,5.33,False,False
2,35.3,21.9,62.5,0.0,2016-06-19,1.637793,41.6,80.3,57.0,5.33,False,False
3,37.3,23.7,59.5,0.0,2016-06-20,2.016626,39.3,81.4,77.0,5.33,False,False
4,38.5,22.6,53.7,0.0,2016-06-21,2.406509,29.6,86.1,98.0,5.33,False,False


##### Add `day_of_year` and `temp_mean` columns

In [125]:
# Day-of-year numbers 169 (planting, 2016-06-17) through 295 (harvest, 2016-10-21), inclusive.
# NOTE(review): hard-coded; assumes the daily rows are complete and in date order -- confirm.
days_of_year = list(range(169, 296))

In [127]:
# Label each row with its day of year; the list is applied in row order.
ksu_10 = ksu_9.assign(day_of_year=days_of_year)

In [138]:
# Mean temperature here is the average of the daily max and daily min
# (row-wise mean of the two columns).
max_and_min_temp = ksu_10[['AirTemperature', 'AirTemperature.1']]
ksu_11 = ksu_10.assign(temp_mean=max_and_min_temp.mean(axis=1))
print(ksu_11.shape)
ksu_11.head()

(127, 14)


Unnamed: 0,AirTemperature,AirTemperature.1,RelativeHumidity,Precipitation,date,vpd_mean,rh_min,rh_max,gdd,precip_cumulative,first_water_deficit_treatment,second_water_deficit_treatment,day_of_year,temp_mean
0,37.8,24.0,66.6,0.0,2016-06-17,1.795359,37.2,93.5,21.0,0.0,False,False,169,30.9
1,33.1,21.7,66.4,5.33,2016-06-18,1.359035,42.3,88.8,38.0,5.33,False,False,170,27.4
2,35.3,21.9,62.5,0.0,2016-06-19,1.637793,41.6,80.3,57.0,5.33,False,False,171,28.6
3,37.3,23.7,59.5,0.0,2016-06-20,2.016626,39.3,81.4,77.0,5.33,False,False,172,30.5
4,38.5,22.6,53.7,0.0,2016-06-21,2.406509,29.6,86.1,98.0,5.33,False,False,173,30.55


##### Rename columns

In [139]:
# New names, listed in the same order as the columns of ksu_11 (renaming is positional):
# 'AirTemperature' -> temp_max, 'AirTemperature.1' -> temp_min,
# 'RelativeHumidity' -> rh_mean, 'Precipitation' -> precip; the rest keep their names.
ksu_new_col_names = [
    'temp_max', 'temp_min', 'rh_mean', 'precip', 'date', 'vpd_mean', 'rh_min', 'rh_max', 'gdd',
    'precip_cumulative', 'first_water_deficit_treatment', 'second_water_deficit_treatment',
    'day_of_year', 'temp_mean',
]
ksu_12 = ksu_11.set_axis(ksu_new_col_names, axis=1)
print(ksu_12.shape)
ksu_12.tail()

(127, 14)


Unnamed: 0,temp_max,temp_min,rh_mean,precip,date,vpd_mean,rh_min,rh_max,gdd,precip_cumulative,first_water_deficit_treatment,second_water_deficit_treatment,day_of_year,temp_mean
122,32.5,19.6,59.4,0.0,2016-10-17,1.429893,38.0,80.2,1727.0,525.52,False,False,291,26.05
123,23.5,9.7,57.1,0.0,2016-10-18,0.958322,38.4,87.1,1733.0,525.52,False,False,292,16.6
124,23.4,7.4,67.2,0.0,2016-10-19,0.694933,43.0,96.3,1739.0,525.52,False,False,293,15.4
125,16.7,5.1,74.6,2.03,2016-10-20,0.416545,42.4,93.4,1739.0,527.55,False,False,294,10.9
126,20.1,1.5,67.4,0.0,2016-10-21,0.569678,35.1,98.3,1740.0,527.55,False,False,295,10.8


##### Reorder columns

In [140]:
# Final column layout, identical to the one used for the MAC outputs.
ksu_new_col_order = ['date', 'day_of_year', 'temp_min', 'temp_max', 'temp_mean', 'gdd', 'rh_min', 'rh_max', 'rh_mean', 
                    'vpd_mean', 'precip', 'precip_cumulative', 'first_water_deficit_treatment', 'second_water_deficit_treatment']

In [141]:
# Rearrange the columns into the final layout (all rows kept).
ksu_13 = ksu_12.loc[:, ksu_new_col_order]

##### Round numeric columns to two decimal places, except for `gdd`, which was already rounded to the nearest integer

In [142]:
ksu_cols_to_round = [
    'temp_min', 'temp_max', 'temp_mean', 'rh_min', 'rh_max', 'rh_mean', 'vpd_mean', 'precip',
    'precip_cumulative',
]

# Round only the listed columns to 2 decimals; every other column is left as is.
ksu_14 = ksu_13.round(dict.fromkeys(ksu_cols_to_round, 2))
print(ksu_14.shape)
ksu_14.head(3)

(127, 14)


Unnamed: 0,date,day_of_year,temp_min,temp_max,temp_mean,gdd,rh_min,rh_max,rh_mean,vpd_mean,precip,precip_cumulative,first_water_deficit_treatment,second_water_deficit_treatment
0,2016-06-17,169,24.0,37.8,30.9,21.0,37.2,93.5,66.6,1.8,0.0,0.0,False,False
1,2016-06-18,170,21.7,33.1,27.4,38.0,42.3,88.8,66.4,1.36,5.33,5.33,False,False
2,2016-06-19,171,21.9,35.3,28.6,57.0,41.6,80.3,62.5,1.64,0.0,5.33,False,False


##### Save weather data to .csv format

In [None]:
# Destination of each processed table, in the same order as the tables below.
list_of_output_filenames = [
    'data/weather/mac_season_4_weather.csv',
    'data/weather/mac_season_6_weather.csv',
    'data/weather/ksu_weather.csv',
]
# Final tables: MAC season 4, MAC season 6, KSU.
list_of_dfs = [s4_11, s6_11, ksu_14]

save_to_csv(list_of_dfs, list_of_output_filenames)