### MAC Season 4 Weather Data
- This notebook contains the code used to process raw weather station data from the Maricopa Agricultural Center (MAC)
- Data modified or added
    - sliced for season 4 dates only
    - added dates
    - added cumulative growing degree days
    - added cumulative precipitation
    - added boolean columns for the first and second water-deficit (drought) treatments
        - `True` on dates within the first treatment period, `False` otherwise
        - `True` on dates within the second treatment period, `False` otherwise

In [5]:
import datetime
import numpy as np
import pandas as pd

In [6]:
# Load the 2017 daily records from the MAC weather station; the season 4
# rows are selected from this table in a later cell.

raw_weather_csv = 'data/raw/mac_weather_station_raw_daily_2017.csv'
weather_df_0 = pd.read_csv(raw_weather_csv)
print(weather_df_0.shape)
# Optional preview: weather_df_0.head(3)

(365, 28)


Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_speed_mean,wind_vector_magnitude,wind_vector_direction,wind_direction_std,max_wind_speed,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean
0,2017,1,6,13.6,9.3,11.8,92.7,69.2,83.5,0.23,...,3.5,2.6,188,43,10.9,0.2,1.0,1.2,1.16,9.0
1,2017,2,6,14.9,7.2,10.5,87.7,44.7,71.4,0.39,...,2.2,1.5,129,44,5.8,0.5,1.0,1.6,0.89,5.3
2,2017,3,6,13.9,3.2,9.0,97.0,60.6,81.9,0.24,...,1.0,0.1,349,78,3.3,0.2,0.6,0.9,0.93,5.8


#### Slice dataframe for season dates only and add date column

- Planting Date: 2017-04-20, Day 110
- Last Day of Harvest: 2017-09-16, Day 259

In [8]:
# Keep only the rows from planting (day 110) through the last day of harvest
# (day 259); both endpoints are included.
in_season = weather_df_0['day_of_year'].between(110, 259)
weather_df_1 = weather_df_0.loc[in_season]
print(weather_df_1.shape)
# Optional preview: weather_df_1.head(3)

(150, 28)


Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_speed_mean,wind_vector_magnitude,wind_vector_direction,wind_direction_std,max_wind_speed,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean
109,2017,110,6,33.3,14.1,23.5,45.0,5.1,18.2,2.63,...,1.9,0.8,233,60,8.2,10.3,8.0,6.8,0.47,-3.7
110,2017,111,6,34.4,11.1,24.0,46.5,5.5,17.2,2.82,...,2.2,1.3,274,52,8.5,9.4,8.5,7.4,0.43,-4.9
111,2017,112,6,35.5,14.5,25.0,32.5,6.4,15.6,2.95,...,1.6,0.5,178,66,5.2,11.0,8.0,6.7,0.45,-4.2


In [9]:
# One calendar day per entry, from the planting date through the last day of harvest.
season_4_date_range = pd.date_range('2017-04-20', '2017-09-16', freq='D')

In [10]:
# Build each row's calendar date from that row's own year and day_of_year
# (January 1 of the year plus day_of_year - 1 days) instead of pasting a
# separately built date range on by position. A missing, duplicated, or
# out-of-order station record can then never shift the date labels of the
# rows around it.
weather_df_2 = weather_df_1.copy()
jan_first = pd.to_datetime(weather_df_2['year'].astype(str), format='%Y')
day_offset = pd.to_timedelta(weather_df_2['day_of_year'] - 1, unit='D')
weather_df_2['date'] = jan_first + day_offset
print(weather_df_2.shape)
# Optional preview: weather_df_2.tail(3)

(150, 29)


Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_vector_magnitude,wind_vector_direction,wind_direction_std,max_wind_speed,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean,date
256,2017,257,6,39.5,22.8,31.4,50.6,17.8,32.9,3.29,...,3.6,203,34,13.6,15.1,8.6,9.2,1.45,12.5,2017-09-14
257,2017,258,6,36.2,21.4,28.5,63.7,14.2,33.7,2.82,...,2.1,192,42,9.9,14.2,7.7,7.4,1.2,9.3,2017-09-15
258,2017,259,6,36.3,18.2,27.6,51.4,16.7,29.9,2.8,...,1.4,168,47,8.0,12.8,7.0,6.5,1.07,7.8,2017-09-16


#### Add Growing Degree Days
- 10 degrees Celsius is base temp for sorghum
- Daily gdd value = ((max temp + min temp) / 2) - 10 (base temp)
- Growing Degree Days = cumulative sum of daily gdd values
- Negative values convert to 0

In [11]:
# Daily growing degree value: the midpoint of the day's max and min air
# temperature, minus the base temperature of 10.
midpoint_temp = (weather_df_2['air_temp_max'] + weather_df_2['air_temp_min']) / 2
weather_df_3 = weather_df_2.assign(daily_gdd=midpoint_temp - 10)
print(weather_df_3.shape)
# Optional preview: weather_df_3.head(3)

(150, 30)


Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_vector_direction,wind_direction_std,max_wind_speed,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean,date,daily_gdd
109,2017,110,6,33.3,14.1,23.5,45.0,5.1,18.2,2.63,...,233,60,8.2,10.3,8.0,6.8,0.47,-3.7,2017-04-20,13.7
110,2017,111,6,34.4,11.1,24.0,46.5,5.5,17.2,2.82,...,274,52,8.5,9.4,8.5,7.4,0.43,-4.9,2017-04-21,12.75
111,2017,112,6,35.5,14.5,25.0,32.5,6.4,15.6,2.95,...,178,66,5.2,11.0,8.0,6.7,0.45,-4.2,2017-04-22,15.0


In [12]:
# Show any rows whose daily gdd is below zero; an empty frame means there are none.

has_negative_gdd = weather_df_3['daily_gdd'] < 0
weather_df_3[has_negative_gdd]

Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_vector_direction,wind_direction_std,max_wind_speed,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean,date,daily_gdd


In [13]:
# Accumulate the daily values into growing degree days, rounded to the
# nearest whole number. Negative daily values are floored at 0 before
# summing, so a day below the base temperature can never subtract from the
# running total.
weather_df_4 = weather_df_3.copy()
weather_df_4['gdd'] = np.rint(weather_df_4['daily_gdd'].clip(lower=0).cumsum())
print(weather_df_4.shape)
# Optional preview: weather_df_4.tail(3)

(150, 31)


Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_direction_std,max_wind_speed,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean,date,daily_gdd,gdd
256,2017,257,6,39.5,22.8,31.4,50.6,17.8,32.9,3.29,...,34,13.6,15.1,8.6,9.2,1.45,12.5,2017-09-14,21.15,2999.0
257,2017,258,6,36.2,21.4,28.5,63.7,14.2,33.7,2.82,...,42,9.9,14.2,7.7,7.4,1.2,9.3,2017-09-15,18.8,3018.0
258,2017,259,6,36.3,18.2,27.6,51.4,16.7,29.9,2.8,...,47,8.0,12.8,7.0,6.5,1.07,7.8,2017-09-16,17.25,3035.0


In [14]:
# The per-day value was only an intermediate step; retain just the cumulative gdd column.

weather_df_5 = weather_df_4.drop(columns=['daily_gdd'])
print(weather_df_5.shape)
# Optional preview: weather_df_5.tail()

(150, 30)


In [15]:
# Running total of precip_total over the season, rounded to two decimal places.
running_precip = weather_df_5['precip_total'].cumsum().round(2)
weather_df_6 = weather_df_5.assign(cum_precip=running_precip)
# Optional preview: weather_df_6.head(3)

Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,wind_direction_std,max_wind_speed,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean,date,gdd,cum_precip
109,2017,110,6,33.3,14.1,23.5,45.0,5.1,18.2,2.63,...,60,8.2,10.3,8.0,6.8,0.47,-3.7,2017-04-20,14.0,0.0
110,2017,111,6,34.4,11.1,24.0,46.5,5.5,17.2,2.82,...,52,8.5,9.4,8.5,7.4,0.43,-4.9,2017-04-21,26.0,0.0
111,2017,112,6,35.5,14.5,25.0,32.5,6.4,15.6,2.95,...,66,5.2,11.0,8.0,6.7,0.45,-4.2,2017-04-22,41.0,0.0


#### Add columns for water-deficit stress treatment 
- First water-deficit stress treatment: 2017-08-01 through 2017-08-14
- Second water-deficit stress treatment: 2017-08-15 through 2017-08-30

In [16]:
# Daily date ranges (both endpoints included) covered by each water-deficit stress treatment.
first_treatment_dates = pd.date_range('2017-08-01', '2017-08-14', freq='D')
second_treatment_dates = pd.date_range('2017-08-15', '2017-08-30', freq='D')

In [17]:
# Season dates as a plain array, one entry per row of weather_df_6.
season_dates = weather_df_6['date'].to_numpy()

In [18]:
# first_treatment_dates

DatetimeIndex(['2017-08-01', '2017-08-02', '2017-08-03', '2017-08-04',
               '2017-08-05', '2017-08-06', '2017-08-07', '2017-08-08',
               '2017-08-09', '2017-08-10', '2017-08-11', '2017-08-12',
               '2017-08-13', '2017-08-14'],
              dtype='datetime64[ns]', freq='D')

In [19]:
# Flag each season date that falls within the first treatment date range:
# True marks a water-deficit treatment day, False indicates regular
# irrigation on that date. One vectorized membership test replaces the
# per-date append loop and yields the same list of booleans, in season order.

first_treatment_col = pd.Series(season_dates).isin(first_treatment_dates).tolist()

print(len(first_treatment_col))

150


In [20]:
# Flag each season date that falls within the second treatment date range:
# True marks a water-deficit treatment day, False indicates regular
# irrigation on that date. One vectorized membership test replaces the
# per-date append loop and yields the same list of booleans, in season order.
second_treatment_col = pd.Series(season_dates).isin(second_treatment_dates).tolist()

In [21]:
# Attach both treatment flag lists as new columns on a copy of the weather table.
weather_df_7 = weather_df_6.assign(
    first_water_deficit_treatment=first_treatment_col,
    second_water_deficit_treatment=second_treatment_col,
)

print(weather_df_7.shape)
weather_df_7.head(3)

(150, 33)


Unnamed: 0,year,day_of_year,station_number,air_temp_max,air_temp_min,air_temp_mean,rh_max,rh_min,rh_mean,vpd_mean,...,heat_units,eto_azmet,eto_p_m,vapor_pressure_mean,dewpoint_mean,date,gdd,cum_precip,first_water_deficit_treatment,second_water_deficit_treatment
109,2017,110,6,33.3,14.1,23.5,45.0,5.1,18.2,2.63,...,10.3,8.0,6.8,0.47,-3.7,2017-04-20,14.0,0.0,False,False
110,2017,111,6,34.4,11.1,24.0,46.5,5.5,17.2,2.82,...,9.4,8.5,7.4,0.43,-4.9,2017-04-21,26.0,0.0,False,False
111,2017,112,6,35.5,14.5,25.0,32.5,6.4,15.6,2.95,...,11.0,8.0,6.7,0.45,-4.2,2017-04-22,41.0,0.0,False,False


#### Write weather data to csv

In [None]:
# Stamp the output file name with the current time, truncated to the second.
current_second = datetime.datetime.now().replace(microsecond=0)
timestamp = current_second.isoformat()

# Colons are stripped from the finished name before it is used as a path.
output_filename = f'data/processed/mac_season_4_daily_weather_{timestamp}.csv'
output_filename = output_filename.replace(':', '')

# Write the processed table without the dataframe index.
weather_df_7.to_csv(output_filename, index=False)