# Code for running large-scale wind data downloads

The script uses shapely points queried from the pywtk site lookup and written to CSV in the spatial intersect step.

In [None]:
# Download configuration: target state, sampling interval, years, and attributes
state = "Iowa"
interval = 5  # wind resolution in minutes (the default samples every 60 minutes)
years = list(range(2007, 2015))  # available years: 2007 through 2014 inclusive
attributes = [
    'windspeed_10m',
    'windspeed_100m',
    'temperature_10m',
    'temperature_100m',
    'winddirection_100m',
]

In [20]:
# Package Imports
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from io import StringIO
import sys
import time
import csv
import logging
import os
from os import path
from shapely import wkb, wkt
from shapely.geometry import Point, shape, Polygon
import traceback
import pywtk
from pywtk.site_lookup import get_3tiersites_from_wkt
import fiona

# Imports for timing download
from tqdm import tqdm
from tqdm.notebook import trange

In [3]:
# Load the wind-site lookup table and the maize yield table from CSV
wind_sites_csv = "US_wind_locations_3.csv"
corn_yield_csv = "~/Documents/Schoolwork/Junior Fall/EPS 168/project/data/data_yield/Maize_1999_2019_NASS.csv"
wkt_locations = pd.read_csv(wind_sites_csv)
corn = pd.read_csv(corn_yield_csv)

In [9]:
# Clean Data
# Rename the ANSI columns so they match the FIPS column names used in wkt_locations.
# `corn` is rebound rather than mutated with inplace=True, so re-running the cell is safe.
corn = corn.rename(columns={'State ANSI': 'STATEFP', 'County ANSI': 'COUNTYFP'})  # match columns
corn_clean = corn.dropna(subset=['STATEFP', 'COUNTYFP']).copy()

# Add a "<state>_<county>" FIPS key column to the yield and wkt data.
# Both corn codes are cast to int first: a float column would otherwise render as
# "19.0_53" and silently fail to match the integer-based keys built from wkt_locations.
corn_clean['ST_CT'] = (corn_clean['STATEFP'].astype(int).astype(str) + "_"
                       + corn_clean['COUNTYFP'].astype(int).astype(str))
wkt_locations['ST_CT'] = (wkt_locations['STATEFP'].astype(str) + "_"
                          + wkt_locations['COUNTYFP'].astype(str))

# Extract wkt locations whose FIPS key matches one in the corn dataset
to_download = wkt_locations.loc[wkt_locations['ST_CT'].isin(set(corn_clean['ST_CT']))]
to_download.head()

Unnamed: 0.1,Unnamed: 0,NAME,STATE,STATEFP,COUNTYFP,NAMELSAD,POINT,SITE,ST_CT
0,0,Cuming,Nebraska,31,39,Cuming County,POINT (-96.985748 41.851967),66913,31_39
5,5,Lancaster,Nebraska,31,109,Lancaster County,POINT (-96.723145 40.7766),54135,31_109
6,6,Lancaster,Nebraska,31,109,Lancaster County,POINT (-96.575378 40.703255),53444,31_109
7,7,Lancaster,Nebraska,31,109,Lancaster County,POINT (-96.7229 40.75803),53954,31_109
8,8,Nuckolls,Nebraska,31,129,Nuckolls County,POINT (-98.216003 40.032036),48472,31_129


In [17]:
# Restrict the matched sites to the configured state before downloading
state_mask = to_download['STATE'].eq(state)
selected_download = to_download[state_mask]
n_selected = len(selected_download)
print("The following will implement the download of {} wind data sites.".format(n_selected))
selected_download.head()

The following will implement the download of 267 wind data sites.


Unnamed: 0.1,Unnamed: 0,NAME,STATE,STATEFP,COUNTYFP,NAMELSAD,POINT,SITE,ST_CT
266,266,Decatur,Iowa,19,53,Decatur County,POINT (-93.77731300000001 40.8857),55562,19_53
267,267,Decatur,Iowa,19,53,Decatur County,POINT (-93.557129 40.862282),55352,19_53
368,368,Cedar,Iowa,19,31,Cedar County,POINT (-90.993286 41.851456),68641,19_31
369,369,Cedar,Iowa,19,31,Cedar County,POINT (-91.016632 41.87117),68898,19_31
370,370,Cedar,Iowa,19,31,Cedar County,POINT (-90.96993999999999 41.831738),68365,19_31


In [21]:
# Functions for downloading data
def point_download(point, interval, years, api_key=None, email=None):
    """Download wind-toolkit data for one site and stack all requested years.

    One CSV request per year is sent to the NREL ``wtk-download`` endpoint and
    the yearly tables are concatenated row-wise in the order of ``years``.

    Parameters
    ----------
    point : str
        WKT point string, e.g. ``"POINT (-96.985748 41.851967)"``.
    interval : int
        Sampling interval in minutes, forwarded as the API's ``interval`` parameter.
    years : sequence of int
        Years to request; one HTTP call is made per entry.
    api_key : str, optional
        NREL developer API key. Defaults to the ``NREL_API_KEY`` environment variable.
    email : str, optional
        Contact email sent with the request. Defaults to ``NREL_API_EMAIL``.

    Returns
    -------
    pandas.DataFrame
        All yearly tables stacked along the rows (original per-year row index kept).
        An empty DataFrame is returned when ``years`` is empty.

    Raises
    ------
    ValueError
        If no API key or email is supplied and the environment variables are unset.
    requests.HTTPError
        If the API answers with an error status.
    """
    # Credentials must never live in the notebook; read them from the environment.
    api_key = api_key or os.environ.get('NREL_API_KEY')
    email = email or os.environ.get('NREL_API_EMAIL')
    if not api_key or not email:
        raise ValueError(
            "NREL credentials missing: pass api_key/email or set the "
            "NREL_API_KEY and NREL_API_EMAIL environment variables."
        )

    url = 'https://developer.nrel.gov/api/wind-toolkit/v2/wind/wtk-download.csv?'
    yearly_frames = []
    for year in years:
        time.sleep(2)  # pause before every request (presumably to respect API rate limits)
        params = {'api_key': api_key, 'wkt': point, 'names': [year],
                  'email': email, 'interval': interval}
        response = requests.get(url, params)
        # Fail loudly instead of parsing an error body as if it were CSV data.
        response.raise_for_status()
        # header=1: the first line of the payload is skipped, the second holds column names.
        yearly_frames.append(pd.read_csv(StringIO(response.text), sep=",", header=1))

    if not yearly_frames:
        return pd.DataFrame()
    # Concatenate once at the end rather than growing a frame inside the loop.
    return pd.concat(yearly_frames, axis=0)

In [None]:
%%time
# Iterate through dataframe for download (ok to do so because time is within loop)
for index in tqdm(range(73:len(selected_download))):
    row = selected_download.iloc[index]
    state, county, site = row['STATE'], row['NAME'], row['SITE'] # parameters for naming
    point = row['POINT'] # extract point for download
    df = point_download(point, interval, years)
    df.to_csv("~/Downloads/Direcho/{}/{}_{}_{}_{}.csv".format(state, site, state, county, interval)) # save to csv

In [4]:
# rename corn data columns to be able to merge with site data. 

# merge dataframes to get rows containing crop and wind station data
# merged_df = wkt_locations.merge(corn, how = 'inner', on = ['STATEFP', 'COUNTYFP'])
# corn.columns

Index([u'Program', u'Year', u'Period', u'Week Ending', u'Geo Level', u'State',
       u'STATEFP', u'Ag District', u'Ag District Code', u'County', u'COUNTYFP',
       u'Zip Code', u'Region', u'watershed_code', u'Watershed', u'Commodity',
       u'Data Item', u'Domain', u'Domain Category', u'Value', u'CV (%)'],
      dtype='object')

In [5]:
#corn_unique = corn[['STATEFP','COUNTYFP']].drop_duplicates(subset=['STATEFP','COUNTYFP']).dropna().astype(int)

In [6]:
# ar = pd.DataFrame(np.array([1,1]))
# ar.isin(corn_unique)
# print(ar)


   0
0  1
1  1


In [7]:
# wkt_test = wkt_locations[["STATEFP","COUNTYFP"]]
# count = 0
# for i in range(len(wkt_test)):
#     if (wkt_test.iloc[i,:].isin(corn_unique).all().any()):
#     #if wkt_test.iloc[i,:].isin(corn_unique[['STATEFP','COUNTYFP']]).all():
#         count += 1
# print(count)

0


In [8]:
# test = np.vstack((np.array(corn['STATEFP'].values), np.array(corn['COUNTYFP'].values)))
# strings = [f'{int(elt[])}']

SyntaxError: invalid syntax (<fstring>, line 1)

In [5]:
# Rebuild the cleaned corn table and the "<state>_<county>" FIPS keys on both tables
corn_clean = corn.dropna(subset=['STATEFP', 'COUNTYFP']).copy()
corn_keys = [
    "{}_{}".format(state_code, int(county_code))  # county is cast to int, state is used as-is
    for state_code, county_code in zip(corn_clean['STATEFP'].values, corn_clean['COUNTYFP'].values)
]
corn_clean['ST_CT'] = corn_keys
site_keys = [
    "{}_{}".format(state_code, county_code)
    for state_code, county_code in zip(wkt_locations['STATEFP'].values, wkt_locations['COUNTYFP'].values)
]
wkt_locations['ST_CT'] = site_keys
corn_clean.head()

Unnamed: 0,Program,Year,Period,Week Ending,Geo Level,State,STATEFP,Ag District,Ag District Code,County,...,Region,watershed_code,Watershed,Commodity,Data Item,Domain,Domain Category,Value,CV (%),ST_CT
0,SURVEY,2019,YEAR,,COUNTY,ALABAMA,1,BLACK BELT,40,AUTAUGA,...,,0,,CORN,"CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,165.6,,1_1
1,SURVEY,2019,YEAR,,COUNTY,ALABAMA,1,BLACK BELT,40,DALLAS,...,,0,,CORN,"CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,133.3,,1_47
2,SURVEY,2019,YEAR,,COUNTY,ALABAMA,1,BLACK BELT,40,ELMORE,...,,0,,CORN,"CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,104.3,,1_51
4,SURVEY,2019,YEAR,,COUNTY,ALABAMA,1,BLACK BELT,40,PERRY,...,,0,,CORN,"CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,110.2,,1_105
5,SURVEY,2019,YEAR,,COUNTY,ALABAMA,1,COASTAL PLAINS & GULF COAST,50,BALDWIN,...,,0,,CORN,"CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",TOTAL,NOT SPECIFIED,150.9,,1_3


In [6]:
# Keep the wind sites whose FIPS key also occurs in the cleaned corn data
corn_key_set = set(corn_clean['ST_CT'].values)
has_corn_match = wkt_locations['ST_CT'].isin(corn_key_set)
to_download = wkt_locations.loc[has_corn_match]

In [7]:
# Preview only the first rows; dumping the whole site table bloats the notebook output
to_download.head(10)

Unnamed: 0.1,Unnamed: 0,NAME,STATE,STATEFP,COUNTYFP,NAMELSAD,POINT,SITE,ST_CT
0,0,Cuming,Nebraska,31,39,Cuming County,POINT (-96.985748 41.851967),66913,31_39
5,5,Lancaster,Nebraska,31,109,Lancaster County,POINT (-96.723145 40.7766),54135,31_109
6,6,Lancaster,Nebraska,31,109,Lancaster County,POINT (-96.575378 40.703255),53444,31_109
7,7,Lancaster,Nebraska,31,109,Lancaster County,POINT (-96.7229 40.75803),53954,31_109
8,8,Nuckolls,Nebraska,31,129,Nuckolls County,POINT (-98.216003 40.032036),48472,31_129
9,9,Nuckolls,Nebraska,31,129,Nuckolls County,POINT (-98.26370199999999 40.012466),48360,31_129
10,10,Nuckolls,Nebraska,31,129,Nuckolls County,POINT (-98.215332 40.013504),48362,31_129
11,11,Minnehaha,South Dakota,46,99,Minnehaha County,POINT (-96.475342 43.567009),87915,46_99
12,12,Minnehaha,South Dakota,46,99,Minnehaha County,POINT (-97.096497 43.804649),91048,46_99
13,13,Minnehaha,South Dakota,46,99,Minnehaha County,POINT (-97.121582 43.767155),90675,46_99


## Meeting 11/6
How do we optimize a damage function so that we get the yield outcome as close as possible? MLR - convert every windspeed into a unit of damage. Look at log yield (fraction of yield); this accounts for different yields in different places. As windspeed decreases above 15 m/s, take the excess and multiply by a $\beta$ coefficient. Increase 1%/(m/s) per event. Transform windspeed into damages and then sum the damages. This is now an accumulated damage. One outcome and one yield. Adjust the rate at which the damage is occurring.

In [12]:
# Pick out the Kansas sites from the matched download table
is_kansas = to_download['STATE'].eq("Kansas")
kansas_download = to_download[is_kansas]
n_kansas_sites = len(kansas_download)
print(f"The following will implement the download of {n_kansas_sites} wind data sites")
kansas_download.head()

The following will implement the download of 268 wind data sites


Unnamed: 0.1,Unnamed: 0,NAME,STATE,STATEFP,COUNTYFP,NAMELSAD,POINT,SITE,ST_CT
121,121,Greenwood,Kansas,20,73,Greenwood County,POINT (-96.524567 37.667553),35665,20_73
122,122,Greenwood,Kansas,20,73,Greenwood County,POINT (-96.50140399999999 37.686131),35828,20_73
123,123,Greenwood,Kansas,20,73,Greenwood County,POINT (-96.431702 37.723366),36122,20_73
135,135,Doniphan,Kansas,20,43,Doniphan County,POINT (-95.33523599999999 39.923622),47564,20_43
136,136,Doniphan,Kansas,20,43,Doniphan County,POINT (-95.334656 39.979237),47994,20_43


In [13]:
# Functions for downloading data
def point_download(point, interval, years, api_key=None, email=None):
    """Download wind-toolkit data for one site and stack all requested years.

    One CSV request per year is sent to the NREL ``wtk-download`` endpoint and
    the yearly tables are concatenated row-wise in the order of ``years``.

    Parameters
    ----------
    point : str
        WKT point string, e.g. ``"POINT (-96.524567 37.667553)"``.
    interval : int
        Sampling interval in minutes, forwarded as the API's ``interval`` parameter.
    years : sequence of int
        Years to request; one HTTP call is made per entry.
    api_key : str, optional
        NREL developer API key. Defaults to the ``NREL_API_KEY`` environment variable.
    email : str, optional
        Contact email sent with the request. Defaults to ``NREL_API_EMAIL``.

    Returns
    -------
    pandas.DataFrame
        All yearly tables stacked along the rows (original per-year row index kept).
        An empty DataFrame is returned when ``years`` is empty.

    Raises
    ------
    ValueError
        If no API key or email is supplied and the environment variables are unset.
    requests.HTTPError
        If the API answers with an error status.
    """
    # Credentials must never live in the notebook; read them from the environment.
    api_key = api_key or os.environ.get('NREL_API_KEY')
    email = email or os.environ.get('NREL_API_EMAIL')
    if not api_key or not email:
        raise ValueError(
            "NREL credentials missing: pass api_key/email or set the "
            "NREL_API_KEY and NREL_API_EMAIL environment variables."
        )

    url = 'https://developer.nrel.gov/api/wind-toolkit/v2/wind/wtk-download.csv?'
    yearly_frames = []
    for year in years:
        time.sleep(2)  # pause before every request (presumably to respect API rate limits)
        params = {'api_key': api_key, 'wkt': point, 'names': [year],
                  'email': email, 'interval': interval}
        response = requests.get(url, params)
        # Fail loudly instead of parsing an error body as if it were CSV data.
        response.raise_for_status()
        # header=1: the first line of the payload is skipped, the second holds column names.
        yearly_frames.append(pd.read_csv(StringIO(response.text), sep=",", header=1))

    if not yearly_frames:
        return pd.DataFrame()
    # Concatenate once at the end rather than growing a frame inside the loop.
    return pd.concat(yearly_frames, axis=0)
# Static download settings
years = list(range(2007, 2015))  # available years: 2007 through 2014 inclusive
attributes = [
    'windspeed_10m',
    'windspeed_100m',
    'temperature_10m',
    'temperature_100m',
    'winddirection_100m',
]

In [14]:
%%time
interval = 5 # set wind resolution to 5 min (default is sample every 60 mins)
from tqdm import tqdm
from tqdm.notebook import trange # Get timing 
# Iterate through dataframe for download (ok to do so because time is within loop)
for index in tqdm(range(7+102,len(kansas_download))):
    row = kansas_download.iloc[index]
    state, county, site = row['STATE'], row['NAME'], row['SITE'] # parameters for naming
    point = row['POINT'] # extract point for download
    df = point_download(point, interval, years)
    df.to_csv(f'../../direcho_data/{site}_{state}_{county}_{interval}.csv')

100%|██████████| 159/159 [9:43:04<00:00, 220.03s/it]  

CPU times: user 1h 6min 46s, sys: 5min 17s, total: 1h 12min 4s
Wall time: 9h 43min 4s





In [None]:
%%time
for index in tqdm(range(6, len(kansas_download))):
    print(kansas_download.iloc[index])

In [None]:
kansas_d