# Code for running large-scale wind data downloads

This notebook uses shapely points queried via the pywtk site lookup, which were written to CSV during the spatial-intersect step.

In [37]:
# Download configuration: which state to fetch, at what sampling interval,
# for which years, and which attributes are of interest.
state = "Indiana"
interval = 5  # minutes between samples (the service default is one sample every 60 minutes)
years = list(range(2007, 2015))  # available years: 2007 through 2014 inclusive
attributes = [
    'windspeed_10m',
    'windspeed_100m',
    'temperature_10m',
    'temperature_100m',
    'winddirection_100m',
]

In [3]:
# Package Imports
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from io import StringIO
import sys
import time
import csv
import logging
import os
from os import path
from shapely import wkb, wkt
from shapely.geometry import Point, shape, Polygon
import traceback
import pywtk
from pywtk.site_lookup import get_3tiersites_from_wkt
import fiona

# Imports for timing download
from tqdm import tqdm
from tqdm.notebook import trange

In [4]:
# Load the two input tables: the wind-site lookup (one row per site, with its
# WKT point) and the NASS maize yield records.
wind_sites_csv = "US_wind_locations_3.csv"
corn_yield_csv = "~/Documents/Schoolwork/Junior Fall/EPS 168/project/data/data_yield/Maize_1999_2019_NASS.csv"

wkt_locations = pd.read_csv(wind_sites_csv)
corn = pd.read_csv(corn_yield_csv)

In [32]:
# Clean Data
def _st_ct_key(frame):
    """Build 'STATEFP_COUNTYFP' join keys with both codes rendered as plain integers.

    Both codes are normalised through an integer cast so that a column pandas
    parsed as float (e.g. because it contains missing values) still yields
    '31_39' rather than '31.0_39.0'. Rows where either code is missing or
    non-numeric get NaN as their key, which never matches anything.
    """
    codes = frame[['STATEFP', 'COUNTYFP']].apply(pd.to_numeric, errors='coerce')
    complete = codes.notna().all(axis=1)
    whole = codes[complete].astype(int).astype(str)
    keys = pd.Series(np.nan, index=frame.index, dtype=object)
    keys[complete.values] = (whole['STATEFP'] + "_" + whole['COUNTYFP']).values
    return keys

# Match the yield table's column names to the wind-site table. Rebinding (rather
# than inplace=True) keeps this cell safe to re-run.
corn = corn.rename(columns={'State ANSI': 'STATEFP', 'County ANSI': 'COUNTYFP'})
corn_clean = corn.dropna(subset=['STATEFP', 'COUNTYFP']).copy()

# Add modified FIPS column to yield and wkt data
corn_clean['ST_CT'] = _st_ct_key(corn_clean).values
wkt_locations['ST_CT'] = _st_ct_key(wkt_locations).values

# Extract wkt locations which contain a FIPS matching one in the corn dataset
to_download = wkt_locations.loc[wkt_locations['ST_CT'].isin(set(corn_clean['ST_CT'].values))]
to_download.head()

Unnamed: 0.1,Unnamed: 0,NAME,STATE,STATEFP,COUNTYFP,NAMELSAD,POINT,SITE,ST_CT
0,0,Cuming,Nebraska,31,39,Cuming County,POINT (-96.985748 41.851967),66913,31_39
5,5,Lancaster,Nebraska,31,109,Lancaster County,POINT (-96.723145 40.7766),54135,31_109
6,6,Lancaster,Nebraska,31,109,Lancaster County,POINT (-96.575378 40.703255),53444,31_109
7,7,Lancaster,Nebraska,31,109,Lancaster County,POINT (-96.7229 40.75803),53954,31_109
8,8,Nuckolls,Nebraska,31,129,Nuckolls County,POINT (-98.216003 40.032036),48472,31_129


In [38]:
# Restrict the candidate sites to the state chosen in the configuration cell
in_selected_state = to_download['STATE'] == state
selected_download = to_download[in_selected_state]

site_count = len(selected_download)
print("The following will implement the download of {} wind data sites.".format(site_count))
selected_download.head()

The following will implement the download of 210 wind data sites.


Unnamed: 0.1,Unnamed: 0,NAME,STATE,STATEFP,COUNTYFP,NAMELSAD,POINT,SITE,ST_CT
104,104,Jasper,Indiana,18,73,Jasper County,POINT (-87.236694 40.824963),58829,18_73
105,105,Jasper,Indiana,18,73,Jasper County,POINT (-87.13104199999999 41.040287),61490,18_73
106,106,Jasper,Indiana,18,73,Jasper County,POINT (-87.209656 40.841377),59065,18_73
109,109,Steuben,Indiana,18,151,Steuben County,POINT (-85.150482 41.567474),70745,18_151
110,110,Steuben,Indiana,18,151,Steuben County,POINT (-85.153839 41.549061),70473,18_151


In [7]:
# Functions for downloading data
def point_download(point, interval, years, api_key=None, email=None):
    """Download WIND Toolkit data for one site and stack all requested years.

    One HTTP request is issued per year; the yearly tables are concatenated
    row-wise in the order given by `years`.

    Parameters
    ----------
    point : str
        WKT point, e.g. "POINT (-87.236694 40.824963)", sent as the `wkt` parameter.
    interval : int
        Sampling interval in minutes, sent as the `interval` parameter.
    years : sequence of int
        Years to request.
    api_key : str, optional
        NREL developer API key. Defaults to the NREL_API_KEY environment variable.
    email : str, optional
        Contact e-mail sent with the request. Defaults to the NREL_API_EMAIL
        environment variable.

    Returns
    -------
    pandas.DataFrame
        All requested years stacked row-wise; an empty DataFrame when `years`
        is empty.

    Raises
    ------
    RuntimeError
        If no API key or e-mail is supplied or configured in the environment.
    requests.HTTPError
        If the service answers with an error status.
    """
    url = 'https://developer.nrel.gov/api/wind-toolkit/v2/wind/wtk-download.csv?'

    # Credentials are deliberately not stored in the notebook. The key that used
    # to be embedded here has been exposed and should be rotated.
    if api_key is None:
        api_key = os.environ.get('NREL_API_KEY')
    if email is None:
        email = os.environ.get('NREL_API_EMAIL')
    if not api_key or not email:
        raise RuntimeError(
            "NREL credentials missing: pass api_key/email or set the "
            "NREL_API_KEY and NREL_API_EMAIL environment variables.")

    yearly_frames = []
    for year in years:
        time.sleep(2)  # pause before every request (presumably to respect API rate limits)
        params = {'api_key': api_key, 'wkt': point,
                  'names': [year], 'email': email, 'interval': interval}
        response = requests.get(url, params=params)
        # Fail loudly instead of parsing an error body as if it were CSV data.
        response.raise_for_status()
        # header=1: column names are on the second line; the first line is skipped.
        yearly_frames.append(pd.read_csv(StringIO(response.text), sep=",", header=1))

    if not yearly_frames:
        return pd.DataFrame()
    # A single concat avoids re-copying the accumulated frame on every year.
    # sort=False keeps the column order as received.
    return pd.concat(yearly_frames, axis=0, sort=False)

In [40]:
# Make sure the per-state output directory exists. makedirs also creates a
# missing parent ("Direcho") and is a no-op when the directory is already there.
# The path is built from "~" so it matches the location the download cell writes to.
state_dir = os.path.expanduser("~/Downloads/Direcho/{}/".format(state))
os.makedirs(state_dir, exist_ok=True)

In [41]:
%%time
# Iterate through dataframe for download (ok to do so because time is within loop)
for index in tqdm(range(3+184,len(selected_download))):
    row = selected_download.iloc[index]
    state, county, site = row['STATE'], row['NAME'], row['SITE'] # parameters for naming
    point = row['POINT'] # extract point for download
    df = point_download(point, interval, years)
    df.to_csv("~/Downloads/Direcho/{}/{}_{}_{}_{}.csv".format(state, site, state, county, interval)) # save to csv

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  app.launch_new_instance()
 89%|████████▉ | 184/207 [10:00:34<1:15:04, 195.84s/it]


KeyboardInterrupt: 

In [30]:
# Verify by site ID that every selected site was downloaded
state_dir = os.path.expanduser("~/Downloads/Direcho/{}".format(state))
# File names start with the site ID ("<site>_<state>_<county>_<interval>.csv").
# Anything else in the folder (e.g. .DS_Store) is ignored instead of crashing int().
downloaded_sites = [
    int(name.split("_")[0])
    for name in os.listdir(state_dir)
    if name.endswith(".csv") and name.split("_")[0].isdigit()
]

def Diff(li1, li2):
    """Return the items that appear in exactly one of the two collections.

    Items only in `li1` come first, followed by items only in `li2`.
    """
    first, second = set(li1), set(li2)
    return list(first - second) + list(second - first)

# An empty list means the downloaded files and the selected sites match exactly.
print(Diff(selected_download.SITE.values, downloaded_sites))


[]


## Meeting 11/6
How do we optimize a damage function so that the predicted yield outcome is as close as possible to the observed one? MLR: convert every windspeed into a unit of damage. Look at log yield (fraction of yield); this accounts for different yields in different places. As windspeed increases above 15 m/s, take the excess and multiply it by a $\beta$ coefficient, e.g. an increase of 1% per (m/s) per event. Transform windspeeds into damages and then sum the damages. This is now an accumulated damage: one outcome and one yield. Adjust the rate at which the damage is occurring.