In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from io import StringIO
import sys
import time
import csv
import logging
import os
from shapely import wkb, wkt
from shapely.geometry import Point
import traceback
from pywtk.site_lookup import get_3tiersites_from_wkt
from tqdm import tqdm

In [7]:
nearest_sites = pd.read_csv("closest_sites.csv")
nearest_sites.head()

Unnamed: 0,county,lon,lat,site_id,lon2,lat2,distance
0,MARION,-97.102771,38.359647,39413,-97.235809,38.270863,15.254648
1,MCPHERSON,-97.647489,38.395812,39309,-97.682434,38.245983,16.955811
2,ANDERSON,-95.292046,38.215114,39330,-95.423431,38.258228,12.450148
3,COFFEY,-95.729137,38.23645,39743,-95.587433,38.369919,19.33874
4,DOUGLAS,-95.290946,38.896415,42009,-95.251892,38.830338,8.097266


In [65]:
# Functions for downloading data
years = [2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014] # available years
attributes = ['windspeed_10m', 'windspeed_100m', 'temperature_10m', 'temperature_100m', 'winddirection_100m']

def point_download(point, columns, years):
    url = 'https://developer.nrel.gov/api/wind-toolkit/v2/wind/wtk-download.csv?'
    df = 'tmp'
    for yr in range(0,len(years)):
        time.sleep(2)
        year = years[yr]
        params = { 'api_key' : '037P8C9W1gMsO4cNFDFvUYvTDnIhMlA3NSVhXdCC', 'wkt' : point,
                   'names' : [year], 'email' : 'julians3.1415@gmail.com'}
        response = requests.get(url, params)
        #print(response.content)
        raw_data = StringIO(response.text)
        df_yr = pd.read_csv(raw_data, sep = ",", header=1)#[columns]
        if type(df)==str:
            df = df_yr
        else:
            df = pd.concat([df, df_yr], axis=0)
    return df

# function to name csv
def name_csv(state, county):
    return f'{state}_{county}.csv'


In [12]:
lat_test, lon_test = nearest_sites['lat'][0], nearest_sites['lon'][0]
p = Point(lat_test, lon_test)

In [25]:
points = [Point(lon, lat) for lon,lat in zip(list(nearest_sites['lon2']), list(nearest_sites['lat2']))]
nearest_sites['Point'] = points
nearest_sites.head()

Unnamed: 0,county,lon,lat,site_id,lon2,lat2,distance,Point
0,MARION,-97.102771,38.359647,39413,-97.235809,38.270863,15.254648,POINT (-97.235809 38.270863)
1,MCPHERSON,-97.647489,38.395812,39309,-97.682434,38.245983,16.955811,POINT (-97.682434 38.245983)
2,ANDERSON,-95.292046,38.215114,39330,-95.423431,38.258228,12.450148,POINT (-95.42343100000001 38.258228)
3,COFFEY,-95.729137,38.23645,39743,-95.587433,38.369919,19.33874,POINT (-95.58743299999999 38.369919)
4,DOUGLAS,-95.290946,38.896415,42009,-95.251892,38.830338,8.097266,POINT (-95.251892 38.830338)


In [50]:
df_marion = point_download(nearest_sites['Point'][0], attributes, years)
df_marion.to_csv('kansas_marion_5.csv')

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [42]:
df_marion.to_csv('kansas_marion.csv')

In [35]:
2*1500/60

50.0

In [44]:
nearest_sites

Unnamed: 0,county,lon,lat,site_id,lon2,lat2,distance,Point
0,MARION,-97.102771,38.359647,39413,-97.235809,38.270863,15.254648,POINT (-97.235809 38.270863)
1,MCPHERSON,-97.647489,38.395812,39309,-97.682434,38.245983,16.955811,POINT (-97.682434 38.245983)
2,ANDERSON,-95.292046,38.215114,39330,-95.423431,38.258228,12.450148,POINT (-95.42343100000001 38.258228)
3,COFFEY,-95.729137,38.23645,39743,-95.587433,38.369919,19.33874,POINT (-95.58743299999999 38.369919)
4,DOUGLAS,-95.290946,38.896415,42009,-95.251892,38.830338,8.097266,POINT (-95.251892 38.830338)
5,LYON,-96.161589,38.455498,39503,-96.341431,38.296246,23.676994,POINT (-96.341431 38.296246)
6,MIAMI,-94.832963,38.566772,40363,-94.594086,38.508976,21.772187,POINT (-94.594086 38.508976)
7,MORRIS,-96.651448,38.688204,41361,-96.746796,38.719349,8.979433,POINT (-96.746796 38.719349)
8,OSAGE,-95.707596,38.648409,41484,-95.561279,38.73962,16.269865,POINT (-95.561279 38.73962)
9,SHAWNEE,-95.755664,39.041805,42885,-95.797455,39.036404,3.663087,POINT (-95.797455 39.036404)


In [66]:
for i in range(1,21):
    county = nearest_sites['county'][i]
    point = nearest_sites['Point'][i]
    df = point_download(point, attributes, years)
    df.to_csv('../../direcho_data/kansas_small/kansas_{}_60.csv'.format(str(county).lower()))
    

In [61]:
df_test = df_marion.head()
df_test.to_csv("../../direcho_data/kansas_small/.csv")

## Big Run -- Second API

In [5]:
wkt_locations = pd.read_csv("US_wind_locations_3.csv")
corn = pd.read_csv("~/Documents/Schoolwork/Junior Fall/EPS 168/project/data/data_yield/Maize_1999_2019_NASS.csv")
corn.rename(columns={'State ANSI': 'STATEFP', 'County ANSI':'COUNTYFP'}, inplace=True)

In [8]:
corn_clean = corn.dropna(subset=['STATEFP','COUNTYFP']).copy()
corn_clean['ST_CT'] = [(str(elt[0])+"_"+str(int(elt[1]))) for elt in zip(np.array(corn_clean['STATEFP'].values), np.array(corn_clean['COUNTYFP'].values)) if np.isnan(elt[1]) == False] 
wkt_locations['ST_CT'] = [(str(elt[0])+"_"+str(elt[1])) for elt in zip(np.array(wkt_locations['STATEFP'].values), np.array(wkt_locations['COUNTYFP'].values))]

In [11]:
# get unique sites to download from and index to kansas sites
to_download = wkt_locations.loc[wkt_locations['ST_CT'].isin(set(corn_clean['ST_CT'].values))]
kansas_download = to_download.loc[to_download['STATE']=="Kansas"]
kansas_download.head()

Unnamed: 0.1,Unnamed: 0,NAME,STATE,STATEFP,COUNTYFP,NAMELSAD,POINT,SITE,ST_CT
121,121,Greenwood,Kansas,20,73,Greenwood County,POINT (-96.524567 37.667553),35665,20_73
122,122,Greenwood,Kansas,20,73,Greenwood County,POINT (-96.50140399999999 37.686131),35828,20_73
123,123,Greenwood,Kansas,20,73,Greenwood County,POINT (-96.431702 37.723366),36122,20_73
135,135,Doniphan,Kansas,20,43,Doniphan County,POINT (-95.33523599999999 39.923622),47564,20_43
136,136,Doniphan,Kansas,20,43,Doniphan County,POINT (-95.334656 39.979237),47994,20_43


In [13]:
# Functions for downloading data
def point_download(point, interval, years):
    url = 'https://developer.nrel.gov/api/wind-toolkit/v2/wind/wtk-download.csv?'
    df = 'tmp'
    for yr in range(0,len(years)):
        time.sleep(2)
        year = years[yr]
        params = { 'api_key' : 'yMDVMKtCa4FGh2vmvstXY8ZDZbWKqBl2vddmnKZg', 'wkt' : point,
                   'names' : [year], 'email' : 'davidwma@yahoo.com', 'interval': interval}
        response = requests.get(url, params)
        #print(response.content)
        raw_data = StringIO(response.text)
        df_yr = pd.read_csv(raw_data, sep = ",", header=1)#[columns]
        if type(df)==str:
            df = df_yr
        else:
            df = pd.concat([df, df_yr], axis=0)
    return df
# static
years = [2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014] # available years
attributes = ['windspeed_10m', 'windspeed_100m', 'temperature_10m', 'temperature_100m', 'winddirection_100m']

In [14]:
%%time
interval = 5 # set wind resolution to 5 min (default is sample every 60 mins)
# Iterate through dataframe for download (ok to do so because time is within loop)
for index in tqdm(range(140, len(kansas_download))):
    row = kansas_download.iloc[index]
    state, county, site = row['STATE'], row['NAME'], row['SITE'] # parameters for naming
    point = row['POINT'] # extract point for download
    df = point_download(point, interval, years)
    df.to_csv(f'../../direcho_data/{site}_{state}_{county}_{interval}.csv')

  0%|          | 0/128 [03:03<?, ?it/s]


ParserError: Passed header=1 but only 1 lines in file