# Fetch weather data

Dark Sky provides a nice API: the first 1,000 calls per day are free, and each additional request costs $0.0001.

In [1]:
# import ConfigImports Notebook to import and configure libs
%run ../ConfigImports.ipynb

#### Collect data from DarkSky API

In [2]:
def make_ts(year, month, day, hour, minute=0, second=0, tz=None):
    """Return the Unix timestamp (whole seconds) for a calendar date and time.

    Args:
        year, month, day, hour: Date/time components (integers).
        minute: Minute of the hour, defaults to 0.
        second: Second of the minute, defaults to 0.
        tz: Optional ``tzinfo`` the components are expressed in. With the
            default ``None`` the datetime is naive, and ``timestamp()``
            interprets a naive datetime in the local timezone of the machine
            running the notebook, so results differ between machines. Pass an
            explicit timezone to get a reproducible value.

    Returns:
        int: Seconds since the Unix epoch, truncated to a whole number.
    """
    moment = datetime(year, month, day, hour, minute, second, tzinfo=tz)
    return int(moment.timestamp())

#### Define API parameters

In [3]:
# Endpoint and credentials for the Dark Sky forecast API.
BASE_URL = 'https://api.darksky.net/forecast'
API_KEY = DARK_SKY_API_KEY  # comes from the config file

# Location to query: my house coordinates (latitude, longitude).
lat, long = 51.802931199999996, -8.302591999999999

# Request header asking for gzip-encoded responses.
headers = {'Accept-Encoding': 'gzip'}

# Unix timestamp for 2020-01-04 15:10, as computed by make_ts.
timestamp = make_ts(2020, 1, 4, 15, 10)

#### Define keys to pull from the API data

In [4]:

# Fields to keep from the `currently` section of each API response.
cur_keys = [
    'summary',
    'precipIntensity',
    'precipProbability',
    'precipType',
    'temperature',
    'apparentTemperature',
    'humidity',
    'windSpeed',
    'windGust',
    'windBearing',
    'cloudCover',
    'uvIndex',
    'visibility',
]

# Fields to keep from the first entry of the `daily` section.
daily_keys = [
    'summary',
    'sunriseTime',
    'sunsetTime',
    'temperatureHigh',
    'temperatureLow',
]

#### Define date range for data pull

Pull data hourly (always 30 minutes past an hour)

In [12]:
# One timestamp per hour, always at 30 minutes past the hour. The range starts
# at 00:30, so the last generated stamp is 2020-03-02 23:30 (the 23:59 end
# bound only needs to be later than that).
# The lowercase 'h' alias is used because the uppercase 'H' hourly alias is
# deprecated in recent pandas releases.
idx = pd.date_range(start='2019-12-27 00:30:00', end='2020-03-02 23:59:00', freq='1h')

# Single-column frame ('dt') that drives the data pull below.
df = pd.DataFrame({'dt': idx})

#### Pull data

Collect data for each timestamp and store the resulting dataset as a CSV file

In [13]:
def make_url(ts):
    """Build the Dark Sky request URL for the configured location at time `ts`.

    Uses the module-level BASE_URL, API_KEY, lat and long. The query string
    excludes the hourly, flags and minutely sections and requests `ca` units.
    """
    location = f'{lat},{long},{ts}'
    query = 'exclude=hourly,flags,minutely&units=ca'
    return f'{BASE_URL}/{API_KEY}/{location}?{query}'

# Accumulators for the pull:
#   dark_sky       - one flat record (dict) per requested timestamp
#   prev_cur_obj   - last value seen for each `currently` key
#   prev_daily_obj - last value seen for each `daily` key
# The prev_* dicts let a key that is missing from one response be filled with
# the most recent value seen for it.
dark_sky = []
prev_cur_obj = {}
prev_daily_obj = {}

# Seconds to wait for a response before aborting, so a stalled connection
# cannot hang the whole pull.
REQUEST_TIMEOUT = 30

# A single Session reuses the HTTP connection across all requests.
with requests.Session() as session:
    for stamp in tqdm(df['dt'], total=len(df)):

        # Iterate over the timestamps themselves. Going through iterrows() and
        # `row.dt` does not give the cell value: on a pandas Series `.dt` is
        # the datetime accessor, so the date parts would come back as
        # one-element Series instead of plain ints.
        ts = make_ts(stamp.year, stamp.month, stamp.day, stamp.hour, stamp.minute)

        # make a call to Dark Sky API; fail loudly on an HTTP error instead of
        # hitting a confusing KeyError on the error payload further down
        url = make_url(ts)
        response = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        sky_data = response.json()

        # Either section may be absent or empty; fall back to an empty mapping
        # so every key goes through the carry-forward logic below.
        currently = sky_data.get('currently') or {}
        daily = (sky_data.get('daily') or {}).get('data') or [{}]
        today = daily[0]

        # init object to save (plain timestamp, not the whole row)
        cur_obj = {'ts': ts, 'dt': stamp}

        # keep only selected data elements; a missing key reuses the previous
        # value, or None when the key has never been seen yet
        for k in cur_keys:
            if k in currently:
                prev_cur_obj[k] = currently[k]
            cur_obj[f'cur__{k}'] = prev_cur_obj.get(k)

        for k in daily_keys:
            if k in today:
                prev_daily_obj[k] = today[k]
            cur_obj[f'daily__{k}'] = prev_daily_obj.get(k)

        dark_sky.append(cur_obj)

# Each record already carries its own timestamp under 'dt', so the column does
# not need to be re-assigned from `df` after building the frame.
dark_sky_df = pd.DataFrame(dark_sky)
dark_sky_df.to_csv('../Datasets/dark_sky_data_2019-12-27_2020-03-02.csv', index=False)

100%|██████████| 1608/1608 [39:24<00:00,  1.47s/it] 
