In [80]:
#title           :fc_py3.ipynb
#description     :Downloads the 5-day weather forecast and predicts drop rates from it, refreshing every 3 hours.
#author          :Yuanyuan Zhao
#date            :20181003
#version         :3
#notes           :
#python_version  :3.6.4
#==============================================================================

from datetime import datetime, date, time, timedelta
import time as t1
import requests
import bs4
import json
import yaml
import csv
import numpy as np
from sklearn.externals import joblib

In [81]:
# scrapes weather forecast data from openweathermap.org
def spider(path, timeout=30):
    """Download the OpenWeatherMap forecast for every location listed in a CSV file.

    Parameters
    ----------
    path : str
        CSV file whose rows hold latitude in the first column and longitude
        in the second. Empty rows are skipped.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    list
        One row per forecast entry, locations concatenated in file order:
        [dt, temp, pressure, humidity, temp_min, temp_max, wind_speed, clouds,
         [(weather_id, weather, description), ...]]
        where dt is a UTC timestamp formatted as 'YYYY-mm-dd HH:MM:SS'.
        The same list is also published as the module-level ``fcResult``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    import os  # local import: only needed to look up the API key

    global fcResult
    fcResult = []

    # NOTE(review): the hard-coded key is kept only as a backward-compatible
    # fallback; set OPENWEATHERMAP_APPID and remove the literal from the notebook.
    api_key = os.environ.get("OPENWEATHERMAP_APPID", "ea4985020f724407dea8833c9dfee64c")

    # reads in locations, ignoring blank rows
    with open(path, "r", newline="") as csvf:
        locs = [row for row in csv.reader(csvf) if row]

    for loc in locs:
        # https keeps the API key out of clear text; params= handles URL encoding
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/forecast",
            params={"lat": loc[0].strip(), "lon": loc[1].strip(), "APPID": api_key},
            timeout=timeout,
        )
        response.raise_for_status()
        # response.json() already yields plain Python objects; no YAML round-trip needed
        json_data = response.json()

        for x in json_data['list']:
            weather_data = [(item["id"], item["main"], item["description"])
                            for item in x["weather"]]
            fcResult.append([
                datetime.utcfromtimestamp(int(x["dt"])).strftime('%Y-%m-%d %H:%M:%S'),
                x["main"]["temp"],
                x["main"]["pressure"],
                x["main"]["humidity"],
                x["main"]["temp_min"],
                x["main"]["temp_max"],
                x["wind"]["speed"],
                x["clouds"]["all"],
                weather_data,
            ])
    return fcResult

In [82]:
#processing of forecast weather data

In [83]:
#create dictionary mapping each weather id (first column of idList.csv) to its
#position in the bag-of-weather feature vector; positions follow file order
ids = {}
with open("idList.csv", 'r') as idf:
    next(idf, None)  # skip the first line (treated as a header)
    data_lines = (line for line in idf if line.strip())  # ignore blank lines
    for key, line in enumerate(data_lines):
        ids[int(line.split(',')[0])] = key

In [84]:
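#format of input weather data (one row per forecast step, as built by spider):
#[dt, temp, pressure, humidity, temp_min, temp_max, wind_speed, clouds, [(weather_id, weather, description), ...]]
#format of processed weather data (one row per forecast step, all 10 locations combined):
#[temp, temp_diff, pressure, humidity, wind_speed, clouds] for each location in turn (temp_diff = temp_max - temp_min),
#followed by the bag-of-weather vector summed over the locations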
def process(weather, wnum=40, locnum=10, id_index=None):
    """Turn raw forecast rows for one path into per-time-step model inputs.

    Parameters
    ----------
    weather : list
        Forecast rows laid out location after location, ``wnum`` rows per
        location. Each row is
        [dt, temp, pressure, humidity, temp_min, temp_max, wind_speed, clouds,
         [(weather_id, weather, description), ...]].
    wnum : int, optional
        Number of forecast rows per location.
    locnum : int, optional
        Number of locations on the path.
    id_index : dict, optional
        Maps a weather id to its position in the bag-of-weather vector.
        Defaults to the module-level ``ids`` dictionary.

    Returns
    -------
    (list, list)
        The timestamps of the first ``wnum`` rows, and a list of ``wnum``
        1-D arrays. Each array holds the six numeric features
        [temp, temp_max - temp_min, pressure, humidity, wind_speed, clouds]
        of every location in turn, followed by the bag-of-weather vectors of
        those locations summed element-wise.

    Raises
    ------
    IndexError
        If ``weather`` has fewer than ``wnum * locnum`` rows.
    KeyError
        If a weather id is missing from ``id_index``.
    """
    if id_index is None:
        id_index = ids

    needed = wnum * locnum
    if len(weather) < needed:
        raise IndexError(
            "expected at least %d forecast rows (%d locations x %d steps), got %d"
            % (needed, locnum, wnum, len(weather)))

    # the first location's timestamps stand for the whole path
    timestamps = [row[0] for row in weather[:wnum]]

    features = []
    for row in weather:
        numeric = [float(row[1]), float(row[5]) - float(row[4]), float(row[2]),
                   float(row[3]), float(row[6]), float(row[7])]
        bow = np.zeros(len(id_index))  # one slot per known weather id
        for condition in row[8]:
            bow[id_index[condition[0]]] = 1
        features.append(np.hstack([numeric, bow]))
    features = np.array(features)

    processed = []
    for step in range(wnum):
        # rows step, step + wnum, ... are the same forecast step at each location
        same_step = features[step:needed:wnum]
        processed.append(np.hstack([same_step[:, :6].ravel(),
                                    same_step[:, 6:].sum(axis=0)]))
    return timestamps, processed

In [85]:
#load pre-trained models; below they are only used through their .predict() method
#NOTE(review): joblib.load unpickles the file, which can execute arbitrary code --
#only load .pkl files that come from a trusted source
max_clf = joblib.load("max_clf.pkl")#model predicting max drop rate
mean_clf = joblib.load("mean_clf.pkl")# model predicting mean drop rate

In [None]:
# (locations CSV, short tag) for every monitored path; the tag keys the result dicts
ROUTES = [
    ('AUR02-CAR01.csv', 'AURCAR'),
    ('CAR01-AUR02.csv', 'CARAUR'),
    ('CHI01-AUR02.csv', 'CHIAUR'),
    ('AUR02-CHI01.csv', 'AURCHI'),
    ('FRA01-SLO02.csv', 'FRASLO'),
    ('SLO02-FRA01.csv', 'SLOFRA'),
    ('SEC10-TOR01.csv', 'SECTOR'),
]
UPDATE_INTERVAL_SECONDS = 60*60*3  # update every 3 hours

while True:
    forecasts = {}   # tag -> raw forecast rows returned by spider
    inputs = {}      # tag -> processed model inputs
    max_preds = {}   # tag -> predicted max drop rate
    mean_preds = {}  # tag -> predicted mean drop rate
    fc_times = None  # timestamps of the predictions, taken from the first path

    for csv_path, tag in ROUTES:
        forecasts[tag] = spider(csv_path)
        # named fc_times/path_times (not `time`) so the datetime.time import is not overwritten
        path_times, inputs[tag] = process(forecasts[tag])
        if fc_times is None:
            fc_times = path_times
        max_preds[tag] = max_clf.predict(inputs[tag])
        mean_preds[tag] = mean_clf.predict(inputs[tag])

#     example of predicted label
    print(fc_times)#corresponding time of predicted drop rate
    print(len(max_preds['AURCAR']), len(mean_preds['AURCAR']))#number of predictions for this path
    print(max_preds['AURCAR'])#predicted max drop rate for AUR02_CAR01
#     max_preds / mean_preds hold the predictions of every path for visualization

    t1.sleep(UPDATE_INTERVAL_SECONDS)

['2018-11-26 00:00:00', '2018-11-26 03:00:00', '2018-11-26 06:00:00', '2018-11-26 09:00:00', '2018-11-26 12:00:00', '2018-11-26 15:00:00', '2018-11-26 18:00:00', '2018-11-26 21:00:00', '2018-11-27 00:00:00', '2018-11-27 03:00:00', '2018-11-27 06:00:00', '2018-11-27 09:00:00', '2018-11-27 12:00:00', '2018-11-27 15:00:00', '2018-11-27 18:00:00', '2018-11-27 21:00:00', '2018-11-28 00:00:00', '2018-11-28 03:00:00', '2018-11-28 06:00:00', '2018-11-28 09:00:00', '2018-11-28 12:00:00', '2018-11-28 15:00:00', '2018-11-28 18:00:00', '2018-11-28 21:00:00', '2018-11-29 00:00:00', '2018-11-29 03:00:00', '2018-11-29 06:00:00', '2018-11-29 09:00:00', '2018-11-29 12:00:00', '2018-11-29 15:00:00', '2018-11-29 18:00:00', '2018-11-29 21:00:00', '2018-11-30 00:00:00', '2018-11-30 03:00:00', '2018-11-30 06:00:00', '2018-11-30 09:00:00', '2018-11-30 12:00:00', '2018-11-30 15:00:00', '2018-11-30 18:00:00', '2018-11-30 21:00:00']
40 40
[0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 