## DarkSky API pull for post-clustering fire data
### Main columns needed: latitude, longitude, datetime

In [1]:
import pandas as pd
import datetime
import numpy as np

import config

from darksky import forecast
from datetime import datetime as dt
from datetime import timedelta
from datetime import date
from sqlalchemy import create_engine

### Use SQLAlchemy to create a connection engine and connect to our MySQL database

In [2]:
# Create the SQLAlchemy engine for the MySQL database; all connection
# settings come from the local `config` module.
engine = create_engine(
    "mysql+mysqlconnector://{user}:{password}@{host}/{dbname}".format(
        user=config.db_user,
        password=config.db_pass,
        host=config.db_host,
        dbname=config.db_name,
    )
)

### Setting DarkSky API Keys & Creating Weather Lookup Function

In [3]:
# DarkSky API keys, read from the local `config` module.
RAPIDAPI_KEY = config.darksky_api1
RAPIDAPI_KEY2 = config.darksky_api2

In [4]:
def weather_lookup(df, key=RAPIDAPI_KEY, days_before=0, days_after=0):
    """Fetch the DarkSky ``'currently'`` weather block for every row of ``df``.

    For each row, one request is made per day in the window
    ``[datetime - days_before, datetime + days_after]``. With the default
    window of zero days this is exactly one request per row, at the row's
    own ``datetime``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``datetime``, ``latitude`` and ``longitude`` columns.
        ``datetime`` values need an ``isoformat()`` method and, when a
        non-zero window is requested, must support adding a ``timedelta``.
    key : str
        DarkSky API key passed to ``forecast``.
    days_before : int
        Number of whole days before each row's ``datetime`` to query as well.
    days_after : int
        Number of whole days after each row's ``datetime`` to query as well.

    Returns
    -------
    list of dict
        One dict per request: the response's ``'currently'`` entry with
        ``timestamp`` (the ISO string that was requested), ``latitude`` and
        ``longitude`` added.

    Raises
    ------
    ValueError
        If ``days_before`` or ``days_after`` is negative.
    """
    if days_before < 0 or days_after < 0:
        raise ValueError("days_before and days_after must be non-negative")

    data = []
    for _, row in df.iterrows():
        lat = row['latitude']
        lon = row['longitude']
        for offset in range(-days_before, days_after + 1):
            when = row['datetime']
            if offset:
                # Only shift when needed so the default call is unchanged.
                when = when + timedelta(days=offset)
            ts = when.isoformat()
            weather = forecast(key, lat, lon, time=ts)
            w_dict = weather['currently']
            # Tag the record with the request it answers so it can be
            # matched back to the originating row.
            w_dict['timestamp'] = ts
            w_dict['latitude'] = lat
            w_dict['longitude'] = lon
            data.append(w_dict)
    return data


## Load the Emission Clusters to pull weather from

In [5]:
# Relative path of the clustered emissions CSV to pull weather for.
filename = "fire_lat_lon_data/Emissions2014_DBScan_Clusters.csv"

In [6]:
# Load the clustered emissions records into a DataFrame.
firedata = pd.read_csv(filepath_or_buffer=filename)

In [7]:
# Build a noon timestamp for each record from its `year` and `doy` columns.
# The day-of-year is rounded up and added to January 1 of the year; one day
# is then subtracted so that doy == 1 maps to January 1 itself.
year_start = pd.to_datetime(firedata['year'], format='%Y')
# Vectorised conversion (no per-row Python lambda); also works on an empty frame.
day_offset = pd.to_timedelta(np.ceil(firedata['doy']), unit='D') - timedelta(days=1)
firedata['datetime'] = year_start + day_offset + timedelta(hours=12)

In [8]:
# Show column dtypes and non-null counts as a quick check of the load.
firedata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8059 entries, 0 to 8058
Data columns (total 24 columns):
Unnamed: 0             8059 non-null int64
id                     8059 non-null float64
year                   8059 non-null float64
doy                    8059 non-null float64
longitude              8059 non-null float64
latitude               8059 non-null float64
grid10k                8059 non-null float64
covertype              8059 non-null float64
fuelcode               8059 non-null float64
area_burned            8059 non-null float64
prefire_fuel           8059 non-null float64
consumed_fuel          8059 non-null float64
ECO2                   8059 non-null float64
ECO                    8059 non-null float64
ECH4                   8059 non-null float64
EPM2.5                 8059 non-null float64
cwd_frac               8059 non-null float64
duff_frac              8059 non-null float64
fuel_moisture_class    8059 non-null float64
burn_source            8059 non-null flo

### Set the number of records to feed to the API

In [9]:
# Take the first 10 records as the batch to send to the API.
firedata10 = firedata.head(10)

In [10]:
# Query the API for the batch, then turn the list of per-request dicts
# into a DataFrame (one row per request).
weather_records = weather_lookup(firedata10, RAPIDAPI_KEY)
weather_df1 = pd.DataFrame(weather_records)

In [11]:
# Weather fields, in order, that are kept for the database load.
db_columns = [
    'apparentTemperature',
    'cloudCover',
    'dewPoint',
    'humidity',
    'icon',
    'latitude',
    'longitude',
    'precipIntensity',
    'precipProbability',
    'precipType',
    'pressure',
    'summary',
    'temperature',
    'time',
    'timestamp',
    'uvIndex',
    'visibility',
    'windBearing',
    'windGust',
    'windSpeed',
]

In [12]:
# Display the 10-record batch that was sent to the API.
firedata10

Unnamed: 0.1,Unnamed: 0,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,...,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag,datetime
0,0,382358.0,2014.0,195.0,-121.9806,48.7142,131429.0,3.0,1260.0,0.0,...,29.903119,91.148546,0.380435,0.291163,2.0,2.0,12.0,1.0,1.0,2014-07-14 12:00:00
1,1,432142.0,2014.0,224.0,-120.4474,48.9251,130980.0,3.0,1280.0,62500.0,...,36.255639,110.511841,0.321977,0.343797,1.0,3.0,16.0,2.0,1.0,2014-08-12 12:00:00
2,2,427550.0,2014.0,220.0,-124.6477,48.1125,130947.0,1.0,1.0,62500.0,...,0.560682,2.133938,0.0,0.0,2.0,2.0,12.0,2.0,1.0,2014-08-08 12:00:00
3,3,550306.0,2014.0,223.0,-121.57,48.6051,130510.0,3.0,1300.0,62500.0,...,49.681566,151.43579,0.452474,0.268215,2.0,2.0,12.0,2.0,1.0,2014-08-11 12:00:00
4,4,670668.0,2014.0,217.0,-120.2527,48.755,130059.0,3.0,1360.0,62500.0,...,16.713892,50.946089,0.172671,0.229814,1.0,1.0,16.0,4.0,0.0,2014-08-05 12:00:00
5,5,720992.0,2014.0,197.0,-120.8493,48.6106,129593.0,3.0,1260.0,62500.0,...,35.421623,107.969652,0.321165,0.245802,2.0,4.0,77.0,3.0,1.0,2014-07-16 12:00:00
6,6,762711.0,2014.0,277.0,-118.6289,48.9632,129610.0,3.0,1200.0,62500.0,...,28.674507,87.403578,0.30882,0.242211,2.0,2.0,12.0,2.0,1.0,2014-10-04 12:00:00
7,7,771449.0,2014.0,233.0,-121.0134,48.5431,129592.0,3.0,1300.0,62500.0,...,49.681566,151.43579,0.452474,0.268215,2.0,2.0,12.0,2.0,1.0,2014-08-21 12:00:00
8,8,914045.0,2014.0,207.0,-119.2507,48.7473,129144.0,2.0,2.0,62500.0,...,0.423641,1.612364,0.0,0.0,2.0,3.0,77.0,2.0,1.0,2014-07-26 12:00:00
9,9,928884.0,2014.0,164.0,-122.3052,48.1824,129120.0,3.0,1200.0,0.0,...,24.940427,76.021624,0.286874,0.241068,3.0,4.0,77.0,1.0,1.0,2014-06-13 12:00:00


In [13]:
# Keep only the database columns, in `db_columns` order. `reindex` is used
# instead of `weather_df1[db_columns]` because a field that is absent from
# every response in the batch (the saved table shows `precipType` is mostly
# null) would otherwise raise KeyError; such a column is created as all-NaN.
weather_df1 = weather_df1.reindex(columns=db_columns)

### Append weather records to the database table, then read the table back and save it as a CSV backup

In [14]:
# Append this batch to the `weather_loading` table.
# NOTE: with if_exists='append', re-running this cell inserts the same rows again.
weather_df1.to_sql(
    name='weather_loading',
    con=engine,
    if_exists='append',
    chunksize=100000,
)

In [15]:
# Select every row of the weather table for the CSV backup.
query = "SELECT * FROM weather_loading;\n"

In [16]:
# Read the full weather table back into a DataFrame.
weather_all_sql = pd.read_sql(sql=query, con=engine)

In [17]:
# Save everything read from the database to a local CSV as a backup.
weather_all_sql.to_csv(path_or_buf='weather_records_saved.csv')

In [18]:
# Show column dtypes and non-null counts for the records read from the database.
weather_all_sql.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 753577 entries, 0 to 753576
Data columns (total 23 columns):
index                  753577 non-null int64
apparentTemperature    737817 non-null float64
cloudCover             590283 non-null float64
dewPoint               733948 non-null float64
humidity               733691 non-null float64
icon                   566220 non-null object
latitude               753577 non-null float64
longitude              753577 non-null float64
precipAccumulation     2258 non-null float64
precipIntensity        691246 non-null float64
precipProbability      691246 non-null float64
precipType             59712 non-null object
pressure               465964 non-null float64
summary                571640 non-null object
temperature            737817 non-null float64
time                   753577 non-null int64
timestamp              753577 non-null object
uvIndex                590283 non-null float64
visibility             601595 non-null float64
windBea