# Create Fire Brightness Model Input

In [39]:
import pandas as pd
import numpy as np, time
from geopy.distance import great_circle
import math
import seaborn as sns
from scipy import spatial
import matplotlib.pyplot as plt
import datetime

from sqlalchemy import create_engine

import config
import config1


### Load Data

In [40]:
config.db_name

'db27bvsdruzh45'

In [41]:
# create sqlalchemy engine to access database:
engine = create_engine("mysql+mysqlconnector://{user}:{password}@{host}/{dbname}"
                       .format(user=config.db_user,
                               password=config.db_pass,
                               dbname=config.db_main,
                               host=config.db_host))

In [42]:
#Skip this block if reading from local disk

#Create Query Statments
queryWeather = """
SELECT *
FROM Weather;
"""
queryEmissions = """
SELECT *
FROM Emissions_Data
LIMIT 50000;
"""

In [6]:
# Expanding number of columns:
pd.set_option('display.max_columns', 40)

In [7]:
# Load NASA data as primary data set for Model Table - CSV or from DB. 
NASA_M6 = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/NASA-to-ClustserRef/NASA_M6_FullData_ClusterRef_May30.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [8]:
# Load Emissions data as primary data set for Model Table. 
emdata = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/Emissions Cluster Data/emissions_03_15_v5.24.csv')

In [9]:
print(NASA_M6.shape)
NASA_M6.head()

(2159441, 21)


Unnamed: 0.1,Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,datetime,doy,month,year,target_clusterref
0,0,38.8142,-93.5539,300.8,1.0,1.0,2003-01-01,423,Terra,MODIS,33,6.2,267.0,10.4,N,0,2003-01-01,1,1,2003,2003_1980
1,1,19.3739,-155.113,318.8,3.1,1.7,2003-01-01,914,Terra,MODIS,97,6.2,288.7,98.4,N,2,2003-01-01,1,1,2003,
2,2,19.3723,-155.1197,316.0,3.1,1.7,2003-01-01,914,Terra,MODIS,90,6.2,288.3,85.0,N,2,2003-01-01,1,1,2003,
3,3,19.3589,-155.1107,325.2,3.1,1.7,2003-01-01,914,Terra,MODIS,100,6.2,292.0,131.8,N,2,2003-01-01,1,1,2003,
4,4,19.3573,-155.1174,319.0,3.1,1.7,2003-01-01,914,Terra,MODIS,97,6.2,290.3,97.3,N,2,2003-01-01,1,1,2003,


In [10]:
# Remove NASA records where a cluster reference was not tagged, due to current clusters beign too far away. 
# Team added NASA record for those where the lat/long was within 150 euclidean distance from the clust reference centerpoint.

NASA_M6_Model = NASA_M6[NASA_M6.target_clusterref.notnull()]
print(NASA_M6_Model.shape) #Shape will be 1.35m records from original NASA dataset of 2.15m records. Original data also includes 2016-2019 which are not used at this time. 
NASA_M6_Model.head()

(1352421, 21)


Unnamed: 0.1,Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,datetime,doy,month,year,target_clusterref
0,0,38.8142,-93.5539,300.8,1.0,1.0,2003-01-01,423,Terra,MODIS,33,6.2,267.0,10.4,N,0,2003-01-01,1,1,2003,2003_1980
21,21,34.5954,-78.6218,306.5,1.3,1.1,2003-01-01,1624,Terra,MODIS,64,6.2,289.2,11.0,D,0,2003-01-01,1,1,2003,2003_4279
22,22,33.4182,-110.8618,307.6,1.2,1.1,2003-01-01,1803,Terra,MODIS,66,6.2,285.1,10.8,D,2,2003-01-01,1,1,2003,2003_1522
23,23,29.712,-95.1284,307.2,1.0,1.0,2003-01-01,1934,Aqua,MODIS,65,6.2,294.1,5.6,D,0,2003-01-01,1,1,2003,2003_919
24,24,28.9161,-98.6293,313.3,1.3,1.1,2003-01-01,1934,Aqua,MODIS,69,6.2,297.4,12.0,D,0,2003-01-01,1,1,2003,2003_777


In [11]:
print(emdata.shape) # Emissions records are 5.93m records that are the non-null data records for emissions. 
emdata.head(10)

(5936438, 21)


Unnamed: 0.1,Unnamed: 0,id,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference
0,0,0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0
1,1,1,2008,359,-81.0404,25.1984,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0
2,2,2,2008,359,-81.038,25.1981,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0
3,3,4,2008,359,-81.0594,25.2035,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0
4,4,5,2008,359,-81.057,25.2032,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0
5,5,6,2008,359,-81.0545,25.2028,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0
6,6,7,2008,359,-81.0521,25.2025,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0
7,7,8,2008,359,-81.0473,25.2018,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0
8,8,9,2008,359,-81.0449,25.2014,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0
9,9,10,2008,359,-81.04,25.2007,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0


In [12]:
#researching a single cluster reference to see how the data looks across cluster:
emdata.loc[emdata['cluster_reference'] == '2003_1980']

Unnamed: 0.1,Unnamed: 0,id,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference
5907041,5907041,7205653,2003,81,-93.3088,39.3217,1,1,62500.0,238.967352,222.239637,372.695872,15.556775,0.595602,2.266844,0.0,0.0,2,12,2,2003_1980
5907042,5907042,7205654,2003,83,-93.3058,39.3217,3,2700,0.0,2639.337818,1979.738069,3076.51296,263.305163,14.808441,45.138028,0.162671,0.157472,2,12,1,2003_1980


### Create new Feature for model input - Season using DOY:

In [13]:
# Function to create a feature by determining the Season by DOY = 0 is Spring, 1 is Summer, 2 is Fall and 3 is Winter:
# spring = range(80, 172)
# summer = range(172, 264)
# fall = range(264, 355)  
# winter is else, i guess you need an Else!

def get_season(doy):
    """
    convert date to month and day as integer (md), e.g. 4/21 = 421, 11/17 = 1117, etc.
    """
    #m = date.month * 100
    #d = date.day
    #md = m + d
    
    if ((doy >= 80) and (doy <= 172)):
        s = 0  # spring
    elif ((doy > 172) and (doy < 264)):
        s = 1  # summer
    elif ((doy >= 264) and (doy <= 355)):
        s = 2  # fall
    #elif ((doy > 355) and (doy < 80)):
        #s = 3  # winter
    else:
        s = 3
     #   raise IndexError("Invalid date")
    return s

In [None]:
#Testing function to derive the season and tag the season code. 
print(get_season(265))

In [None]:
NASA_M6_Model['season'] = NASA_M6_Model.apply(lambda x: get_season(x['doy']),axis=1)  # Creating Season per DOY and tagging it as a new column in the dataframe. 

In [None]:
NASA_M6_Model.head()

## Prepare Feature data points by Cluster Reference for input into Model Table:

In [14]:
# Cluster Reference
# GroupBy Cluster Reference and determine the most frequent Covertype by group and into DF. 

#covertype = emdata.groupby( [ "cluster_reference"] ).value_counts().to_frame(name = 'covertype').reset_index()
#covertype = emdata.groupby( ['cluster_reference', 'covertype'] ).size().to_frame(name = 'covermode').reset_index()
#covertype = emdata.groupby( [ 'cluster_reference', 'covertype'] ).agg(lambda x:x.value_counts().index[0])
covertypedf = emdata.groupby('cluster_reference')['covertype'].agg(pd.Series.mode).to_frame(name = 'covertype').reset_index()
covertypedf.head()

Unnamed: 0,cluster_reference,covertype
0,2003_0,1
1,2003_1,3
2,2003_10,1
3,2003_100,2
4,2003_1000,3


In [15]:
covertypedf['covertype2'] = covertypedf.apply(lambda x: (x['covertype'].max()),axis=1)  # If there are only two records, the next line will pull just the max value. 
covertypedf.head() 

Unnamed: 0,cluster_reference,covertype,covertype2
0,2003_0,1,1
1,2003_1,3,3
2,2003_10,1,1
3,2003_100,2,2
4,2003_1000,3,3


In [16]:
# Fuel Code
fuelcodedf = emdata.groupby('cluster_reference')['fuelcode'].agg(pd.Series.mode).to_frame(name = 'fuelcode').reset_index()  # Determine the most frequent fuel code across cluster. 
fuelcodedf.head()

Unnamed: 0,cluster_reference,fuelcode
0,2003_0,1
1,2003_1,1600
2,2003_10,1
3,2003_100,2
4,2003_1000,2700


In [17]:
fuelcodedf['fuelcode2'] = fuelcodedf.apply(lambda x: (x['fuelcode'].max()),axis=1)

In [18]:
# Prefire Fuel - take median value across cluster for input into model input. 
prefirefuel = emdata.groupby('cluster_reference')['prefire_fuel'].median().to_frame(name = 'prefire_fuel').reset_index()
prefirefuel.head()

Unnamed: 0,cluster_reference,prefire_fuel
0,2003_0,337.042602
1,2003_1,6220.097576
2,2003_10,162.188442
3,2003_100,3390.377328
4,2003_1000,2639.337818


In [19]:
fuel_moisture_class = emdata.groupby('cluster_reference')['fuel_moisture_class'].agg(pd.Series.mode).to_frame(name = 'fuel_moisture_class').reset_index()
fuel_moisture_class.head()

Unnamed: 0,cluster_reference,fuel_moisture_class
0,2003_0,2
1,2003_1,4
2,2003_10,2
3,2003_100,2
4,2003_1000,3


In [20]:
fuel_moisture_class['fuel_moisture_class2'] = fuel_moisture_class.apply(lambda x: (x['fuel_moisture_class'].max()),axis=1)

In [21]:
NASA_M6_Model = NASA_M6_Model.rename(columns={'target_clusterref':'cluster_reference'})
NASA_M6_Model.head()

Unnamed: 0.1,Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,datetime,doy,month,year,cluster_reference
0,0,38.8142,-93.5539,300.8,1.0,1.0,2003-01-01,423,Terra,MODIS,33,6.2,267.0,10.4,N,0,2003-01-01,1,1,2003,2003_1980
21,21,34.5954,-78.6218,306.5,1.3,1.1,2003-01-01,1624,Terra,MODIS,64,6.2,289.2,11.0,D,0,2003-01-01,1,1,2003,2003_4279
22,22,33.4182,-110.8618,307.6,1.2,1.1,2003-01-01,1803,Terra,MODIS,66,6.2,285.1,10.8,D,2,2003-01-01,1,1,2003,2003_1522
23,23,29.712,-95.1284,307.2,1.0,1.0,2003-01-01,1934,Aqua,MODIS,65,6.2,294.1,5.6,D,0,2003-01-01,1,1,2003,2003_919
24,24,28.9161,-98.6293,313.3,1.3,1.1,2003-01-01,1934,Aqua,MODIS,69,6.2,297.4,12.0,D,0,2003-01-01,1,1,2003,2003_777


---
### Merge features from Emissions data into Fire Brightness dataframe:

In [None]:
NASA_M6_Model = NASA_M6_Model.merge(right=covertypedf.loc[:,['cluster_reference', 'covertype2']],  # Merging data into NASA M6, changing by DF above and column name. 
                   how='left',
                   left_on=['cluster_reference'],
                   right_on=['cluster_reference'])


In [None]:
NASA_M6_Model.head(5)

In [None]:
# Drop columns that are not needed for model features:
NASA_M6_Model = NASA_M6_Model.drop(columns=['Unnamed: 0','scan', 'track', 'acq_date', 'acq_date', 'acq_date', 'satellite', 'instrument',
                                            'confidence', 'version', 'type', 'datetime', 'prefirefuel','prefire_fuel_x','fuel_moisture_class_x'])

In [None]:
NASA_M6_Model = NASA_M6_Model.drop(columns=['covertype', 'fuelcode', 'fuel_moisture_class_y'])

In [None]:
NASA_M6_Model = NASA_M6_Model.rename(columns={'prefire_fuel_':'prefire_fuel'})

In [None]:
NASA_M6_Model.head(5)

In [None]:
# Reorder to make it pretty :) 
NASA_M6_Model = NASA_M6_Model[['latitude', 'longitude', 'doy', 'month', 'year','cluster_reference', 'season', 'covertype', 'fuelcode', 'prefire_fuel','fuel_moisture_class',
                               'brightness', 'bright_t31', 'frp']]

In [None]:
print(NASA_M6_Model.shape)
NASA_M6_Model.head()

In [None]:
# bro, save it down!
NASA_M6_Model.to_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/FireIntensity_Model_June1.csv',index=False)
                     

--- 
## Merge weather data from DB into Fire Brightness dataframe:

In [74]:
# Load pending Fire Brightness Model dataframe:
NASA_M6_Model = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/FireIntensity_Model_June1.csv')

In [75]:
print(NASA_M6_Model.shape)
NASA_M6_Model.head()

(1352421, 14)


Unnamed: 0,latitude,longitude,doy,month,year,cluster_reference,season,covertype,fuelcode,prefire_fuel,fuel_moisture_class,brightness,bright_t31,frp
0,38.8142,-93.5539,1,1,2003,2003_1980,3,3,2700,1439.152585,2,300.8,267.0,10.4
1,34.5954,-78.6218,1,1,2003,2003_4279,3,3,1600,6220.097576,3,306.5,289.2,11.0
2,33.4182,-110.8618,1,1,2003,2003_1522,3,3,1220,4534.187262,2,307.6,285.1,10.8
3,29.712,-95.1284,1,1,2003,2003_919,3,1,1,277.41285,2,307.2,294.1,5.6
4,28.9161,-98.6293,1,1,2003,2003_777,3,1,1,251.296812,2,313.3,297.4,12.0


In [32]:
NASA_M6_Model.info()  # checking for null values in the FB model df. 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1352421 entries, 0 to 1352420
Data columns (total 14 columns):
latitude               1352421 non-null float64
longitude              1352421 non-null float64
doy                    1352421 non-null int64
month                  1352421 non-null int64
year                   1352421 non-null int64
cluster_reference      1352421 non-null object
season                 1352421 non-null int64
covertype              1352421 non-null int64
fuelcode               1352421 non-null int64
prefire_fuel           1352421 non-null float64
fuel_moisture_class    1352421 non-null int64
brightness             1352421 non-null float64
bright_t31             1352421 non-null float64
frp                    1352421 non-null float64
dtypes: float64(6), int64(7), object(1)
memory usage: 144.5+ MB


In [35]:
dfWeather = pd.read_sql(queryWeather, engine)  # Open up weather data from DB. 

In [36]:
dfWeather.info() # Check for Null Values / note - some features faired better from a data pull perspective. Will clean here. 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 723890 entries, 0 to 723889
Data columns (total 15 columns):
Key                 723890 non-null int64
cluster_ref         723890 non-null object
lat                 723890 non-null float64
lon                 723890 non-null float64
temperature         708836 non-null float64
humidity            704865 non-null float64
precip_intensity    664166 non-null float64
pressure            448590 non-null float64
uv_index            567485 non-null float64
visibility          578296 non-null float64
wind_speed          704241 non-null float64
wind_gust           651392 non-null float64
Date                723890 non-null datetime64[ns]
year                723890 non-null int64
doy                 723890 non-null int64
dtypes: datetime64[ns](1), float64(10), int64(3), object(1)
memory usage: 82.8+ MB


In [101]:
dfWeather.head(1)

Unnamed: 0,Key,cluster_reference,lat,lon,temperature,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust,Date,year,doy
0,1,unknown,24.6801,-81.3504,90.67,0.8,0.0,1018.5,7.0,7.781,12.71,16.1,2003-05-05 12:00:00,2003,125


In [80]:
dfWeather = dfWeather.rename(columns={'cluster_ref':'cluster_reference'})

In [102]:
dfWeather.head(5)

Unnamed: 0,Key,cluster_reference,lat,lon,temperature,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust,Date,year,doy
0,1,unknown,24.6801,-81.3504,90.67,0.8,0.0,1018.5,7.0,7.781,12.71,16.1,2003-05-05 12:00:00,2003,125
1,2,2003_2323,24.6801,-81.3504,88.11,0.76,0.0,1018.8,9.0,8.656,7.19,7.19,2003-05-04 12:00:00,2003,124
2,3,2003_2323,24.6801,-81.3504,97.81,0.67,0.0,1019.2,9.0,8.656,11.39,12.67,2003-07-23 12:00:00,2003,204
3,4,unknown,24.7146,-81.3868,94.77,0.52,0.0,1018.2,8.0,9.997,5.66,10.7,2011-09-16 12:00:00,2011,259
4,5,2011_3601,24.7146,-81.3868,95.79,0.5,0.0,1016.1,7.0,9.997,1.5,2.9,2011-09-15 12:00:00,2011,258


In [89]:
dfWeather.loc[(dfWeather['lat'] == 39.3217)]

Unnamed: 0,Key,cluster_reference,lat,lon,temperature,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust,Date,year,doy
579193,579194,unknown,39.3217,-93.3058,,,,,,,,,2003-03-25 12:00:00,2003,84
579194,579195,2003_1980,39.3217,-93.3058,,,,,,,,,2003-01-01 12:00:00,2003,1
579195,579196,2003_1980,39.3217,-93.3058,,,,,,,,,2003-01-07 12:00:00,2003,7
579196,579197,2003_1980,39.3217,-93.3058,,,,,,,,,2003-01-09 12:00:00,2003,9
579197,579198,2003_1980,39.3217,-93.3058,,,,,,,,,2003-01-13 12:00:00,2003,13
579198,579199,2003_1980,39.3217,-93.3058,,,,,,,,,2003-01-14 12:00:00,2003,14
579199,579200,2003_1980,39.3217,-93.3058,,,,,,,,,2003-03-17 12:00:00,2003,76
579200,579201,2003_1980,39.3217,-93.3058,,,,,,,,,2003-03-22 12:00:00,2003,81
579201,579202,2003_1980,39.3217,-93.3058,,,,,,,,,2003-03-24 12:00:00,2003,83
579202,579203,2003_1980,39.3217,-93.3058,,,,,,,,,2003-03-26 12:00:00,2003,85


### Merge Weather data into Fire Brightness model input:

In [83]:
# Merging data into NASA M6, changing by DF above and column name. 
NASA_M6_Model_Weather3 = NASA_M6_Model.merge(right=dfWeather.loc[:,['cluster_reference', 'temperature', 'humidity', 
                                                                    'precip_intensity', 'pressure', 'uv_index', 'visibility', 'wind_speed','wind_gust','doy']],
                   how='left',
                   left_on=['cluster_reference', 'doy'],
                   right_on=['cluster_reference', 'doy'])

In [84]:
NASA_M6_Model_Weather3[0:15]

Unnamed: 0,latitude,longitude,doy,month,year,cluster_reference,season,covertype,fuelcode,prefire_fuel,fuel_moisture_class,brightness,bright_t31,frp,temperature,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust
0,38.8142,-93.5539,1,1,2003,2003_1980,3,3,2700,1439.152585,2,300.8,267.0,10.4,,,,,,,,
1,34.5954,-78.6218,1,1,2003,2003_4279,3,3,1600,6220.097576,3,306.5,289.2,11.0,64.14,0.88,0.011,1004.1,2.0,9.022,6.42,12.51
2,33.4182,-110.8618,1,1,2003,2003_1522,3,3,1220,4534.187262,2,307.6,285.1,10.8,32.17,0.37,,,,,6.88,18.5
3,29.712,-95.1284,1,1,2003,2003_919,3,1,1,277.41285,2,307.2,294.1,5.6,65.97,0.5,,1011.7,4.0,9.997,10.98,16.9
4,28.9161,-98.6293,1,1,2003,2003_777,3,1,1,251.296812,2,313.3,297.4,12.0,72.89,0.51,0.0,1008.0,4.0,9.997,25.06,28.59
5,33.1976,-100.2702,1,1,2003,2003_1370,3,2,2,750.808071,2,302.6,288.5,7.1,,,,,,,,
6,31.9433,-98.9875,1,1,2003,2003_1278,3,2,2,470.424942,2,304.5,290.7,7.2,,,,,,,,
7,32.7772,-95.0444,1,1,2003,2003_3100,3,1,1,173.17287,2,301.3,289.9,4.2,58.39,0.3,,,,,7.99,10.99
8,32.2024,-94.6048,1,1,2003,2003_3015,3,3,1400,6935.101645,2,302.1,288.5,5.1,58.03,0.36,,1009.3,3.0,9.997,9.16,10.99
9,33.3287,-95.5265,1,1,2003,2003_1290,3,1,1,393.589989,2,317.5,288.3,15.9,52.5,0.49,,,3.0,9.997,13.82,


In [90]:
NASA_M6_Model_Weather3.info()  

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1374100 entries, 0 to 1374099
Data columns (total 22 columns):
latitude               1374100 non-null float64
longitude              1374100 non-null float64
doy                    1374100 non-null int64
month                  1374100 non-null int64
year                   1374100 non-null int64
cluster_reference      1374100 non-null object
season                 1374100 non-null int64
covertype              1374100 non-null int64
fuelcode               1374100 non-null int64
prefire_fuel           1374100 non-null float64
fuel_moisture_class    1374100 non-null int64
brightness             1374100 non-null float64
bright_t31             1374100 non-null float64
frp                    1374100 non-null float64
temperature            1350456 non-null float64
humidity               1343467 non-null float64
precip_intensity       1275730 non-null float64
pressure               797996 non-null float64
uv_index               981562 non-null 

NASA_M6_Model_Weather3.loc[(NASA_M6_Model_Weather3['cluster_reference'] == '2003_1522')]

nasa_emissions_doycombined = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/nasa_emissions_doycombined_May28.csv')

nasa_emissions_doycombined.head(20)

nasa_emissions_doycombined.loc[(nasa_emissions_doycombined['cluster_reference'] == '2003_1980')]

In [104]:
# Reorder DF. 
NASA_M6_Model_Weather3 = NASA_M6_Model_Weather3[['latitude', 'longitude', 'doy', 'month', 'year','cluster_reference', 'season', 'covertype', 'fuelcode', 'prefire_fuel','fuel_moisture_class',
                                                 'temperature', 'humidity', 'precip_intensity', 'pressure', 'uv_index', 'visibility', 'wind_speed','wind_gust','brightness', 'bright_t31', 'frp']]

In [99]:
print(NASA_M6_Model_Weather3.shape)
NASA_M6_Model_Weather3.head(10)

(1374100, 22)


Unnamed: 0,latitude,longitude,doy,month,year,cluster_reference,season,covertype,fuelcode,prefire_fuel,fuel_moisture_class,temperature,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust,brightness,bright_t31,frp
0,38.8142,-93.5539,1,1,2003,2003_1980,3,3,2700,1439.152585,2,,,,,,,,,300.8,267.0,10.4
1,34.5954,-78.6218,1,1,2003,2003_4279,3,3,1600,6220.097576,3,64.14,0.88,0.011,1004.1,2.0,9.022,6.42,12.51,306.5,289.2,11.0
2,33.4182,-110.8618,1,1,2003,2003_1522,3,3,1220,4534.187262,2,32.17,0.37,,,,,6.88,18.5,307.6,285.1,10.8
3,29.712,-95.1284,1,1,2003,2003_919,3,1,1,277.41285,2,65.97,0.5,,1011.7,4.0,9.997,10.98,16.9,307.2,294.1,5.6
4,28.9161,-98.6293,1,1,2003,2003_777,3,1,1,251.296812,2,72.89,0.51,0.0,1008.0,4.0,9.997,25.06,28.59,313.3,297.4,12.0
5,33.1976,-100.2702,1,1,2003,2003_1370,3,2,2,750.808071,2,,,,,,,,,302.6,288.5,7.1
6,31.9433,-98.9875,1,1,2003,2003_1278,3,2,2,470.424942,2,,,,,,,,,304.5,290.7,7.2
7,32.7772,-95.0444,1,1,2003,2003_3100,3,1,1,173.17287,2,58.39,0.3,,,,,7.99,10.99,301.3,289.9,4.2
8,32.2024,-94.6048,1,1,2003,2003_3015,3,3,1400,6935.101645,2,58.03,0.36,,1009.3,3.0,9.997,9.16,10.99,302.1,288.5,5.1
9,33.3287,-95.5265,1,1,2003,2003_1290,3,1,1,393.589989,2,52.5,0.49,,,3.0,9.997,13.82,,317.5,288.3,15.9


In [97]:
NASA_M6_Model_Weather3.to_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/FireIntensity_Model_June4.csv',index=False)

---
## Work to clean up Fire Brightness DF for weather nulls:

In [4]:
Fire_Brightness = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/FireIntensity_Model_June4.csv')

In [5]:
print(Fire_Brightness.shape)
Fire_Brightness.head()

(1374100, 22)


Unnamed: 0,latitude,longitude,doy,month,year,cluster_reference,season,covertype,fuelcode,prefire_fuel,...,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust,brightness,bright_t31,frp
0,38.8142,-93.5539,1,1,2003,2003_1980,3,3,2700,1439.152585,...,,,,,,,,300.8,267.0,10.4
1,34.5954,-78.6218,1,1,2003,2003_4279,3,3,1600,6220.097576,...,0.88,0.011,1004.1,2.0,9.022,6.42,12.51,306.5,289.2,11.0
2,33.4182,-110.8618,1,1,2003,2003_1522,3,3,1220,4534.187262,...,0.37,,,,,6.88,18.5,307.6,285.1,10.8
3,29.712,-95.1284,1,1,2003,2003_919,3,1,1,277.41285,...,0.5,,1011.7,4.0,9.997,10.98,16.9,307.2,294.1,5.6
4,28.9161,-98.6293,1,1,2003,2003_777,3,1,1,251.296812,...,0.51,0.0,1008.0,4.0,9.997,25.06,28.59,313.3,297.4,12.0


In [6]:
Fire_Brightness = Fire_Brightness.drop_duplicates(subset=['doy', 'year', 'longitude', 'latitude', 'cluster_reference'], keep='first')

In [7]:
print(Fire_Brightness.shape)
Fire_Brightness.head()

(1352421, 22)


Unnamed: 0,latitude,longitude,doy,month,year,cluster_reference,season,covertype,fuelcode,prefire_fuel,...,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust,brightness,bright_t31,frp
0,38.8142,-93.5539,1,1,2003,2003_1980,3,3,2700,1439.152585,...,,,,,,,,300.8,267.0,10.4
1,34.5954,-78.6218,1,1,2003,2003_4279,3,3,1600,6220.097576,...,0.88,0.011,1004.1,2.0,9.022,6.42,12.51,306.5,289.2,11.0
2,33.4182,-110.8618,1,1,2003,2003_1522,3,3,1220,4534.187262,...,0.37,,,,,6.88,18.5,307.6,285.1,10.8
3,29.712,-95.1284,1,1,2003,2003_919,3,1,1,277.41285,...,0.5,,1011.7,4.0,9.997,10.98,16.9,307.2,294.1,5.6
4,28.9161,-98.6293,1,1,2003,2003_777,3,1,1,251.296812,...,0.51,0.0,1008.0,4.0,9.997,25.06,28.59,313.3,297.4,12.0


In [8]:
# Dropping two features given the number of Nulls. 
Fire_Brightness = Fire_Brightness.drop(columns=['pressure','uv_index'])

In [9]:
Fire_Brightness.head(20)

Unnamed: 0,latitude,longitude,doy,month,year,cluster_reference,season,covertype,fuelcode,prefire_fuel,fuel_moisture_class,temperature,humidity,precip_intensity,visibility,wind_speed,wind_gust,brightness,bright_t31,frp
0,38.8142,-93.5539,1,1,2003,2003_1980,3,3,2700,1439.152585,2,,,,,,,300.8,267.0,10.4
1,34.5954,-78.6218,1,1,2003,2003_4279,3,3,1600,6220.097576,3,64.14,0.88,0.011,9.022,6.42,12.51,306.5,289.2,11.0
2,33.4182,-110.8618,1,1,2003,2003_1522,3,3,1220,4534.187262,2,32.17,0.37,,,6.88,18.5,307.6,285.1,10.8
3,29.712,-95.1284,1,1,2003,2003_919,3,1,1,277.41285,2,65.97,0.5,,9.997,10.98,16.9,307.2,294.1,5.6
4,28.9161,-98.6293,1,1,2003,2003_777,3,1,1,251.296812,2,72.89,0.51,0.0,9.997,25.06,28.59,313.3,297.4,12.0
5,33.1976,-100.2702,1,1,2003,2003_1370,3,2,2,750.808071,2,,,,,,,302.6,288.5,7.1
6,31.9433,-98.9875,1,1,2003,2003_1278,3,2,2,470.424942,2,,,,,,,304.5,290.7,7.2
7,32.7772,-95.0444,1,1,2003,2003_3100,3,1,1,173.17287,2,58.39,0.3,,,7.99,10.99,301.3,289.9,4.2
8,32.2024,-94.6048,1,1,2003,2003_3015,3,3,1400,6935.101645,2,58.03,0.36,,9.997,9.16,10.99,302.1,288.5,5.1
9,33.3287,-95.5265,1,1,2003,2003_1290,3,1,1,393.589989,2,52.5,0.49,,9.997,13.82,,317.5,288.3,15.9


In [10]:
Fire_Brightness = Fire_Brightness.dropna(subset=['temperature'])  

In [11]:
Fire_Brightness[['precip_intensity']] = Fire_Brightness[['precip_intensity']].fillna(value=0)

In [25]:
Fire_Brightness.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1328922 entries, 1 to 1374099
Data columns (total 20 columns):
latitude               1328922 non-null float64
longitude              1328922 non-null float64
doy                    1328922 non-null int64
month                  1328922 non-null int64
year                   1328922 non-null int64
cluster_reference      1328922 non-null object
season                 1328922 non-null int64
covertype              1328922 non-null int64
fuelcode               1328922 non-null int64
prefire_fuel           1328922 non-null float64
fuel_moisture_class    1328922 non-null int64
temperature            1328922 non-null float64
humidity               1322121 non-null float64
precip_intensity       1328922 non-null float64
visibility             1000739 non-null float64
wind_speed             1320234 non-null float64
wind_gust              1229423 non-null float64
brightness             1328922 non-null float64
bright_t31             1328922 non-nul

In [22]:
# Creating Means for features
humidity_mean = Fire_Brightness['humidity'].mean()
visibility_mean = Fire_Brightness['visibility'].mean()
wind_speed_mean = Fire_Brightness['wind_speed'].mean()
wind_gust_mean = Fire_Brightness['wind_gust'].mean()

print(humidity_mean, visibility_mean, wind_speed_mean, wind_gust_mean)

0.41109985394693355 9.216293016572765 6.365645393164141 11.681997644426186


In [27]:
weather_values = {'humidity': humidity_mean, 'visibility': visibility_mean, 'wind_speed': wind_speed_mean, 'wind_gust': wind_gust_mean}
Fire_Brightness = Fire_Brightness.fillna(value=weather_values)

In [28]:
Fire_Brightness.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1328922 entries, 1 to 1374099
Data columns (total 20 columns):
latitude               1328922 non-null float64
longitude              1328922 non-null float64
doy                    1328922 non-null int64
month                  1328922 non-null int64
year                   1328922 non-null int64
cluster_reference      1328922 non-null object
season                 1328922 non-null int64
covertype              1328922 non-null int64
fuelcode               1328922 non-null int64
prefire_fuel           1328922 non-null float64
fuel_moisture_class    1328922 non-null int64
temperature            1328922 non-null float64
humidity               1328922 non-null float64
precip_intensity       1328922 non-null float64
visibility             1328922 non-null float64
wind_speed             1328922 non-null float64
wind_gust              1328922 non-null float64
brightness             1328922 non-null float64
bright_t31             1328922 non-nul

In [29]:
Fire_Brightness.to_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/FireIntensity_Model_June4_Clean.csv',index=False)

In [43]:
Fire_Brightness.to_sql('Fire_Intensity_Model_Clean', con=engine, if_exists='replace')

Exception during reset or similar
Traceback (most recent call last):
  File "/Users/AlfHaugen/anaconda3/lib/python3.7/site-packages/sqlalchemy/pool/base.py", line 671, in _finalize_fairy
    fairy._reset(pool)
  File "/Users/AlfHaugen/anaconda3/lib/python3.7/site-packages/sqlalchemy/pool/base.py", line 858, in _reset
    pool._dialect.do_rollback(self)
  File "/Users/AlfHaugen/anaconda3/lib/python3.7/site-packages/sqlalchemy/dialects/mysql/base.py", line 2227, in do_rollback
    dbapi_connection.rollback()
  File "/Users/AlfHaugen/anaconda3/lib/python3.7/site-packages/mysql/connector/connection_cext.py", line 386, in rollback
    self._cmysql.rollback()
_mysql_connector.MySQLInterfaceError: MySQL server has gone away


MySQLInterfaceError: MySQL server has gone away