# Creating the BSEV Model Input from Emissions/Weather Data. 

In [65]:
import pandas as pd
import numpy as np, time
from geopy.distance import great_circle
import math
import seaborn as sns
from scipy import spatial
import matplotlib.pyplot as plt
import datetime

from sqlalchemy import create_engine

import config
import config1


In [66]:
# Expanding number of columns:
pd.set_option('display.max_columns', 40)

### Load Data

In [67]:
config.db_name

'db27bvsdruzh45'

In [68]:
# create sqlalchemy engine to access database:
engine = create_engine("mysql+mysqlconnector://{user}:{password}@{host}/{dbname}"
                       .format(user=config.db_user,
                               password=config.db_pass,
                               dbname=config.db_main,
                               host=config.db_host))

In [69]:
#Skip this block if reading from local disk

#Create Query Statments
queryWeather = """
SELECT *
FROM Weather;
"""
queryEmissions = """
SELECT *
FROM Emissions_Data
LIMIT 50000;
"""

In [7]:
# Load NASA data as primary data set for Model Table - CSV or from DB. 
NASA_M6 = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/NASA-to-ClustserRef/NASA_M6_FullData_ClusterRef_May30.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
# Load Emissions data as primary data set for Model Table. 
emdata = pd.read_csv('../data/emissions_03_15_v5.24.csv')

In [8]:
print(emdata.shape)
emdata.head()

(5936438, 21)


Unnamed: 0.1,Unnamed: 0,id,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference
0,0,0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0
1,1,1,2008,359,-81.0404,25.1984,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0
2,2,2,2008,359,-81.038,25.1981,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0
3,3,4,2008,359,-81.0594,25.2035,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0
4,4,5,2008,359,-81.057,25.2032,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0


In [9]:
#researching a single cluster reference to see how the data looks across cluster:
emdata.loc[emdata['cluster_reference'] == '2003_1980']

Unnamed: 0.1,Unnamed: 0,id,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference
5907041,5907041,7205653,2003,81,-93.3088,39.3217,1,1,62500.0,238.967352,222.239637,372.695872,15.556775,0.595602,2.266844,0.0,0.0,2,12,2,2003_1980
5907042,5907042,7205654,2003,83,-93.3058,39.3217,3,2700,0.0,2639.337818,1979.738069,3076.51296,263.305163,14.808441,45.138028,0.162671,0.157472,2,12,1,2003_1980


### Create new Feature for model input - Season using DOY:

In [10]:
# Function to create a feature by determining the Season by DOY = 0 is Spring, 1 is Summer, 2 is Fall and 3 is Winter:
# spring = range(80, 172)
# summer = range(172, 264)
# fall = range(264, 355)  
# winter is else, i guess you need an Else!

def get_season(doy):

    if ((doy >= 80) and (doy <= 172)):
        s = 0  # spring
    elif ((doy > 172) and (doy < 264)):
        s = 1  # summer
    elif ((doy >= 264) and (doy <= 355)):
        s = 2  # fall
    #elif ((doy > 355) and (doy < 80)):
        #s = 3  # winter
    else:
        s = 3
     #   raise IndexError("Invalid date")
    return s

In [11]:
#Testing function to derive the season and tag the season code. 
print(get_season(265))

2


In [12]:
emdata['season'] = emdata.apply(lambda x: get_season(x['doy']),axis=1)  # Creating Season per DOY and tagging it as a new column in the dataframe. 

In [15]:
emdata.head(2)

Unnamed: 0.1,Unnamed: 0,id,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference,season
0,0,0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0,3
1,1,1,2008,359,-81.0404,25.1984,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0,3


# Merging Weather data into the BSEV/Emissions model input.

### Loading weather from SQL. 

In [16]:
dfWeather = pd.read_sql(queryWeather, engine)  # Open up weather data from DB. 

In [17]:
dfWeather.info() # Check for Null Values / note - some features faired better from a data pull perspective. Will clean here. 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 723890 entries, 0 to 723889
Data columns (total 15 columns):
Key                 723890 non-null int64
cluster_ref         723890 non-null object
lat                 723890 non-null float64
lon                 723890 non-null float64
temperature         708836 non-null float64
humidity            704865 non-null float64
precip_intensity    664166 non-null float64
pressure            448590 non-null float64
uv_index            567485 non-null float64
visibility          578296 non-null float64
wind_speed          704241 non-null float64
wind_gust           651392 non-null float64
Date                723890 non-null datetime64[ns]
year                723890 non-null int64
doy                 723890 non-null int64
dtypes: datetime64[ns](1), float64(10), int64(3), object(1)
memory usage: 82.8+ MB


In [18]:
dfWeather.head(1)

Unnamed: 0,Key,cluster_ref,lat,lon,temperature,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust,Date,year,doy
0,1,unknown,24.6801,-81.3504,90.67,0.8,0.0,1018.5,7.0,7.781,12.71,16.1,2003-05-05 12:00:00,2003,125


In [19]:
dfWeather = dfWeather.rename(columns={'cluster_ref':'cluster_reference'})

In [20]:
dfWeather.head(5)

Unnamed: 0,Key,cluster_reference,lat,lon,temperature,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust,Date,year,doy
0,1,unknown,24.6801,-81.3504,90.67,0.8,0.0,1018.5,7.0,7.781,12.71,16.1,2003-05-05 12:00:00,2003,125
1,2,2003_2323,24.6801,-81.3504,88.11,0.76,0.0,1018.8,9.0,8.656,7.19,7.19,2003-05-04 12:00:00,2003,124
2,3,2003_2323,24.6801,-81.3504,97.81,0.67,0.0,1019.2,9.0,8.656,11.39,12.67,2003-07-23 12:00:00,2003,204
3,4,unknown,24.7146,-81.3868,94.77,0.52,0.0,1018.2,8.0,9.997,5.66,10.7,2011-09-16 12:00:00,2011,259
4,5,2011_3601,24.7146,-81.3868,95.79,0.5,0.0,1016.1,7.0,9.997,1.5,2.9,2011-09-15 12:00:00,2011,258


In [21]:
dfWeather.loc[(dfWeather['lat'] == 39.3217)]

Unnamed: 0,Key,cluster_reference,lat,lon,temperature,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust,Date,year,doy
579193,579194,unknown,39.3217,-93.3058,,,,,,,,,2003-03-25 12:00:00,2003,84
579194,579195,2003_1980,39.3217,-93.3058,,,,,,,,,2003-01-01 12:00:00,2003,1
579195,579196,2003_1980,39.3217,-93.3058,,,,,,,,,2003-01-07 12:00:00,2003,7
579196,579197,2003_1980,39.3217,-93.3058,,,,,,,,,2003-01-09 12:00:00,2003,9
579197,579198,2003_1980,39.3217,-93.3058,,,,,,,,,2003-01-13 12:00:00,2003,13
579198,579199,2003_1980,39.3217,-93.3058,,,,,,,,,2003-01-14 12:00:00,2003,14
579199,579200,2003_1980,39.3217,-93.3058,,,,,,,,,2003-03-17 12:00:00,2003,76
579200,579201,2003_1980,39.3217,-93.3058,,,,,,,,,2003-03-22 12:00:00,2003,81
579201,579202,2003_1980,39.3217,-93.3058,,,,,,,,,2003-03-24 12:00:00,2003,83
579202,579203,2003_1980,39.3217,-93.3058,,,,,,,,,2003-03-26 12:00:00,2003,85


### Merge Weather data into Fire Brightness model input:

In [22]:
# Merging data into NASA M6, changing by DF above and column name. 
emdata_weather = emdata.merge(right=dfWeather.loc[:,['cluster_reference', 'temperature', 'humidity', 
                                                                    'precip_intensity', 'pressure', 'uv_index', 'visibility', 'wind_speed','wind_gust','doy']],
                   how='left',
                   left_on=['cluster_reference', 'doy'],
                   right_on=['cluster_reference', 'doy'])

In [24]:
print(emdata_weather.shape)
emdata_weather[0:15]

(6134150, 30)


Unnamed: 0.1,Unnamed: 0,id,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference,season,temperature,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust
0,0,0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
1,1,1,2008,359,-81.0404,25.1984,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
2,2,2,2008,359,-81.038,25.1981,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
3,3,4,2008,359,-81.0594,25.2035,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
4,4,5,2008,359,-81.057,25.2032,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
5,5,6,2008,359,-81.0545,25.2028,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
6,6,7,2008,359,-81.0521,25.2025,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
7,7,8,2008,359,-81.0473,25.2018,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
8,8,9,2008,359,-81.0449,25.2014,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
9,9,10,2008,359,-81.04,25.2007,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06


In [27]:
### Dropping dupes. 
emdata_weather = emdata_weather.drop_duplicates(subset=['doy', 'year', 'longitude', 'latitude', 'cluster_reference'], keep='first')

In [29]:
emdata_weather.info(verbose=True, null_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5936438 entries, 0 to 6134149
Data columns (total 30 columns):
Unnamed: 0             5936438 non-null int64
id                     5936438 non-null int64
year                   5936438 non-null int64
doy                    5936438 non-null int64
longitude              5936438 non-null float64
latitude               5936438 non-null float64
covertype              5936438 non-null int64
fuelcode               5936438 non-null int64
area_burned            5936438 non-null float64
prefire_fuel           5936438 non-null float64
consumed_fuel          5936438 non-null float64
ECO2                   5936438 non-null float64
ECO                    5936438 non-null float64
ECH4                   5936438 non-null float64
EPM2.5                 5936438 non-null float64
cwd_frac               5936438 non-null float64
duff_frac              5936438 non-null float64
fuel_moisture_class    5936438 non-null int64
burnday_source         5936438 non-nu

In [104]:
# Reorder DF. 
emdata_weather = emdata_weather[['latitude', 'longitude', 'doy', 'month', 'year','cluster_reference', 'season', 'covertype', 'fuelcode', 'prefire_fuel','fuel_moisture_class',
                                                 'temperature', 'humidity', 'precip_intensity', 'pressure', 'uv_index', 'visibility', 'wind_speed','wind_gust','brightness', 'bright_t31', 'frp']]

---
## Work to clean up BSEV/Emissions DF for weather nulls:

In [30]:
print(emdata_weather.shape)
emdata_weather.head()

(5936438, 30)


Unnamed: 0.1,Unnamed: 0,id,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference,season,temperature,humidity,precip_intensity,pressure,uv_index,visibility,wind_speed,wind_gust
0,0,0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
1,1,1,2008,359,-81.0404,25.1984,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
2,2,2,2008,359,-81.038,25.1981,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
3,3,4,2008,359,-81.0594,25.2035,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06
4,4,5,2008,359,-81.057,25.2032,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0,3,75.35,0.83,0.0,1025.3,,,17.66,22.06


In [31]:
# Dropping two features given the number of Nulls. 
emdata_weather = emdata_weather.drop(columns=['pressure','uv_index'])

In [32]:
emdata_weather.head(1)

Unnamed: 0.1,Unnamed: 0,id,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference,season,temperature,humidity,precip_intensity,visibility,wind_speed,wind_gust
0,0,0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0,3,75.35,0.83,0.0,,17.66,22.06


In [33]:
emdata_weather = emdata_weather.dropna(subset=['temperature'])  

In [34]:
emdata_weather[['precip_intensity']] = emdata_weather[['precip_intensity']].fillna(value=0)

In [40]:
emdata_weather.info(verbose=True, null_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5717352 entries, 0 to 6134149
Data columns (total 27 columns):
Unnamed: 0             5717352 non-null int64
id                     5717352 non-null int64
year                   5717352 non-null int64
doy                    5717352 non-null int64
longitude              5717352 non-null float64
latitude               5717352 non-null float64
covertype              5717352 non-null int64
fuelcode               5717352 non-null int64
area_burned            5717352 non-null float64
prefire_fuel           5717352 non-null float64
consumed_fuel          5717352 non-null float64
ECO2                   5717352 non-null float64
ECO                    5717352 non-null float64
ECH4                   5717352 non-null float64
EPM2.5                 5717352 non-null float64
cwd_frac               5717352 non-null float64
duff_frac              5717352 non-null float64
fuel_moisture_class    5717352 non-null int64
burnday_source         5717352 non-nu

In [37]:
emdata_weather = emdata_weather.dropna(subset=['humidity'])  

In [38]:
emdata_weather = emdata_weather.drop(columns=['visibility'])

In [43]:
# Creating Means for features

wind_speed_mean = emdata_weather['wind_speed'].mean()
wind_gust_mean = emdata_weather['wind_gust'].mean()

print(wind_speed_mean, wind_gust_mean)

7.904928151963509 14.492237774220422


In [45]:
weather_values = {'wind_speed': wind_speed_mean, 'wind_gust': wind_gust_mean}
emdata_weather = emdata_weather.fillna(value=weather_values)

In [48]:
emdata_weather = emdata_weather.drop(columns={'Unnamed: 0','area_burned', 'burnday_source'})

In [49]:
emdata_weather.info(verbose=True, null_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5717352 entries, 0 to 6134149
Data columns (total 24 columns):
id                     5717352 non-null int64
year                   5717352 non-null int64
doy                    5717352 non-null int64
longitude              5717352 non-null float64
latitude               5717352 non-null float64
covertype              5717352 non-null int64
fuelcode               5717352 non-null int64
prefire_fuel           5717352 non-null float64
consumed_fuel          5717352 non-null float64
ECO2                   5717352 non-null float64
ECO                    5717352 non-null float64
ECH4                   5717352 non-null float64
EPM2.5                 5717352 non-null float64
cwd_frac               5717352 non-null float64
duff_frac              5717352 non-null float64
fuel_moisture_class    5717352 non-null int64
BSEV                   5717352 non-null int64
cluster_reference      5717352 non-null object
season                 5717352 non-nul

In [50]:
emdata_weather.to_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/BSEV_Model_June12.csv',index=False)

---
## Adding Regions into BSEV Model:

In [51]:
BSEV_model = pd.read_csv('../data/BSEV_Model_June12.csv')

In [52]:
print(BSEV_model.shape)
BSEV_model.head(3)

(5717352, 24)


Unnamed: 0,id,year,doy,longitude,latitude,covertype,fuelcode,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,BSEV,cluster_reference,season,temperature,humidity,precip_intensity,wind_speed,wind_gust
0,0,2008,359,-81.0384,25.1958,3,1600,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,1,2008_0,3,75.35,0.83,0.0,17.66,22.06
1,1,2008,359,-81.0404,25.1984,3,1600,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,2,2008_0,3,75.35,0.83,0.0,17.66,22.06
2,2,2008,359,-81.038,25.1981,3,1600,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,1,2008_0,3,75.35,0.83,0.0,17.66,22.06


In [55]:
BSEV_model['unique_id'] = BSEV_model['cluster_reference'].astype(str) + "_" + BSEV_model['latitude'].astype(str)

#### Merging Fire Regions to dataframe:

In [53]:
bsev_regions = pd.read_csv('../data/BSEV_Model_Regions_June12.csv')

In [54]:
print(bsev_regions.shape)
bsev_regions.head(6)

(5717352, 5)


Unnamed: 0,year,longitude,latitude,unique_id,cluster_label
0,2008,-81.0384,25.1958,2008_0_25.1958,2
1,2008,-81.0404,25.1984,2008_0_25.1984,2
2,2008,-81.038,25.1981,2008_0_25.1981,2
3,2008,-81.0594,25.2035,2008_0_25.2035,2
4,2008,-81.057,25.2032,2008_0_25.2032,2
5,2008,-81.0545,25.2028,2008_0_25.2028,2


In [56]:
# Merging data into NASA M6, changing by DF above and column name. 
BSEV_model_ii = BSEV_model.merge(right=bsev_regions.loc[:,['unique_id', 'cluster_label']],
                   how='left',
                   left_on=['unique_id'],
                   right_on=['unique_id'])

In [57]:
print(BSEV_model_ii.shape)
BSEV_model_ii.head(3)

(21580892, 26)


Unnamed: 0,id,year,doy,longitude,latitude,covertype,fuelcode,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,BSEV,cluster_reference,season,temperature,humidity,precip_intensity,wind_speed,wind_gust,unique_id,cluster_label
0,0,2008,359,-81.0384,25.1958,3,1600,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,1,2008_0,3,75.35,0.83,0.0,17.66,22.06,2008_0_25.1958,2
1,1,2008,359,-81.0404,25.1984,3,1600,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,2,2008_0,3,75.35,0.83,0.0,17.66,22.06,2008_0_25.1984,2
2,2,2008,359,-81.038,25.1981,3,1600,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,1,2008_0,3,75.35,0.83,0.0,17.66,22.06,2008_0_25.1981,2


In [58]:
BSEV_model_ii = BSEV_model_ii.drop_duplicates(subset=['latitude', 'longitude', 'doy', 'year','unique_id'], keep='first')

In [59]:
print(BSEV_model_ii.shape)
BSEV_model_ii.head(3)

(5717352, 26)


Unnamed: 0,id,year,doy,longitude,latitude,covertype,fuelcode,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,BSEV,cluster_reference,season,temperature,humidity,precip_intensity,wind_speed,wind_gust,unique_id,cluster_label
0,0,2008,359,-81.0384,25.1958,3,1600,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,1,2008_0,3,75.35,0.83,0.0,17.66,22.06,2008_0_25.1958,2
1,1,2008,359,-81.0404,25.1984,3,1600,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,2,2008_0,3,75.35,0.83,0.0,17.66,22.06,2008_0_25.1984,2
2,2,2008,359,-81.038,25.1981,3,1600,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,1,2008_0,3,75.35,0.83,0.0,17.66,22.06,2008_0_25.1981,2


In [60]:
BSEV_model_ii = BSEV_model_ii.drop(columns = {'unique_id'})

In [61]:
BSEV_model_ii = BSEV_model_ii.rename(columns = {'cluster_label':'fire_bsev_region'})

In [62]:
BSEV_model_ii = BSEV_model_ii[['id', 'year', 'doy', 'longitude', 'latitude', 'cluster_reference', 'fire_bsev_region', 'season', 'covertype',
                               'fuelcode', 'prefire_fuel', 'consumed_fuel', 'fuel_moisture_class','temperature', 'humidity', 'precip_intensity',
                               'wind_speed', 'wind_gust', 'cwd_frac', 'duff_frac', 'BSEV','ECO2', 'ECO', 'ECH4', 'EPM2.5']]

In [63]:
print(BSEV_model_ii.shape)
BSEV_model_ii.head(3)

(5717352, 25)


Unnamed: 0,id,year,doy,longitude,latitude,cluster_reference,fire_bsev_region,season,covertype,fuelcode,prefire_fuel,consumed_fuel,fuel_moisture_class,temperature,humidity,precip_intensity,wind_speed,wind_gust,cwd_frac,duff_frac,BSEV,ECO2,ECO,ECH4,EPM2.5
0,0,2008,359,-81.0384,25.1958,2008_0,2,3,3,1600,6220.097576,1999.75772,3,75.35,0.83,0.0,17.66,22.06,0.023231,0.082115,1,3347.594423,153.981344,4.499455,23.797117
1,1,2008,359,-81.0404,25.1984,2008_0,2,3,3,1600,6220.097576,2041.37434,3,75.35,0.83,0.0,17.66,22.06,0.022757,0.080441,2,3417.260644,157.185824,4.593092,24.292355
2,2,2008,359,-81.038,25.1981,2008_0,2,3,3,1600,6220.097576,1999.75772,3,75.35,0.83,0.0,17.66,22.06,0.023231,0.082115,1,3347.594423,153.981344,4.499455,23.797117


In [64]:
BSEV_model_ii.to_csv('../data/BSEV_Model_June13.csv')

In [70]:
BSEV_model_ii.to_sql('BSEV_Model', con = engine, if_exists = 'replace', chunksize = 100000)