In [1]:
import pandas as pd
import numpy as np

# NASA FIRMS active-fire CSV feeds (24 h window, contiguous USA + Hawaii):
# first the VIIRS product, then the MODIS Collection 6 product.
viirs_url, modis_url = (
    'https://firms.modaps.eosdis.nasa.gov/data/active_fire/viirs/csv/VNP14IMGTDL_NRT_USA_contiguous_and_Hawaii_24h.csv',
    'https://firms.modaps.eosdis.nasa.gov/data/active_fire/c6/csv/MODIS_C6_USA_contiguous_and_Hawaii_24h.csv',
)

## Summary of Data for FIRMS Active Fire

NASA provides two analysis-ready data products that are appropriate for detecting wildfires: MCD14DL, which I will refer to as MODIS, and VNP14IMGTDL_NRT, which I will refer to as VIIRS.

MODIS is a data product generated from telemetry from two satellites, Aqua and Terra. It resolves the Earth's surface into roughly 1 km x 1 km pixels and evaluates whether each pixel is on fire using a contextual algorithm that exploits the strong emission of mid-infrared radiation from fires.

VIIRS has a finer spatial resolution and better global coverage (due to the Suomi-NPP satellite's orbit) and can resolve fire pixels down to 375 m.

Using both data sources should increase data coverage and potentially add a validation element to our model.

In [2]:
# Download the current 24 h detections: `v` holds the VIIRS feed, `m` the MODIS feed.
v = pd.read_csv(filepath_or_buffer=viirs_url)
m = pd.read_csv(filepath_or_buffer=modis_url)

In [3]:
# (rows, columns) of the current VIIRS frame, then of the current MODIS frame.
tuple(frame.shape for frame in (v, m))

((2085, 13), (404, 13))

In [4]:
# Preview the first five rows of the current VIIRS feed.
v.head(n=5)

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,confidence,version,bright_ti5,frp,daynight
0,46.91339,-64.0033,299.0,0.43,0.38,2019-09-10,600,N,nominal,1.0NRT,280.1,0.7,N
1,46.90993,-64.00423,301.4,0.43,0.38,2019-09-10,600,N,nominal,1.0NRT,280.5,0.7,N
2,46.57857,-80.79469,297.2,0.77,0.77,2019-09-10,600,N,nominal,1.0NRT,277.5,1.2,N
3,20.58317,-76.13242,301.7,0.53,0.5,2019-09-10,612,N,nominal,1.0NRT,276.1,1.4,N
4,19.54364,-70.8206,315.3,0.41,0.37,2019-09-10,612,N,nominal,1.0NRT,286.0,1.7,N


In [5]:
# Preview the first five rows of the current MODIS feed.
m.head(n=5)

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,confidence,version,bright_t31,frp,daynight
0,32.13,-93.39,305.0,1.0,1.0,2019-09-10,420,T,62,6.0NRT,285.6,6.7,N
1,32.129,-93.4,325.0,1.0,1.0,2019-09-10,420,T,100,6.0NRT,289.5,23.2,N
2,32.223,-94.056,306.5,1.0,1.0,2019-09-10,420,T,68,6.0NRT,288.4,7.3,N
3,32.222,-94.067,305.8,1.0,1.0,2019-09-10,420,T,66,6.0NRT,289.3,6.7,N
4,37.136,-91.62,312.3,1.1,1.1,2019-09-10,420,T,84,6.0NRT,290.9,11.6,N


These are the current data outputs from MODIS and VIIRS; we will match our historical data to the shape of the current data stream.

In [6]:
# Combine the current MODIS and VIIRS detections into a single frame.
# `confidence` is dropped from both sides first: the MODIS preview shows numeric
# values for it while the VIIRS preview shows text labels.
# No `on=` is given, so the outer merge joins on every column name the two
# frames share; columns that exist in only one feed are then filled with 0.
mv_current = (
    m.drop(columns='confidence')
     .merge(v.drop(columns='confidence'), how='outer')
     .fillna(0)
)

In [11]:
# Historical MODIS (`mh`) and VIIRS (`vh`) archives, read from local CSV exports.
# NOTE(review): both paths point into one user's home directory, so this cell
# only runs on a machine with the same folder layout.
mh = pd.read_csv(
    filepath_or_buffer='~/datascience/burn notice/Data-Science/Data/DL_FIRE_M6_66499 - Historical Modis/fire_archive_M6_66499.csv'
)
vh = pd.read_csv(
    filepath_or_buffer='~/datascience/burn notice/Data-Science/Data/fire_archive_V1_67178.csv'
)

# (rows, columns) of the VIIRS archive, then of the MODIS archive.
tuple(frame.shape for frame in (vh, mh))

((3623813, 14), (1780524, 15))

In [25]:
# Preview the first five rows of the historical VIIRS archive.
vh.head(n=5)

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,type
0,39.21748,-76.47753,328.3,0.59,0.53,2012-01-20,619,N,VIIRS,n,1,272.9,5.5,2
1,39.21451,-76.47873,328.7,0.59,0.53,2012-01-20,619,N,VIIRS,n,1,271.7,5.2,2
2,40.39339,-79.85433,334.5,0.46,0.63,2012-01-20,619,N,VIIRS,n,1,265.8,5.0,2
3,34.37696,-79.88717,307.5,0.41,0.61,2012-01-20,620,N,VIIRS,n,1,273.3,1.0,2
4,34.37386,-79.89035,296.8,0.41,0.61,2012-01-20,620,N,VIIRS,n,1,273.7,1.4,2


In [29]:
# Preview the first five rows of the historical MODIS archive.
mh.head(n=5)

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,19.353,-155.0576,400.1,1.0,1.0,2001-01-01,849,Terra,MODIS,100,6.2,311.8,255.8,N,2
1,19.3378,-155.036,311.6,1.0,1.0,2001-01-01,849,Terra,MODIS,83,6.2,296.9,9.8,N,2
2,19.3544,-155.0482,322.2,1.0,1.0,2001-01-01,849,Terra,MODIS,100,6.2,293.6,19.6,N,2
3,19.3468,-155.0375,362.5,1.0,1.0,2001-01-01,849,Terra,MODIS,100,6.2,303.1,92.9,N,2
4,19.3516,-155.067,313.6,1.0,1.0,2001-01-01,849,Terra,MODIS,87,6.2,291.4,11.8,N,2


In [45]:
# Fraction of historical MODIS detections per `daynight` value.
mh.daynight.value_counts(normalize=True)

D    0.806254
N    0.193746
Name: daynight, dtype: float64

In [1]:
# For each acquisition time in the historical MODIS archive, show the share of
# detections per `daynight` value (each row sums to 1 because of
# normalize='index'); only the first five acquisition times are displayed.
# Requires the import cell and the historical-load cell to have been run first.
pd.crosstab(mh['acq_time'], mh['daynight'], normalize='index').head()

NameError: name 'pd' is not defined

In [None]:
# Combine the historical MODIS and VIIRS archives into one frame, mirroring the
# treatment of the current feeds: drop `confidence`, plus the archive-only
# `instrument` and `type` columns, outer-merge on every shared column name,
# then fill the gaps with 0.
# NOTE(review): `daynight` is present in `mh` but not in `vh`, so every VIIRS
# row ends up with daynight == 0 after the fill -- confirm that is intended.
(
    mh.drop(columns=['confidence', 'instrument', 'type'])
      .merge(vh.drop(columns=['confidence', 'instrument', 'type']), how='outer')
      .fillna(0)
)

In [23]:
# Column dtypes of the historical VIIRS archive. Compare with `mh.dtypes` to
# spot columns whose types differ between the two archives before combining them.
vh.dtypes

latitude      float64
longitude     float64
bright_ti4    float64
scan          float64
track         float64
acq_date       object
acq_time        int64
satellite      object
instrument     object
confidence     object
version         int64
bright_ti5    float64
frp           float64
type            int64
dtype: object

In [24]:
# Column dtypes of the historical MODIS archive. Compare with `vh.dtypes` to
# spot columns whose types differ between the two archives before combining them.
mh.dtypes

latitude      float64
longitude     float64
brightness    float64
scan          float64
track         float64
acq_date       object
acq_time        int64
satellite      object
instrument     object
confidence      int64
version       float64
bright_t31    float64
frp           float64
daynight       object
type            int64
dtype: object

In [17]:
# `confidence` probably has to be removed because the two products report it on
# different scales. `daynight` could be mapped to numeric values, or dropped.

# Absolute day/night counts for the historical MODIS archive.
# This cell previously referenced `mdf`, a name no visible cell defines, so it
# failed on a fresh kernel; the archive is loaded as `mh` in this notebook.
mh['daynight'].value_counts()

D    1435555
N     344969
Name: daynight, dtype: int64

In [15]:
# Number of detections per `type` value.
# NOTE(review): `vdf` is not defined in any visible cell, and the counts shown
# below do not add up to the row count of `vh` -- confirm which frame (or
# subset) this was meant to be before relying on it.
vdf['type'].value_counts()

0    436467
2     78261
3     27390
1      2679
Name: type, dtype: int64