In [55]:
%matplotlib inline
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
import random
InteractiveShell.ast_node_interactivity = "all"
import os
from datetime import datetime
import matplotlib.pyplot as plt
#import plotly.express as px
import glob
from tqdm import tqdm
from sklearn import preprocessing

In [82]:
# import data
url = '../data/australia_old.csv'
fileName = '../data/australia_old'
aus_fires = pd.read_csv(url, parse_dates=['acq_date'])
aus_fires.shape
aus_fires.head()

(1445364, 15)

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,-14.9764,145.2801,320.6,2.0,1.4,2015-01-01,104,Terra,MODIS,24,6.2,294.8,25.0,D,0
1,-15.8931,136.6094,324.4,1.2,1.1,2015-01-01,104,Terra,MODIS,28,6.2,302.3,11.6,D,0
2,-18.5115,139.5995,331.8,1.0,1.0,2015-01-01,105,Terra,MODIS,37,6.2,305.0,19.1,D,0
3,-19.0015,121.9994,326.9,2.9,1.6,2015-01-01,243,Terra,MODIS,37,6.2,300.0,46.9,D,0
4,-18.0765,122.69,314.5,3.2,1.7,2015-01-01,243,Terra,MODIS,25,6.2,292.5,42.0,D,0


In [83]:
aus_fires.rename({'acq_date': 'time'}, axis=1, inplace=True)
aus_fires['est_fire_area'] = aus_fires['scan'] * aus_fires['track']
aus_fires['est_brightness'] = (aus_fires['brightness'] + aus_fires['bright_t31'])/2
aus_fires.latitude = aus_fires.latitude.round(1)
aus_fires.longitude = aus_fires.longitude.round(1)
aus_fires = aus_fires[aus_fires.type==0]

aus_fires.shape
aus_fires.head()

(1436163, 17)

Unnamed: 0,latitude,longitude,brightness,scan,track,time,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,est_fire_area,est_brightness
0,-15.0,145.3,320.6,2.0,1.4,2015-01-01,104,Terra,MODIS,24,6.2,294.8,25.0,D,0,2.8,307.7
1,-15.9,136.6,324.4,1.2,1.1,2015-01-01,104,Terra,MODIS,28,6.2,302.3,11.6,D,0,1.32,313.35
2,-18.5,139.6,331.8,1.0,1.0,2015-01-01,105,Terra,MODIS,37,6.2,305.0,19.1,D,0,1.0,318.4
3,-19.0,122.0,326.9,2.9,1.6,2015-01-01,243,Terra,MODIS,37,6.2,300.0,46.9,D,0,4.64,313.45
4,-18.1,122.7,314.5,3.2,1.7,2015-01-01,243,Terra,MODIS,25,6.2,292.5,42.0,D,0,5.44,303.5


In [84]:
fires = aus_fires[['latitude', 'longitude', 'time','confidence',
                           'est_fire_area','est_brightness','frp']].copy()
## Add fire_count column
count = fires.groupby(['latitude', 'longitude', 'time']).size().reset_index().rename(columns={0:'fire_count'})
fire_copy = fires.merge(count,how='outer', on=['latitude', 'longitude', 'time'])

fire_copy

Unnamed: 0,latitude,longitude,time,confidence,est_fire_area,est_brightness,frp,fire_count
0,-15.0,145.3,2015-01-01,24,2.80,307.70,25.0,1
1,-15.9,136.6,2015-01-01,28,1.32,313.35,11.6,1
2,-18.5,139.6,2015-01-01,37,1.00,318.40,19.1,1
3,-19.0,122.0,2015-01-01,37,4.64,313.45,46.9,1
4,-18.1,122.7,2015-01-01,25,5.44,303.50,42.0,2
...,...,...,...,...,...,...,...,...
1436158,-26.7,150.7,2020-12-31,35,1.32,295.85,4.7,1
1436159,-28.3,122.2,2020-12-31,29,3.45,303.00,38.1,1
1436160,-28.3,122.3,2020-12-31,31,3.30,299.50,32.8,3
1436161,-28.3,122.3,2020-12-31,65,1.32,306.00,20.6,3


In [85]:
fire_copy = fire_copy.groupby(['latitude', 'longitude', 'time'])[['fire_count','confidence','frp','est_fire_area','est_brightness']].mean().reset_index()
fire_copy

Unnamed: 0,latitude,longitude,time,fire_count,confidence,frp,est_fire_area,est_brightness
0,-43.5,146.2,2018-03-23,6.0,56.000000,92.700000,5.433333,306.166667
1,-43.5,146.8,2020-03-27,1.0,0.000000,33.600000,1.210000,306.100000
2,-43.5,146.9,2020-03-18,1.0,100.000000,0.000000,1.680000,327.850000
3,-43.4,146.9,2015-04-14,2.0,84.000000,14.750000,1.000000,297.025000
4,-43.4,146.9,2016-04-24,3.0,64.333333,135.966667,3.196667,309.383333
...,...,...,...,...,...,...,...,...
423591,-9.4,142.7,2020-11-12,1.0,0.000000,19.500000,2.340000,306.550000
423592,-9.3,142.3,2020-11-10,1.0,0.000000,8.500000,1.100000,308.400000
423593,-9.2,142.2,2018-10-13,1.0,38.000000,8.800000,1.000000,312.350000
423594,-9.2,142.2,2018-11-23,1.0,37.000000,9.800000,1.100000,310.300000


In [86]:
## Add location count
count = fire_copy.groupby(['latitude', 'longitude']).size().reset_index().rename(columns={0:'loc_count'})
fire_copy = fire_copy.merge(count,how='outer', on=['latitude', 'longitude'])

fire_copy

Unnamed: 0,latitude,longitude,time,fire_count,confidence,frp,est_fire_area,est_brightness,loc_count
0,-43.5,146.2,2018-03-23,6.0,56.000000,92.700000,5.433333,306.166667,1
1,-43.5,146.8,2020-03-27,1.0,0.000000,33.600000,1.210000,306.100000,1
2,-43.5,146.9,2020-03-18,1.0,100.000000,0.000000,1.680000,327.850000,1
3,-43.4,146.9,2015-04-14,2.0,84.000000,14.750000,1.000000,297.025000,12
4,-43.4,146.9,2016-04-24,3.0,64.333333,135.966667,3.196667,309.383333,12
...,...,...,...,...,...,...,...,...,...
423591,-9.4,142.7,2020-11-12,1.0,0.000000,19.500000,2.340000,306.550000,5
423592,-9.3,142.3,2020-11-10,1.0,0.000000,8.500000,1.100000,308.400000,1
423593,-9.2,142.2,2018-10-13,1.0,38.000000,8.800000,1.000000,312.350000,3
423594,-9.2,142.2,2018-11-23,1.0,37.000000,9.800000,1.100000,310.300000,3


In [87]:
fire_copy.est_fire_area = fire_copy.est_fire_area.round(1)
fire_copy.est_brightness = fire_copy.est_brightness.round(1)
fire_copy.confidence = fire_copy.confidence.round().astype(int)
fire_copy.frp = fire_copy.frp.round(1)
fire_copy.fire_count = fire_copy.fire_count.round().astype(int)

fire_copy.head(20)

Unnamed: 0,latitude,longitude,time,fire_count,confidence,frp,est_fire_area,est_brightness,loc_count
0,-43.5,146.2,2018-03-23,6,56,92.7,5.4,306.2,1
1,-43.5,146.8,2020-03-27,1,0,33.6,1.2,306.1,1
2,-43.5,146.9,2020-03-18,1,100,0.0,1.7,327.8,1
3,-43.4,146.9,2015-04-14,2,84,14.8,1.0,297.0,12
4,-43.4,146.9,2016-04-24,3,64,136.0,3.2,309.4,12
5,-43.4,146.9,2016-04-25,2,66,45.7,2.9,301.8,12
6,-43.4,146.9,2017-05-19,1,88,74.0,1.3,315.2,12
7,-43.4,146.9,2018-04-04,3,79,148.4,1.3,325.8,12
8,-43.4,146.9,2018-04-20,1,28,15.3,1.3,298.4,12
9,-43.4,146.9,2018-05-01,2,69,100.4,2.9,311.6,12


In [88]:
## Normalizing

In [89]:
confidence_norm = preprocessing.normalize([fire_copy['confidence']], norm='max')
frp_norm = preprocessing.normalize([fire_copy['frp']], norm='max')
area_norm = preprocessing.normalize([fire_copy['est_fire_area']], norm='max')
count_norm = preprocessing.normalize([fire_copy['fire_count']], norm='max')

In [90]:
fire_copy['ranking'] = confidence_norm[0]*0.4 + frp_norm[0]*0.2 + area_norm[0]*0.2 + count_norm[0]*0.2

In [91]:
fire_copy = fire_copy.sort_values(by=['loc_count'], ascending=False)
fire_copy.head(30)

Unnamed: 0,latitude,longitude,time,fire_count,confidence,frp,est_fire_area,est_brightness,loc_count,ranking
184355,-20.6,116.8,2017-05-08,1,74,14.1,1.1,317.7,211,0.321056
181967,-20.8,115.4,2016-03-04,5,86,54.6,2.2,321.1,211,0.399909
181943,-20.8,115.4,2016-02-01,2,66,20.2,1.9,303.5,211,0.307549
181944,-20.8,115.4,2016-02-02,5,64,29.5,2.1,312.5,211,0.308842
181945,-20.8,115.4,2016-02-03,5,84,34.4,1.9,314.3,211,0.384867
181946,-20.8,115.4,2016-02-04,2,88,29.6,2.1,313.5,211,0.400084
181947,-20.8,115.4,2016-02-06,1,88,19.5,1.1,324.8,211,0.377268
181948,-20.8,115.4,2016-02-07,4,74,15.1,1.5,309.7,211,0.334191
181949,-20.8,115.4,2016-02-09,2,66,25.4,3.8,303.6,211,0.347336
181950,-20.8,115.4,2016-02-10,3,82,14.3,1.5,313.4,211,0.364572


In [92]:
fire_copy.to_csv(f"{fileName}_prepared.csv.gz", index=False, compression='gzip')

In [93]:
fire_copy.to_csv(f"{fileName}_prepared.csv", index=False)