# Notebook for creating connections between DataSets:

### Query Code - Location of closest coordinate out of a list of coordinates. 
##### - https://www.timvink.nl/closest-coordinates/
##### - https://stackoverflow.com/questions/39107896/efficiently-finding-the-closest-coordinate-pair-from-a-set-in-python

In [31]:
import pandas as pd
import numpy as np
from geopy.distance import great_circle
import math
import seaborn as sns
from scipy import spatial
import matplotlib.pyplot as plt
import datetime

from scipy import spatial

places = []
for index, row in geonames.iterrows():
    coordinates = [row['latitude'], row['longitude']]
    cartesian_coord = cartesian(*coordinates)
    places.append(cartesian_coord)

tree = spatial.KDTree(places)

def find_population(lat, lon):
    cartesian_coord = cartesian(lat, lon)
    closest = tree.query([cartesian_coord], p = 2)
    index = closest[1][0]
    return {
        'name' : geonames.name[index],
        'latitude' : geonames.latitude[index],
        'longitude' : geonames.longitude[index],
        'population' : geonames.population[index],
        'distance' : closest[0][0]
    }

## One-Time - Combining Cluster Point CSVs:

#Combine all csv's in a directory, into a new file.
import os 
import glob 
import pandas as pd 

os.chdir('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/Emissions Cluster Data/EmissionClusterPoints')

extension = 'csv' 
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]

#combine all files in the list 
clusterpoints_03_15_v2 = pd.concat([pd.read_csv(f) for f in all_filenames ])

###export to csv 
clusterpoints_03_15_v2.to_csv( "clusterpoints_03_15_v2.csv", index=False, encoding='utf-8-sig')


## Load related DataFrames: Fire Cluster Center Points, Cluster Points and 1.88m Fire Records:

In [2]:
# Expanding number of columns:
pd.set_option('display.max_columns', 40)

In [11]:
# Loading 1.88m fire record table:
usdafiredb_onemil = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/1_188m_USDA Fire Database/Fire_Program_Analysis__Fire_Occurrence_Database_Feature_Layer.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [187]:
FireCenterPoints = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/Emissions Cluster Data/centerpoints_03_15_v2.csv')

In [4]:
emdata_0315 = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/9 Missoula Emisions Data RDS-2017-0039/Emissions_Year/emissions_2003to2015_cleanv1.csv')

In [144]:
ClusterPoints = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/Emissions Cluster Data/clusterpoints_03_15_v2.csv', encoding='utf-8')

## Emissions Data

In [8]:
emdata_0315.head(2)

Unnamed: 0.1,Unnamed: 0,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV
0,0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1
1,1,2008,359,-81.0404,25.1984,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2


In [9]:
emdata_20xx = emdata_0315[emdata_0315.year == 2004]
emdata_20xx.head(2)

Unnamed: 0.1,Unnamed: 0,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV
3112210,3625932,2004,217,-98.851,26.7051,1,1,62500.0,53.016678,49.305511,82.685341,3.451386,0.132139,0.502916,0.0,0.0,2,15,2
3112211,3625934,2004,217,-98.8586,26.7071,2,2,62500.0,585.985608,527.387047,884.428078,36.917093,1.413397,5.379348,0.0,0.0,2,15,2


## Fire Cluster Points Data:

In [145]:
print(ClusterPoints.shape)
ClusterPoints[200000:200005]

(5960572, 5)


Unnamed: 0.1,Unnamed: 0,0,1,ClusterNum,Year
200000,200000,31.7406,-109.2987,361,2011
200001,200001,31.7409,-109.2961,361,2011
200002,200002,31.7412,-109.2935,361,2011
200003,200003,31.7415,-109.2909,361,2011
200004,200004,31.7418,-109.2882,361,2011


## Creating New Fire Cluster Point Dataframe:
#### Dropping unused columns
#### Renaming column headers
#### Assigning new Cluster Reference to cluster points

In [148]:
ClusterPoints = ClusterPoints.drop(columns=['Unnamed: 0'])
ClusterPoints[0:1]

Unnamed: 0,0,1,ClusterNum,Year
0,25.1903,-81.0394,0,2011


In [149]:
ClusterPoints = ClusterPoints.rename(columns={'0': 'latitude', '1': 'longitude', 'Year': 'year'})
ClusterPoints[0:5]

Unnamed: 0,latitude,longitude,ClusterNum,year
0,25.1903,-81.0394,0,2011
1,25.19,-81.037,0,2011
2,25.1896,-81.0346,0,2011
3,25.1926,-81.039,0,2011
4,25.1922,-81.0366,0,2011


In [150]:
ClusterPoints['cluster_reference'] = ClusterPoints['year'].astype(str) + "_" + ClusterPoints['ClusterNum'].astype(str)
ClusterPoints[0:10]

Unnamed: 0,latitude,longitude,ClusterNum,year,cluster_reference
0,25.1903,-81.0394,0,2011,2011_0
1,25.19,-81.037,0,2011,2011_0
2,25.1896,-81.0346,0,2011,2011_0
3,25.1926,-81.039,0,2011,2011_0
4,25.1922,-81.0366,0,2011,2011_0
5,25.1918,-81.0342,0,2011,2011_0
6,25.1995,-81.0701,0,2011,2011_0
7,25.1991,-81.0677,0,2011,2011_0
8,25.1952,-81.0411,0,2011,2011_0
9,25.1948,-81.0386,0,2011,2011_0


In [151]:
ClusterPoints[1000:1002]
print(ClusterPoints.shape)

(5960572, 5)


In [152]:
# Writing new dataset to CSV:
ClusterPoints.to_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/Emissions Cluster Data/clusterpoints_03_15_ref1.csv', encoding='utf-8')

## Fire Center Points Data:
#### Renaming columns, adding cluster_reference id, moving column. 

In [188]:
print(FireCenterPoints.shape)
FireCenterPoints[0:2]

(109321, 23)


Unnamed: 0.1,Unnamed: 0,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag
0,0,22.0,2006.0,208.0,-80.6759,25.4076,5461.0,1.0,1.0,62500.0,394.430634,366.82049,615.157961,25.677434,0.983079,3.741569,0.0,0.0,2.0,1.0,15.0,2.0,0.0
1,1,96.0,2006.0,157.0,-80.5824,25.613,6845.0,1.0,1.0,62500.0,299.942136,278.946186,467.792755,19.526233,0.747576,2.845251,0.0,0.0,2.0,1.0,81.0,2.0,0.0


In [189]:
FireCenterPoints = FireCenterPoints.rename(columns={'Unnamed: 0': 'cluster_ref'})

In [190]:
FireCenterPoints = FireCenterPoints.astype({"year": int}) 

In [191]:
FireCenterPoints.head(1)

Unnamed: 0,cluster_ref,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag
0,0,22.0,2006,208.0,-80.6759,25.4076,5461.0,1.0,1.0,62500.0,394.430634,366.82049,615.157961,25.677434,0.983079,3.741569,0.0,0.0,2.0,1.0,15.0,2.0,0.0


In [192]:
FireCenterPoints['cluster_reference'] = FireCenterPoints['year'].astype(str) + "_" + FireCenterPoints['cluster_ref'].astype(str)

In [193]:
FireCenterPoints[0:5]

Unnamed: 0,cluster_ref,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag,cluster_reference
0,0,22.0,2006,208.0,-80.6759,25.4076,5461.0,1.0,1.0,62500.0,394.430634,366.82049,615.157961,25.677434,0.983079,3.741569,0.0,0.0,2.0,1.0,15.0,2.0,0.0,2006_0
1,1,96.0,2006,157.0,-80.5824,25.613,6845.0,1.0,1.0,62500.0,299.942136,278.946186,467.792755,19.526233,0.747576,2.845251,0.0,0.0,2.0,1.0,81.0,2.0,0.0,2006_1
2,2,164.0,2006,157.0,-80.5513,25.6198,6845.0,1.0,1.0,62500.0,226.077462,210.25204,352.592671,14.717643,0.563475,2.144571,0.0,0.0,2.0,1.0,15.0,2.0,0.0,2006_2
3,3,718.0,2006,49.0,-81.1249,26.0165,8222.0,3.0,1600.0,62500.0,6220.097576,2017.491592,3377.280924,155.346853,4.539356,24.00815,0.011189,0.081394,4.0,1.0,15.0,2.0,0.0,2006_3
4,4,444.0,2006,155.0,-81.6018,26.0708,8217.0,3.0,1140.0,62500.0,7646.952224,3842.452183,6432.264954,295.868818,8.645517,45.725181,0.008857,0.084716,2.0,1.0,15.0,2.0,1.0,2006_4


In [194]:
first_col = FireCenterPoints.pop('cluster_reference')
FireCenterPoints.insert(1, 'cluster_reference', first_col)
FireCenterPoints[0:2]

Unnamed: 0,cluster_ref,cluster_reference,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag
0,0,2006_0,22.0,2006,208.0,-80.6759,25.4076,5461.0,1.0,1.0,62500.0,394.430634,366.82049,615.157961,25.677434,0.983079,3.741569,0.0,0.0,2.0,1.0,15.0,2.0,0.0
1,1,2006_1,96.0,2006,157.0,-80.5824,25.613,6845.0,1.0,1.0,62500.0,299.942136,278.946186,467.792755,19.526233,0.747576,2.845251,0.0,0.0,2.0,1.0,81.0,2.0,0.0


-----

## Duplicates - Center Points: identify duplicates in Centerpoint data using DOY, Year, and alternating Lat and Long and merging coordinates that are very close. 
#### Based on the manual review, create short list of cluster references to merge. 

In [120]:
centerpoint_dupes = FireCenterPoints[FireCenterPoints.duplicated(['doy', 'year', 'longitude'],keep=False)]
centerpoint_dupes_lat = FireCenterPoints[FireCenterPoints.duplicated(['doy', 'year', 'latitude'],keep=False)]

In [146]:
print(centerpoint_dupes_lat.shape)
centerpoint_dupes_lat[50:59]

(58, 24)


Unnamed: 0,cluster_ref,cluster_reference,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag
94175,6123,2010_6123,366894.0,2010,91.0,-101.3733,35.6479,52741.0,1.0,1.0,62500.0,148.738122,138.326453,231.973462,9.682852,0.370715,1.41093,0.0,0.0,2.0,4.0,78.0,2.0,1.0
94185,6133,2010_6133,366929.0,2010,91.0,-101.471,35.6479,52740.0,1.0,1.0,62500.0,68.708718,63.899108,107.158804,4.472938,0.17125,0.651771,0.0,0.0,2.0,4.0,77.0,2.0,1.0
95648,7596,2010_7596,378334.0,2010,192.0,-121.504,37.632,76080.0,1.0,1.0,0.0,222.826968,207.22908,347.523168,14.506036,0.555374,2.113737,0.0,0.0,3.0,4.0,77.0,1.0,1.0
95649,7597,2010_7597,378343.0,2010,192.0,-121.5366,37.632,76080.0,1.0,1.0,62500.0,211.057938,196.283882,329.168071,13.739872,0.526041,2.002096,0.0,0.0,3.0,4.0,77.0,2.0,1.0
106057,4285,2012_4285,713074.0,2012,203.0,-94.3356,34.2162,44967.0,1.0,1.0,62500.0,379.074852,352.539612,591.20893,24.677773,0.944806,3.595904,0.0,0.0,2.0,4.0,78.0,3.0,1.0
106079,4307,2012_4307,713162.0,2012,203.0,-92.5906,34.2162,44983.0,3.0,1400.0,0.0,6849.442774,3461.730825,5794.937402,266.553274,7.788894,41.194597,0.025578,0.07439,2.0,4.0,78.0,1.0,1.0
108054,6282,2012_6282,728838.0,2012,250.0,-120.765,37.9822,77009.0,1.0,1.0,62500.0,299.26962,278.320747,466.743892,19.482452,0.7459,2.838872,0.0,0.0,2.0,4.0,77.0,2.0,1.0
108075,6303,2012_6303,729157.0,2012,250.0,-121.9208,37.9822,77921.0,1.0,1.0,62500.0,220.585248,205.144281,344.026959,14.3601,0.549787,2.092472,0.0,0.0,2.0,4.0,77.0,2.0,1.0


In [58]:
### Research into single duplicate in the Fire CenterPoint Data:
day = 75
lg = -82.9603

FireCenterPoints.loc[(FireCenterPoints['year'] == 2013) & (FireCenterPoints['doy'] == day) &(FireCenterPoints['longitude'] == lg),['cluster_reference', 'year', 'doy', 'longitude', 'latitude']]

Unnamed: 0,cluster_reference,year,doy,longitude,latitude
100468,2013_4179,2013,75.0,-82.9603,35.5648
100654,2013_4365,2013,75.0,-82.9603,36.642


In [153]:
# Seeing how many cluster points are related to the cluster reference, in determining which to merge. 
c1 = '2010_7596'
c2 = '2010_7597'

a = ClusterPoints.loc[(ClusterPoints['cluster_reference'] == c1)]
b = ClusterPoints.loc[(ClusterPoints['cluster_reference'] == c2)]
print(len(a), len(b))
print(b[0:10])
print(a[0:10])

5 10
         latitude  longitude  ClusterNum  year cluster_reference
1601276   37.6286  -121.5413        7597  2010         2010_7597
1601277   37.6292  -121.5386        7597  2010         2010_7597
1601278   37.6308  -121.5421        7597  2010         2010_7597
1601279   37.6320  -121.5366        7597  2010         2010_7597
1601280   37.6325  -121.5338        7597  2010         2010_7597
1601281   37.6335  -121.5401        7597  2010         2010_7597
1601282   37.6341  -121.5373        7597  2010         2010_7597
1601283   37.6347  -121.5346        7597  2010         2010_7597
1601284   37.6357  -121.5409        7597  2010         2010_7597
1601285   37.6363  -121.5381        7597  2010         2010_7597
         latitude  longitude  ClusterNum  year cluster_reference
1601271   37.6299  -121.5032        7596  2010         2010_7596
1601272   37.6320  -121.5040        7596  2010         2010_7596
1601273   37.6326  -121.5012        7596  2010         2010_7596
1601274   37.6342  -

## Duplicate Centerpoints / Merging list:
- 2006_1874 same as 2006_1880 = merged 2006_1874 into 2006_1880.
         2006_3171 lat is off by 2 from 2006_3568
         2006_5430 not equal to '2006_10010'
- 2006_11021 merged into 2006_11012
- 2004_5765 merged into 2004_5772
- 2005_7122 merged into 2005_228
- 2003_3553 mergred into 2003_3572
- 2003_5931 merged into 2003_722
- 2008_2209 merged into 2008_2201
        '2008_4900' not eq to '2008_6046'
- 2008_7540 merged into 2008_7603
- 2009_1551 merged into 2009_1554
- 2009_1952 merged into 2009_1957
- 2009_2127 merged into 2009_2125
        '2009_5224' note eq to '2009_7086'
- 2013_4179 merged into 2013_4365

- 2007_4737 merged into 2007_4730
- 2007_8121 merged into 2007_8120
- 2015_3984 merged into 2015_3968
- 2011_9524 merged into 2011_9537
- 2011_9726 merged into 2011_9722
- 2009_2255 merged into 2009_2256
- 2009_4723 merged into 2009_4724
- 2010_4123 merged into 2010_4124
- 2010_6123 merged into 2010_6133
- 2010_7596 merged into 2010_7597

In [160]:
ClusterPoints.head(1)

Unnamed: 0,latitude,longitude,ClusterNum,year,cluster_reference
0,25.1903,-81.0394,0,2011,2011_0


In [180]:
# Method to replace the first reference with the second. 
ClusterPoints['cluster_reference'].mask(ClusterPoints['cluster_reference'] == '2006_1874', '2006_1880', inplace=True)

In [183]:
print(ClusterPoints.shape)
ClusterPoints.loc[(ClusterPoints['cluster_reference'] == '2006_1874')]

(5960572, 5)


Unnamed: 0,latitude,longitude,ClusterNum,year,cluster_reference


In [269]:
ClusterPoints.reset_index(inplace = True, drop = True) 

In [270]:
ClusterPoints.to_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/Emissions Cluster Data/clusterpoints_03_15_refv5.csv', encoding='utf-8')

### Remove centerpoint rows for duplicate cluster references as noted above. 

In [198]:
FireCenterPoints.shape

(109321, 24)

In [263]:
adrop = '2010_7596'

In [264]:
FireCenterPoints.drop(FireCenterPoints[FireCenterPoints['cluster_reference'] == adrop].index, inplace = True) 

In [265]:
FireCenterPoints.shape

(109299, 24)

In [271]:
FireCenterPoints.reset_index(inplace = True, drop = True) 

In [272]:
FireCenterPoints.to_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/Emissions Cluster Data/centerpoints_03_15_v5.csv', encoding='utf-8')

## Emissions Data for Database:

In [186]:
print(emdata_0315.shape)
emdata_0315.head(2)

(5960572, 18)


Unnamed: 0,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV
0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1
1,2008,359,-81.0404,25.1984,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2


In [30]:
print(emdata_0315.shape)
emdata_0315 = emdata_0315.drop(columns=['Unnamed: 0'])
emdata_0315.head(2)

(5960572, 19)


Unnamed: 0,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV
0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1
1,2008,359,-81.0404,25.1984,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2


----

# Assigning Cluster References to the Emissions Dataset:

# Test
### Taking the Emissions Dataset (emdata_0315) and by lat/long/year, bringing over the cluster references from the ClusterPoints df. 

In [290]:
emdatatest = emdata_0315[(emdata_0315.year == 2008) & (emdata_0315.doy < 184)]
ClusterPointstest = ClusterPoints[(ClusterPoints.year == 2008)]
print(emdatatest.shape)
print (ClusterPointstest.shape)
print(emdatatest.head(3), ClusterPointstest.head(3))

(277210, 18)
(440888, 5)
     year  doy  longitude  latitude  covertype  fuelcode  area_burned  \
619  2008    1   -81.0472   25.4790          3      1600      62500.0   
634  2008    2   -81.0614   25.4834          3      1600      62500.0   
635  2008    3   -81.0589   25.4830          3      1600      62500.0   

     prefire_fuel  consumed_fuel         ECO2         ECO      ECH4  \
619   6220.097576     2041.37434  3417.260644  157.185824  4.593092   
634   6220.097576     2041.37434  3417.260644  157.185824  4.593092   
635   6220.097576     2041.37434  3417.260644  157.185824  4.593092   

        EPM2.5  cwd_frac  duff_frac  fuel_moisture_class  burnday_source  BSEV  
619  24.292355  0.022757   0.080441                    3              77     2  
634  24.292355  0.022757   0.080441                    3              77     2  
635  24.292355  0.022757   0.080441                    3              77     2            latitude  longitude  ClusterNum  year cluster_reference
5519684 

In [291]:
emdatatest.reset_index(inplace = True, drop = True) 

In [292]:
emdatatest.tail()

Unnamed: 0,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV
277205,2008,176,-119.6805,48.3882,3,1200,62500.0,6424.431099,4392.634153,6826.153474,584.220342,32.856903,100.152059,0.26951,0.21138,2,78,3
277206,2008,176,-119.6772,48.3888,1,1,62500.0,212.178798,197.326282,330.916175,13.81284,0.528834,2.012728,0.0,0.0,2,78,2
277207,2008,176,-119.674,48.3893,1,1,62500.0,128.226384,119.250537,199.983151,8.347538,0.319591,1.216355,0.0,0.0,2,78,2
277208,2008,176,-119.6781,48.391,1,1,62500.0,212.178798,197.326282,330.916175,13.81284,0.528834,2.012728,0.0,0.0,2,78,2
277209,2008,183,-122.0411,48.9108,3,1200,62500.0,6424.431099,3481.424868,5410.134245,463.029507,26.041058,79.376487,0.274749,0.230879,3,15,2


In [100]:
dupes1 = emdatatest[emdatatest.duplicated(['doy', 'year', 'longitude', 'latitude'])]
dupes2 = emdatatest.drop_duplicates(subset=['doy', 'year', 'longitude', 'latitude'], keep='first')

In [294]:
print(dupes2.shape)
dupes2[0:1]

(275207, 18)


Unnamed: 0,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV
0,2008,1,-81.0472,25.479,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,77,2


## Test Merge:

In [295]:
emdatatestv2 = dupes2.merge(right=ClusterPointstest.loc[:,['latitude', 'longitude','year', 'cluster_reference']],
                   how='left',
                   left_on=['latitude','longitude','year'],
                   right_on=['latitude','longitude','year'])

emdatatestv2 = pd.merge(emdatatest, 
                   ClusterPointstest[['latitude', 'longitude', 'year', 'cluster_reference']],
                     on =['latitude', 'longitude', 'year'],
                    how ='left')

In [296]:
print(emdatatestv2.shape)
emdatatestv2[0:2]

(279821, 19)


Unnamed: 0,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference
0,2008,1,-81.0472,25.479,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,77,2,2008_2
1,2008,2,-81.0614,25.4834,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,77,2,2008_2


In [114]:
cluster_dupes = emdatatestv2[emdatatestv2.duplicated(['doy', 'year', 'longitude', 'latitude'],)]

In [116]:
print(cluster_dupes.shape)
cluster_dupes[0:20]

(1070, 19)


Unnamed: 0,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference
12744,2003,92,-93.9317,34.351,3,1500,62500.0,4335.068543,2390.7403,4002.099263,184.087003,5.379166,28.44981,0.038183,0.050889,2,15,2,2003_48
12777,2003,92,-93.9481,34.3535,3,1400,62500.0,6849.442774,3477.623684,5821.542047,267.777024,7.824653,41.383722,0.025461,0.07405,2,15,2,2003_48
12779,2003,86,-93.9481,34.3535,3,1400,62500.0,6849.442774,3477.623684,5821.542047,267.777024,7.824653,41.383722,0.025461,0.07405,2,15,2,2003_48
12784,2003,92,-93.9371,34.3533,3,1160,62500.0,7020.760516,3838.521588,6425.685138,295.566162,8.636674,45.678407,0.013229,0.068897,2,15,3,2003_48
12786,2003,92,-93.9344,34.3533,3,1500,62500.0,4335.068543,2390.7403,4002.099263,184.087003,5.379166,28.44981,0.038183,0.050889,2,15,2,2003_48
12788,2003,92,-93.9317,34.3532,3,1160,62500.0,7020.760516,3838.521588,6425.685138,295.566162,8.636674,45.678407,0.013229,0.068897,2,15,3,2003_48
12790,2003,92,-93.9289,34.3532,3,1500,62500.0,4335.068543,2390.7403,4002.099263,184.087003,5.379166,28.44981,0.038183,0.050889,2,15,2,2003_48
12818,2003,92,-93.9699,34.3561,3,1500,62500.0,4335.068543,2390.7403,4002.099263,184.087003,5.379166,28.44981,0.038183,0.050889,2,15,2,2003_48
12820,2003,86,-93.9699,34.3561,3,1500,62500.0,4335.068543,2390.7403,4002.099263,184.087003,5.379166,28.44981,0.038183,0.050889,2,15,2,2003_48
12822,2003,92,-93.9672,34.3561,3,1400,62500.0,6849.442774,3477.623684,5821.542047,267.777024,7.824653,41.383722,0.025461,0.07405,2,15,2,2003_48


In [117]:
dupes3 = emdatatestv2.drop_duplicates(subset=['doy', 'year', 'longitude', 'latitude'], keep='first')

In [118]:
dupes3.shape

(98743, 19)

--------

# Merge of Cluster Reference to Emissions Data:
## Taking the Emissions Dataset (emdata_0315) and by lat/long/year, bringing over the cluster references from the ClusterPoints df. 

In [267]:
print(emdata_0315.shape)
print(ClusterPoints.shape)

(5960572, 18)
(5960572, 5)


In [282]:
ClusterPoints.tail(5)

Unnamed: 0,latitude,longitude,ClusterNum,year,cluster_reference
5960567,48.8939,-121.1569,9262,2008,2008_9262
5960568,48.8945,-121.1537,9262,2008,2008_9262
5960569,48.8951,-121.1504,9262,2008,2008_9262
5960570,48.8967,-121.1545,9262,2008,2008_9262
5960571,48.9108,-122.0411,9263,2008,2008_9263


In [297]:
emdata_0315_nodup = emdata_0315.drop_duplicates(subset=['doy', 'year', 'longitude', 'latitude'], keep='first')

In [303]:
print(emdata_0315_nodup.shape)
emdata_0315_nodup[0:2]

(5936438, 18)


Unnamed: 0,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV
0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1
1,2008,359,-81.0404,25.1984,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2


In [302]:
emdata_0315_ref5 = emdata_0315_nodup.merge(right=ClusterPoints.loc[:,['latitude', 'longitude','year','cluster_reference']],
                   how='left',
                   left_on=['latitude','longitude','year'],
                   right_on=['latitude','longitude','year'])

In [305]:
print(emdata_0315_ref5.shape)
emdata_0315_ref5[0:1]

(5988996, 19)


Unnamed: 0,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference
0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0


In [306]:
emdata_0315_ref5_clean = emdata_0315_ref5.drop_duplicates(subset=['doy', 'year', 'longitude', 'latitude'], keep='first')
print(emdata_0315_ref5_clean.shape)

(5936438, 19)


In [307]:
emdata_0315_ref5_clean.head()

Unnamed: 0,year,doy,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV,cluster_reference
0,2008,359,-81.0384,25.1958,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0
1,2008,359,-81.0404,25.1984,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0
2,2008,359,-81.038,25.1981,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0
3,2008,359,-81.0594,25.2035,3,1600,0.0,6220.097576,1999.75772,3347.594423,153.981344,4.499455,23.797117,0.023231,0.082115,3,81,1,2008_0
4,2008,359,-81.057,25.2032,3,1600,62500.0,6220.097576,2041.37434,3417.260644,157.185824,4.593092,24.292355,0.022757,0.080441,3,81,2,2008_0


thirdcol = emdata_0315_ref5_clean.pop('cluster_reference')
emdata_0315_ref5_clean.insert(2, 'cluster_reference', thirdcol)
emdata_0315_ref5_clean[0:20]

In [313]:
emdata_0315_ref5_clean.tail()

Unnamed: 0,year,doy,cluster_reference,longitude,latitude,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burnday_source,BSEV
5936433,2003,181,2003_428,-119.3634,48.9603,1,1,62500.0,115.560666,107.471419,180.22957,7.522999,0.288023,1.096208,0.0,0.0,2,77,2
5936434,2003,182,2003_428,-119.3602,48.9609,2,2,0.0,359.23563,323.312067,542.194336,22.631845,0.866476,3.297783,0.0,0.0,2,77,1
5936435,2003,181,2003_428,-119.3642,48.9625,2,2,62500.0,324.937314,292.443583,490.427888,20.471051,0.783749,2.982925,0.0,0.0,2,77,2
5936436,2003,182,2003_428,-119.361,48.9631,2,2,62500.0,130.356018,117.320416,196.746338,8.212429,0.314419,1.196668,0.0,0.0,2,77,3
5936437,2003,219,2003_6230,-121.9887,48.5002,3,1200,62500.0,6424.431099,4040.568775,6279.043877,537.395647,30.223454,92.124968,0.236728,0.198929,3,77,3


In [309]:
emdata_0315_ref5_clean.reset_index(inplace = True, drop = True) 

In [310]:
emdata_0315_ref5_clean.to_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/Emissions Cluster Data/emissions_03_15_v5.csv', encoding='utf-8')

---


# Identify closest NASA data point to Center Points to pull cluster_reference id:

In [5]:
### Pulling in new centerpoint dataset:
centerpoints_0305 = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/Emissions Cluster Data/centerpoints_03_15_v5.17.csv', encoding='utf-8')
centerpoints_0305 = centerpoints_0305.drop(columns=['Unnamed: 0'])

In [6]:
centerpoints_0305.head()

Unnamed: 0,cluster_ref,cluster_reference,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,...,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag
0,0,2006_0,22.0,2006,208.0,-80.6759,25.4076,5461.0,1.0,1.0,...,25.677434,0.983079,3.741569,0.0,0.0,2.0,1.0,15.0,2.0,0.0
1,1,2006_1,96.0,2006,157.0,-80.5824,25.613,6845.0,1.0,1.0,...,19.526233,0.747576,2.845251,0.0,0.0,2.0,1.0,81.0,2.0,0.0
2,2,2006_2,164.0,2006,157.0,-80.5513,25.6198,6845.0,1.0,1.0,...,14.717643,0.563475,2.144571,0.0,0.0,2.0,1.0,15.0,2.0,0.0
3,3,2006_3,718.0,2006,49.0,-81.1249,26.0165,8222.0,3.0,1600.0,...,155.346853,4.539356,24.00815,0.011189,0.081394,4.0,1.0,15.0,2.0,0.0
4,4,2006_4,444.0,2006,155.0,-81.6018,26.0708,8217.0,3.0,1140.0,...,295.868818,8.645517,45.725181,0.008857,0.084716,2.0,1.0,15.0,2.0,1.0


In [2]:
NASA_M6 = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/4 NASA/DL_FIRE_M6_110066/fire_archive_M6_110066.csv')

In [3]:
print(NASA_M6.shape)
NASA_M6.head()

(2159468, 15)


Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,38.8142,-93.5539,300.8,1.0,1.0,2003-01-01,423,Terra,MODIS,33,6.2,267.0,10.4,N,0
1,19.3739,-155.113,318.8,3.1,1.7,2003-01-01,914,Terra,MODIS,97,6.2,288.7,98.4,N,2
2,19.3723,-155.1197,316.0,3.1,1.7,2003-01-01,914,Terra,MODIS,90,6.2,288.3,85.0,N,2
3,19.3589,-155.1107,325.2,3.1,1.7,2003-01-01,914,Terra,MODIS,100,6.2,292.0,131.8,N,2
4,19.3573,-155.1174,319.0,3.1,1.7,2003-01-01,914,Terra,MODIS,97,6.2,290.3,97.3,N,2


In [40]:
NASA_M6['doy'] = pd.DatetimeIndex(NASA_M6['acq_date']).day
NASA_M6['month'] = pd.DatetimeIndex(NASA_M6['acq_date']).month
NASA_M6['year'] = pd.DatetimeIndex(NASA_M6['acq_date']).year

In [66]:
print(NASA_M6.shape)
NASA_M6.head()

(2159468, 18)


Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,doy,month,year
0,38.8142,-93.5539,300.8,1.0,1.0,2003-01-01,423,Terra,MODIS,33,6.2,267.0,10.4,N,0,1,1,2003
1,19.3739,-155.113,318.8,3.1,1.7,2003-01-01,914,Terra,MODIS,97,6.2,288.7,98.4,N,2,1,1,2003
2,19.3723,-155.1197,316.0,3.1,1.7,2003-01-01,914,Terra,MODIS,90,6.2,288.3,85.0,N,2,1,1,2003
3,19.3589,-155.1107,325.2,3.1,1.7,2003-01-01,914,Terra,MODIS,100,6.2,292.0,131.8,N,2,1,1,2003
4,19.3573,-155.1174,319.0,3.1,1.7,2003-01-01,914,Terra,MODIS,97,6.2,290.3,97.3,N,2,1,1,2003


### Setting Year

In [44]:
centerpoints_xx = centerpoints_0305[(centerpoints_0305.year == 2003)]

In [67]:
NASA_M6_xx = NASA_M6[(NASA_M6.year == 2003)]
print(NASA_M6_xx.shape)

(114471, 18)


In [42]:
def cartesian(latitude, longitude, doy):
    # Convert to radians
    latitude = latitude * (math.pi / 180)
    longitude = longitude * (math.pi / 180)

    R = 6371 # 6378137.0 + elevation  # relative to centre of the earth
    X = R * math.cos(latitude) * math.cos(longitude)
    Y = R * math.cos(latitude) * math.sin(longitude)
    #Z = R * math.sin(latitude)
    Z = R * doy
    return (X, Y, Z)

In [59]:
centerpoint_places = []
for index, row in centerpoints_xx.iterrows():
    coordinates = [row['latitude'], row['longitude'], row['doy']]
    cartesian_coord = cartesian(*coordinates)
    centerpoint_places.append(cartesian_coord)

tree = spatial.KDTree(centerpoint_places)
centerpoint_places[1]

(906.809620407437, -5680.445447985819, 1331539.0)

In [60]:
def find_centerpoint(lat, lon, doy):
    cartesian_coord = cartesian(lat, lon, doy)
    closest = tree.query([cartesian_coord], p = 2)
    index = closest[1][0]
    return closest

## Function to assess coordiantes from NASA M6, and pull the Cluster_Reference from the Centerpoint file that is closest:

In [63]:
# Clusterdf will be panda dataframe that has cluster coordinates to cross-ref with the:     
def find_cluster_ref(sourcedf, targetdf):
    NASApoint_clusterref = pd.DataFrame()
    nasa_index = 0
    nasa_index = int(nasa_index)
    while nasa_index < len(sourcedf):
        #capture data from target/cluster to then use to find target match in other dataframe:
        #clusterid = clusterdf.iloc[clustindexnum]['id']
        #clusteryear = clusterdf.iloc[clustindexnum]['year']
        source_doy = sourcedf.iloc[nasa_index]['doy']
        source_year = sourcedf.iloc[nasa_index]['year']
        source_lat = sourcedf.iloc[nasa_index]['latitude']
        source_long = sourcedf.iloc[nasa_index]['longitude']
        
        # Running cluster find function:
        distance_location = find_centerpoint(source_lat, source_long, source_doy)
        targetlocation = distance_location[1]
        targetlocation = int(targetlocation)
        
        # Lines to pull data from the target dataframe, will need to be customized to the target DF. 
        target_lat = targetdf.iloc[targetlocation]['latitude']
        target_long = targetdf.iloc[targetlocation]['longitude']
        target_doy = targetdf.iloc[targetlocation]['doy']
        target_clusterref = targetdf.iloc[targetlocation]['cluster_reference']
        target_year = targetdf.iloc[targetlocation]['year']
        #target_discdoy = targetdf.iloc[targetlocation]['DISCOVERY_DOY']
        #target_contdoy = targetdf.iloc[targetlocation]['CONT_DOY']
        
        # Create new DF pulling in features from Cluster Points file and Target File:
        cdftemp = pd.DataFrame({'source_lat':[source_lat], 'source_long': [source_long], 'source_year':[source_year], 'source_doy':[source_doy],
                                'distance': distance_location[0], 'resultrow': distance_location[1], 'targetlat':[target_lat], 'targetlong':[target_long],'target_doy':[target_doy], 'target_year':[target_year], 
                                'target_clusterref':[target_clusterref]})

        NASApoint_clusterref = NASApoint_clusterref.append(cdftemp, ignore_index = True)
        nasa_index = nasa_index + 1
    return NASApoint_clusterref

In [64]:
nasa_center_match = find_cluster_ref(NASA_M6_xx,centerpoints_xx)
nasa_center_match.tail(10)

Unnamed: 0,source_lat,source_long,source_year,source_doy,distance,resultrow,targetlat,targetlong,target_doy,target_year,target_clusterref
114421,27.7007,-81.2883,2003,31,19.140709,2404,27.7066,-81.4827,31.0,2003,2003_2404
114422,27.2646,-81.7714,2003,31,36.407848,2404,27.7066,-81.4827,31.0,2003,2003_2404
114423,27.6926,-81.2704,2003,31,20.913794,2404,27.7066,-81.4827,31.0,2003,2003_2404
114424,28.3931,-82.1904,2003,31,48.084446,2469,29.2832,-82.2495,31.0,2003,2003_2469
114425,28.385,-82.1834,2003,31,48.595283,2469,29.2832,-82.2495,31.0,2003,2003_2469
114426,28.3963,-82.1734,2003,31,48.143434,2469,29.2832,-82.2495,31.0,2003,2003_2469
114427,27.3682,-81.3393,2003,31,22.417576,2404,27.7066,-81.4827,31.0,2003,2003_2404
114428,26.7763,-80.5053,2003,31,40.749336,789,26.4921,-80.8897,31.0,2003,2003_789
114429,26.7788,-80.4911,2003,31,42.117485,789,26.4921,-80.8897,31.0,2003,2003_789
114430,30.6996,-83.5102,2003,31,22.79408,2779,30.6886,-83.2719,31.0,2003,2003_2779


In [65]:
print(nasa_center_match.shape)

(114471, 11)


In [78]:
outside_range = nasa_center_match[(nasa_center_match.distance <200)]
print(outside_range.shape)

(30040, 11)


In [79]:
outside_range[0:40]

Unnamed: 0,source_lat,source_long,source_year,source_doy,distance,resultrow,targetlat,targetlong,target_doy,target_year,target_clusterref
31,33.3287,-95.5265,2003,1,178.234264,4275,36.0743,-95.0936,1.0,2003,2003_4276
32,33.3272,-95.5372,2003,1,178.543135,4275,36.0743,-95.0936,1.0,2003,2003_4276
35,34.7806,-95.253,2003,1,84.626959,4275,36.0743,-95.0936,1.0,2003,2003_4276
36,34.7912,-95.2437,2003,1,83.82351,4275,36.0743,-95.0936,1.0,2003,2003_4276
37,34.7897,-95.2548,2003,1,84.08624,4275,36.0743,-95.0936,1.0,2003,2003_4276
38,34.7821,-95.2418,2003,1,84.365766,4275,36.0743,-95.0936,1.0,2003,2003_4276
66,29.7394,-90.3659,2003,2,117.015716,2905,31.6348,-89.8838,2.0,2003,2003_2905
67,29.7353,-90.3733,2003,2,117.503964,2905,31.6348,-89.8838,2.0,2003,2003_2905
123,26.3241,-97.882,2003,3,31.339163,759,26.0924,-97.5893,3.0,2003,2003_759
124,26.3279,-97.8624,2003,3,29.598079,759,26.0924,-97.5893,3.0,2003,2003_759


In [68]:
nasa_center_match.to_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/nasa_to_clusterref_2003.csv', encoding='utf-8')

# Using K-D Tree to Find Closest Point in 1.88M Data Set. 

## Settting up Sample Data for the year 2011

In [26]:
### Pairing Down Cluster Data to select a single sample - Temp. 
pd.set_option('display.max_columns', 30)
FCECO2011 = centerpoints_0305[(centerpoints_0305.ECO > 600) & (centerpoints_0305.year == 2011)]
FCECO2011.head(5)

Unnamed: 0,cluster_ref,cluster_reference,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag
69993,740,2011_740,499367.0,2011,106.0,-95.6938,37.9568,63856.0,3.0,1800.0,62500.0,7600.018919,4799.992613,7459.188521,638.399018,35.903945,109.439832,0.117639,0.353563,4.0,1.0,81.0,2.0,0.0
70013,760,2011_760,529213.0,2011,203.0,-118.5411,36.2993,66420.0,3.0,1370.0,62500.0,7771.613688,4969.788814,7723.051818,660.981912,37.17402,113.311185,0.23521,0.249758,2.0,1.0,15.0,2.0,1.0
70056,803,2011_803,895703.0,2011,281.0,-85.4237,38.9977,71781.0,3.0,1800.0,62500.0,7600.018919,5624.943621,8741.162387,748.117502,42.074578,128.248715,0.144464,0.404291,2.0,4.0,78.0,2.0,1.0
70171,918,2011_918,618476.0,2011,267.0,-110.0384,43.8912,98316.0,3.0,1260.0,62500.0,6989.973251,5227.356234,8123.311587,695.238379,39.100625,119.183722,0.290946,0.222674,2.0,1.0,15.0,4.0,0.0
70190,937,2011_937,626692.0,2011,271.0,-110.2708,44.4695,101542.0,3.0,1280.0,62500.0,6594.534395,4680.337993,7273.245242,622.484953,35.008928,106.711706,0.253263,0.275338,3.0,1.0,81.0,4.0,0.0


In [27]:
FCECO2011.shape

(174, 24)

In [12]:
### Creating smaller DF for 2011 Unique Fires from USDA 1.88m fire set. 
usdafires2011 = usdafiredb_onemil[(usdafiredb_onemil.FIRE_YEAR == 2011)]
usdafires2011 = usdafires2011.drop(['SOURCE_REPORTING_UNIT', 'SOURCE_REPORTING_UNIT_NAME', 'LOCAL_FIRE_REPORT_ID', 
                                    'LOCAL_INCIDENT_ID', 'OWNER_CODE','OWNER_DESCR', 'X', 'Y','FOD_ID', 'FPA_ID', 
                                    'SOURCE_SYSTEM_TYPE', 'SOURCE_SYSTEM'], 1)
usdafires2011.shape

(90552, 28)

In [13]:
usdafires2011.head(3)

Unnamed: 0,OBJECTID,NWCG_REPORTING_AGENCY,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,FIRE_CODE,FIRE_NAME,ICS_209_INCIDENT_NUMBER,ICS_209_NAME,MTBS_ID,MTBS_FIRE_NAME,...,CONT_DOY,CONT_TIME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,STATE,COUNTY,FIPS_CODE,FIPS_NAME
1453964,1453965,FS,USAKCGF,Chugach National Forest,EK3T,VFD BEAR CREEK #1,,,,,...,80.0,1618.0,0.1,A,60.246389,-149.349444,AK,,,
1453965,1453966,FS,USAKCGF,Chugach National Forest,F72E,CPR LNDG ORGANIC DMP,,,,,...,108.0,1156.0,0.1,A,60.475833,-149.7525,AK,,,
1453966,1453967,FS,USAKCGF,Chugach National Forest,EK7P,TOKLAT WAY DEBRIS,,,,,...,131.0,1331.0,0.1,A,60.514444,-149.4675,AK,,,


## Define coordinates into cartesian numbers.  6371 is kms. 

In [14]:
def cartesian(latitude, longitude, elevation = 0):
    # Convert to radians
    latitude = latitude * (math.pi / 180)
    longitude = longitude * (math.pi / 180)

    R = 6371 # 6378137.0 + elevation  # relative to centre of the earth
    X = R * math.cos(latitude) * math.cos(longitude)
    Y = R * math.cos(latitude) * math.sin(longitude)
    Z = R * math.sin(latitude)
    return (X, Y, Z)

## Place target data/coordinates into a Places List in order to change into Cartesian coordinates. 

In [17]:
places = []
for index, row in usdafires2011.iterrows():
    coordinates = [row['LATITUDE'], row['LONGITUDE']]
    cartesian_coord = cartesian(*coordinates)
    places.append(cartesian_coord)

tree = spatial.KDTree(places)
places[1]

(-2712.1402271451984, -1581.5148494831365, 5543.71239957171)

In [18]:
def find_firecluster(lat, lon):
    cartesian_coord = cartesian(lat, lon)
    closest = tree.query([cartesian_coord], p = 2)
    index = closest[1][0]
    return closest

### Testing inputs into function:

In [None]:
targetlat = 25
targetlong = cluster_group_temp3['longitude'].iloc[0]

print(targetlat, targetlong)

In [None]:
resultfire0 = find_firecluster(targetlat, targetlong)
resultfire0

In [None]:
resultfire = find_firecluster(38.4782, -120.3440)
resultfire

## Build new DataFrame to hold selection data. 

clu1 = pd.DataFrame({'clusterlat': [targetlat], 'clusterlog': [targetlog], 'distance': resultfire[0], 'resultrow': resultfire[1]})
#clu = clue.assign('clusterlat': targetlat, 'clusterlog': targetlog)
indexnum = resultfire[1]
clu1

### Testing to see how to pull out number from DF, to use as input: 
indexnum = clu1.loc[0]['resultrow']
indexnum = int(indexnum)
indexnum

usdafires2011.iloc[indexnum]

unique_lat = usdafires2011.iloc[indexnum]['LATITUDE']
unique_log = usdafires2011.iloc[indexnum]['LONGITUDE']
unique_firename = usdafires2011.iloc[indexnum]['FIRE_NAME']
unique_firesize = usdafires2011.iloc[indexnum]['FIRE_SIZE']
unique_fireclass = usdafires2011.iloc[indexnum]['FIRE_SIZE_CLASS']
unique_discdate = usdafires2011.iloc[indexnum]['DISCOVERY_DOY']
unique_contdate = usdafires2011.iloc[indexnum]['CONT_DOY']

In [None]:
clu2 = clu1.assign(unique_lat1 = unique_lat, unique_log = unique_log, unique_firename = unique_firename,unique_firesize = unique_firesize, unique_fireclass = unique_fireclass,unique_discdate = unique_discdate, unique_contdate = unique_contdate)

In [None]:
clu2

# Perform Function to locate Closest Coordinates and Pull data features from target DF to main DF. 

In [20]:
# Clusterdf will be panda dataframe that has cluster coordinates to cross-ref with the     
def findclustermatch(clusterdf, targetdf):
    clustertotargetdf = pd.DataFrame()
    clustindexnum = 0
    clustindexnum = int(clustindexnum)
    while clustindexnum < len(clusterdf):
        #capture data from target/cluster to then use to find target match in other dataframe:
        clusterid = clusterdf.iloc[clustindexnum]['id']
        clusteryear = clusterdf.iloc[clustindexnum]['year']
        clusterdoy = clusterdf.iloc[clustindexnum]['doy']
        clusterlat = clusterdf.iloc[clustindexnum]['latitude']
        clusterlong = clusterdf.iloc[clustindexnum]['longitude']
        
        # Running cluster find function:
        distance_location = find_firecluster(clusterlat, clusterlong)
        targetlocation = distance_location[1]
        targetlocation = int(targetlocation)
        
        # Lines to pull data from the target dataframe, will need to be customized to the target DF. 
        target_lat = targetdf.iloc[targetlocation]['LATITUDE']
        target_long = targetdf.iloc[targetlocation]['LONGITUDE']
        target_firename = targetdf.iloc[targetlocation]['FIRE_NAME']
        target_firesize = targetdf.iloc[targetlocation]['FIRE_SIZE']
        target_fireclass = targetdf.iloc[targetlocation]['FIRE_SIZE_CLASS']
        target_year = targetdf.iloc[targetlocation]['FIRE_YEAR']
        target_discdoy = targetdf.iloc[targetlocation]['DISCOVERY_DOY']
        target_contdoy = targetdf.iloc[targetlocation]['CONT_DOY']
        
        # Create new DF pulling in features from Cluster Points file and Target File:
        cdftemp = pd.DataFrame({'cluster_index':[clustindexnum], 'cluster_id': [clusterid], 'clusteryear':[clusteryear], 'clusterdoy':[clusterdoy], 'clusterlat': [clusterlat], 'clusterlog': [clusterlong], 
                                'distance': distance_location[0], 'resultrow': distance_location[1], 'targetlat':[target_lat], 'targetlong':[target_long], 'firename':[target_firename],
                                'firesize':[target_firesize], 'fireclass':[target_fireclass],'fire_year':[target_year], 'discovery_doy': [target_discdoy], 'contain_doy': [target_contdoy]})
        
        clustertotargetdf = clustertotargetdf.append(cdftemp, ignore_index = True)
        clustindexnum = clustindexnum + 1
    return clustertotargetdf

In [28]:
cluster_targetmatch = findclustermatch(FCECO2011,usdafires2011)
cluster_targetmatch.tail(50)

Unnamed: 0,cluster_index,cluster_id,clusteryear,clusterdoy,clusterlat,clusterlog,distance,resultrow,targetlat,targetlong,firename,firesize,fireclass,fire_year,discovery_doy,contain_doy
124,124,900615.0,2011,282.0,45.8792,-92.7197,0.112992,41383,45.8784,-92.7206,ST. CROIX,11.0,C,2011,282,282.0
125,125,900626.0,2011,280.0,45.9464,-94.7071,0.111497,43912,45.94578,-94.708234,,120.0,D,2011,279,279.0
126,126,900631.0,2011,103.0,45.9487,-93.0997,0.205871,44851,45.950034,-93.101546,,44.0,C,2011,102,102.0
127,127,900635.0,2011,253.0,43.3531,-122.4238,0.089856,2436,43.3525,-122.423056,CINDER,23.0,C,2011,253,261.0
128,128,900637.0,2011,313.0,44.3622,-75.0886,0.132431,18959,44.36118,-75.08774,DEGRASSE FIRE,29.3,C,2011,313,314.0
129,129,900784.0,2011,282.0,46.2177,-92.3794,0.196257,46486,46.216866,-92.381648,,67.0,C,2011,281,282.0
130,130,900792.0,2011,310.0,46.26,-92.3576,0.133738,44966,46.259959,-92.355861,,85.0,C,2011,309,310.0
131,131,900797.0,2011,290.0,46.2792,-92.4019,0.128563,46516,46.278205,-92.402751,,14.0,C,2011,289,289.0
132,132,900884.0,2011,129.0,46.0309,-85.0277,0.070727,319,46.030833,-85.028611,WORTH ROAD,10.1,C,2011,129,129.0
133,133,900919.0,2011,128.0,46.5376,-94.7702,0.179652,44160,46.537217,-94.767918,,27.0,C,2011,127,127.0


In [None]:
sns.set(style="ticks", color_codes=True)
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
sns.palplot(sns.color_palette(flatui))