# Notebook for creating connections between DataSets:

### Query Code - Location of closest coordinate out of a list of coordinates. 
##### - https://www.timvink.nl/closest-coordinates/
##### - https://stackoverflow.com/questions/39107896/efficiently-finding-the-closest-coordinate-pair-from-a-set-in-python

In [21]:
import pandas as pd
import numpy as np
from geopy.distance import great_circle
import math
import seaborn as sns
from scipy import spatial
import matplotlib.pyplot as plt
import datetime

from math import *

from scipy import spatial

# Project every geonames row to Cartesian coordinates, keeping row order so that a
# position returned by the KD-tree maps back to the row at the same position.
places = [
    cartesian(lat, lon)
    for lat, lon in zip(geonames['latitude'], geonames['longitude'])
]

# Nearest-neighbour index over the projected points.
tree = spatial.KDTree(places)

def find_population(lat, lon):
    """Return the ``geonames`` entry closest to the given latitude/longitude.

    The query point is projected with ``cartesian`` and looked up in the
    module-level ``tree``.

    Parameters
    ----------
    lat, lon : float
        Coordinates of the query point; passed unchanged to ``cartesian``.

    Returns
    -------
    dict
        ``name``, ``latitude``, ``longitude`` and ``population`` of the matched
        ``geonames`` row, plus ``distance`` as reported by the KD-tree query.

    Notes
    -----
    NOTE(review): ``tree`` is rebound by later cells in this notebook; this
    function is only meaningful while ``tree`` is the one built from
    ``geonames``.
    """
    cartesian_coord = cartesian(lat, lon)
    distances, positions = tree.query([cartesian_coord], p=2)
    # The tree reports the *position* of the hit within the list it was built
    # from, so rows are read positionally.  Label-based access
    # (`geonames.name[i]`) only agrees with that when the frame still carries
    # its default 0..n-1 index.
    position = positions[0]
    return {
        'name': geonames['name'].iloc[position],
        'latitude': geonames['latitude'].iloc[position],
        'longitude': geonames['longitude'].iloc[position],
        'population': geonames['population'].iloc[position],
        'distance': distances[0],
    }

## One-Time - Combining Cluster Point CSVs:

## Load related DataFrames: Fire Cluster Center Points, Cluster Points and 1.88m Fire Records:

In [2]:
# Expanding number of columns:
pd.set_option('display.max_columns', 40)

In [11]:
# Loading 1.88m fire record table:
usdafiredb_onemil = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/1_188m_USDA Fire Database/Fire_Program_Analysis__Fire_Occurrence_Database_Feature_Layer.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
FireCenterPoints = pd.read_csv('../data/centerpoints_03_15_v2.csv')

In [4]:
emdata_0315 = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/9 Missoula Emisions Data RDS-2017-0039/Emissions_Year/emissions_2003to2015_cleanv1.csv')

In [144]:
ClusterPoints = pd.read_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/Emissions Cluster Data/clusterpoints_03_15_v2.csv', encoding='utf-8')

## Fire Cluster Points Data:

In [145]:
print(ClusterPoints.shape)
ClusterPoints[200000:200005]

(5960572, 5)


Unnamed: 0.1,Unnamed: 0,0,1,ClusterNum,Year
200000,200000,31.7406,-109.2987,361,2011
200001,200001,31.7409,-109.2961,361,2011
200002,200002,31.7412,-109.2935,361,2011
200003,200003,31.7415,-109.2909,361,2011
200004,200004,31.7418,-109.2882,361,2011


## Creating New Fire Cluster Point Dataframe:
#### Dropping unused columns
#### Renaming column headers
#### Assigning new Cluster Reference to cluster points

In [148]:
ClusterPoints = ClusterPoints.drop(columns=['Unnamed: 0'])
ClusterPoints[0:1]

Unnamed: 0,0,1,ClusterNum,Year
0,25.1903,-81.0394,0,2011


In [149]:
ClusterPoints = ClusterPoints.rename(columns={'0': 'latitude', '1': 'longitude', 'Year': 'year'})
ClusterPoints[0:5]

Unnamed: 0,latitude,longitude,ClusterNum,year
0,25.1903,-81.0394,0,2011
1,25.19,-81.037,0,2011
2,25.1896,-81.0346,0,2011
3,25.1926,-81.039,0,2011
4,25.1922,-81.0366,0,2011


In [150]:
ClusterPoints['cluster_reference'] = ClusterPoints['year'].astype(str) + "_" + ClusterPoints['ClusterNum'].astype(str)
ClusterPoints[0:10]

Unnamed: 0,latitude,longitude,ClusterNum,year,cluster_reference
0,25.1903,-81.0394,0,2011,2011_0
1,25.19,-81.037,0,2011,2011_0
2,25.1896,-81.0346,0,2011,2011_0
3,25.1926,-81.039,0,2011,2011_0
4,25.1922,-81.0366,0,2011,2011_0
5,25.1918,-81.0342,0,2011,2011_0
6,25.1995,-81.0701,0,2011,2011_0
7,25.1991,-81.0677,0,2011,2011_0
8,25.1952,-81.0411,0,2011,2011_0
9,25.1948,-81.0386,0,2011,2011_0


In [151]:
ClusterPoints[1000:1002]
print(ClusterPoints.shape)

(5960572, 5)


In [152]:
# Writing new dataset to CSV:
ClusterPoints.to_csv('/Users/AlfHaugen/Python/Wildfire_Data/FireExports/Emissions Cluster Data/clusterpoints_03_15_ref1.csv', encoding='utf-8')

## Fire Center Points Data:
#### Renaming columns, adding cluster_reference id, moving column. 

In [188]:
print(FireCenterPoints.shape)
FireCenterPoints[0:2]

(109321, 23)


Unnamed: 0.1,Unnamed: 0,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag
0,0,22.0,2006.0,208.0,-80.6759,25.4076,5461.0,1.0,1.0,62500.0,394.430634,366.82049,615.157961,25.677434,0.983079,3.741569,0.0,0.0,2.0,1.0,15.0,2.0,0.0
1,1,96.0,2006.0,157.0,-80.5824,25.613,6845.0,1.0,1.0,62500.0,299.942136,278.946186,467.792755,19.526233,0.747576,2.845251,0.0,0.0,2.0,1.0,81.0,2.0,0.0


In [189]:
FireCenterPoints = FireCenterPoints.rename(columns={'Unnamed: 0': 'cluster_ref'})

In [190]:
FireCenterPoints = FireCenterPoints.astype({"year": int}) 

In [191]:
FireCenterPoints.head(1)

Unnamed: 0,cluster_ref,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag
0,0,22.0,2006,208.0,-80.6759,25.4076,5461.0,1.0,1.0,62500.0,394.430634,366.82049,615.157961,25.677434,0.983079,3.741569,0.0,0.0,2.0,1.0,15.0,2.0,0.0


In [192]:
FireCenterPoints['cluster_reference'] = FireCenterPoints['year'].astype(str) + "_" + FireCenterPoints['cluster_ref'].astype(str)

In [193]:
FireCenterPoints[0:5]

Unnamed: 0,cluster_ref,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag,cluster_reference
0,0,22.0,2006,208.0,-80.6759,25.4076,5461.0,1.0,1.0,62500.0,394.430634,366.82049,615.157961,25.677434,0.983079,3.741569,0.0,0.0,2.0,1.0,15.0,2.0,0.0,2006_0
1,1,96.0,2006,157.0,-80.5824,25.613,6845.0,1.0,1.0,62500.0,299.942136,278.946186,467.792755,19.526233,0.747576,2.845251,0.0,0.0,2.0,1.0,81.0,2.0,0.0,2006_1
2,2,164.0,2006,157.0,-80.5513,25.6198,6845.0,1.0,1.0,62500.0,226.077462,210.25204,352.592671,14.717643,0.563475,2.144571,0.0,0.0,2.0,1.0,15.0,2.0,0.0,2006_2
3,3,718.0,2006,49.0,-81.1249,26.0165,8222.0,3.0,1600.0,62500.0,6220.097576,2017.491592,3377.280924,155.346853,4.539356,24.00815,0.011189,0.081394,4.0,1.0,15.0,2.0,0.0,2006_3
4,4,444.0,2006,155.0,-81.6018,26.0708,8217.0,3.0,1140.0,62500.0,7646.952224,3842.452183,6432.264954,295.868818,8.645517,45.725181,0.008857,0.084716,2.0,1.0,15.0,2.0,1.0,2006_4


In [194]:
first_col = FireCenterPoints.pop('cluster_reference')
FireCenterPoints.insert(1, 'cluster_reference', first_col)
FireCenterPoints[0:2]

Unnamed: 0,cluster_ref,cluster_reference,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag
0,0,2006_0,22.0,2006,208.0,-80.6759,25.4076,5461.0,1.0,1.0,62500.0,394.430634,366.82049,615.157961,25.677434,0.983079,3.741569,0.0,0.0,2.0,1.0,15.0,2.0,0.0
1,1,2006_1,96.0,2006,157.0,-80.5824,25.613,6845.0,1.0,1.0,62500.0,299.942136,278.946186,467.792755,19.526233,0.747576,2.845251,0.0,0.0,2.0,1.0,81.0,2.0,0.0


-----

# Identify closest NASA data point to Center Points to pull cluster_reference id:

In [3]:
### Pulling in new centerpoint dataset:
centerpoints_0305 = pd.read_csv('../data/centerpoints_03_15_v5.17.csv', encoding='utf-8')
centerpoints_0305 = centerpoints_0305.drop(columns=['Unnamed: 0'])

In [4]:
centerpoints_0305.head()

Unnamed: 0,cluster_ref,cluster_reference,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag
0,0,2006_0,22.0,2006,208.0,-80.6759,25.4076,5461.0,1.0,1.0,62500.0,394.430634,366.82049,615.157961,25.677434,0.983079,3.741569,0.0,0.0,2.0,1.0,15.0,2.0,0.0
1,1,2006_1,96.0,2006,157.0,-80.5824,25.613,6845.0,1.0,1.0,62500.0,299.942136,278.946186,467.792755,19.526233,0.747576,2.845251,0.0,0.0,2.0,1.0,81.0,2.0,0.0
2,2,2006_2,164.0,2006,157.0,-80.5513,25.6198,6845.0,1.0,1.0,62500.0,226.077462,210.25204,352.592671,14.717643,0.563475,2.144571,0.0,0.0,2.0,1.0,15.0,2.0,0.0
3,3,2006_3,718.0,2006,49.0,-81.1249,26.0165,8222.0,3.0,1600.0,62500.0,6220.097576,2017.491592,3377.280924,155.346853,4.539356,24.00815,0.011189,0.081394,4.0,1.0,15.0,2.0,0.0
4,4,2006_4,444.0,2006,155.0,-81.6018,26.0708,8217.0,3.0,1140.0,62500.0,7646.952224,3842.452183,6432.264954,295.868818,8.645517,45.725181,0.008857,0.084716,2.0,1.0,15.0,2.0,1.0


In [5]:
NASA_M6 = pd.read_csv('../data/fire_archive_M6_110066.csv')

In [6]:
print(NASA_M6.shape)
NASA_M6.head()

(2159468, 15)


Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,38.8142,-93.5539,300.8,1.0,1.0,2003-01-01,423,Terra,MODIS,33,6.2,267.0,10.4,N,0
1,19.3739,-155.113,318.8,3.1,1.7,2003-01-01,914,Terra,MODIS,97,6.2,288.7,98.4,N,2
2,19.3723,-155.1197,316.0,3.1,1.7,2003-01-01,914,Terra,MODIS,90,6.2,288.3,85.0,N,2
3,19.3589,-155.1107,325.2,3.1,1.7,2003-01-01,914,Terra,MODIS,100,6.2,292.0,131.8,N,2
4,19.3573,-155.1174,319.0,3.1,1.7,2003-01-01,914,Terra,MODIS,97,6.2,290.3,97.3,N,2


In [7]:
# Assign the day-of-year, month and year from the acquisition date.
# acq_date is parsed once and every calendar field is derived from that parsed
# column instead of re-parsing the strings for each field.
# `infer_datetime_format` is deprecated in pandas 2.x, so the default parser is
# used (the dates shown above are ISO formatted, e.g. "2003-01-01").
NASA_M6['datetime'] = pd.to_datetime(NASA_M6['acq_date'])
NASA_M6['doy'] = NASA_M6['datetime'].dt.dayofyear
NASA_M6['month'] = NASA_M6['datetime'].dt.month
NASA_M6['year'] = NASA_M6['datetime'].dt.year

In [9]:
print(NASA_M6.shape)
NASA_M6.tail()

(2159468, 19)


Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,datetime,doy,month,year
2159463,38.3827,-92.7025,309.9,1.4,1.2,2019-12-31,1939,Aqua,MODIS,69,6.3,280.5,21.2,D,0,2019-12-31,365,12,2019
2159464,38.2008,-120.5822,303.9,1.1,1.0,2019-12-31,2118,Aqua,MODIS,57,6.3,283.2,8.2,D,0,2019-12-31,365,12,2019
2159465,42.1819,-123.5475,302.0,1.0,1.0,2019-12-31,2119,Aqua,MODIS,50,6.3,277.4,8.4,D,0,2019-12-31,365,12,2019
2159466,39.5075,-123.7523,309.2,1.0,1.0,2019-12-31,2119,Aqua,MODIS,68,6.3,284.9,11.2,D,0,2019-12-31,365,12,2019
2159467,40.5065,-123.7505,302.4,1.0,1.0,2019-12-31,2119,Aqua,MODIS,52,6.3,283.2,7.5,D,0,2019-12-31,365,12,2019


### Setting Up K-D Tree Lat/Long Inputs and Year

In [12]:
# Set year for centerpoint data to support target search for closest NASA M6 location to cluster location. 
centerpoints_xx = centerpoints_0305[(centerpoints_0305.year == 2008)]

In [13]:
# Set year for the NASA M6 data. 
NASA_M6_xx = NASA_M6[(NASA_M6.year == 2008)]
print(NASA_M6_xx.shape)

(119797, 19)


In [14]:
# Turning latitude, longitude, doy into cartesian coordinates.
def cartesian(latitude, longitude, doy):
    """Map a latitude/longitude (degrees) and a day-of-year onto a 3-tuple.

    X and Y are the equatorial-plane components of the point on a sphere of
    radius 6371; the third component is ``2 * doy`` rather than a spatial
    coordinate, so KD-tree distances built from this output mix space and time.

    NOTE(review): the usual ``Z = R * sin(latitude)`` term is not part of the
    result, so north-south separation only enters through ``cos(latitude)``
    in X and Y — confirm this is intended.
    """
    to_radians = math.pi / 180
    lat_rad = latitude * to_radians
    lon_rad = longitude * to_radians

    R = 6371  # sphere radius used for the projection
    planar = R * math.cos(lat_rad)
    return (planar * math.cos(lon_rad), planar * math.sin(lon_rad), 2 * doy)

In [15]:
# Project each centerpoint (latitude, longitude, doy) with `cartesian`, keeping row
# order so a KD-tree position maps back to the same row of `centerpoints_xx`.
centerpoint_places = [
    cartesian(lat, lon, doy)
    for lat, lon, doy in zip(
        centerpoints_xx['latitude'], centerpoints_xx['longitude'], centerpoints_xx['doy']
    )
]

# Nearest-neighbour index over the projected centerpoints.
tree = spatial.KDTree(centerpoint_places)
centerpoint_places[1]

(907.4565604008313, -5681.440434218411, 640.0)

In [16]:
# Set function for K-D Tree to search list of coordinates in target dataframe for closest match. 
def find_centerpoint(lat, lon, doy):
    """Query the module-level KD-tree for the entry nearest to (lat, lon, doy).

    Parameters
    ----------
    lat, lon, doy
        Passed unchanged to ``cartesian`` to build the query point.

    Returns
    -------
    tuple
        The raw ``tree.query`` result for a one-point batch: ``(distances,
        positions)``, each holding a single element.

    Notes
    -----
    NOTE(review): both ``tree`` and ``cartesian`` are rebound by later cells in
    this notebook (a two-coordinate ``cartesian`` and a tree over
    ``usdafires2011``); call this only while the centerpoint versions are the
    ones currently defined.
    """
    cartesian_coord = cartesian(lat, lon, doy)
    # One-point batch query using the Euclidean (p=2) metric.
    return tree.query([cartesian_coord], p = 2)

### Function to assign Cluster Reference label to NASA M6 data. 
The function determines the closest cluster centerpoint and its cluster label from the DBSCAN results    
and copies the Cluster_Reference label onto each NASA M6 data record.

In [17]:
# Function to iterate through each NASA M6 record, identify the closest coordinate match between the NASA M6 record
# and the created cluster centerpoints, then create a new dataframe to capture the distance, result row, lat/long,
# year, and finally cluster reference from the centerpoint to the NASA M6 record.

def find_cluster_ref(sourcedf, targetdf):
    """Match every row of ``sourcedf`` to its nearest cluster centerpoint.

    Each source row's latitude, longitude and day-of-year are handed to
    ``find_centerpoint``; the position it returns selects the matched row of
    ``targetdf`` by position.  ``targetdf`` must therefore be the frame, in the
    same row order, from which the KD-tree used by ``find_centerpoint`` was
    built.

    Parameters
    ----------
    sourcedf : pandas.DataFrame
        Must provide ``latitude``, ``longitude``, ``year`` and ``doy``.
    targetdf : pandas.DataFrame
        Must provide ``latitude``, ``longitude``, ``doy``, ``year`` and
        ``cluster_reference``.

    Returns
    -------
    pandas.DataFrame
        One row per source row with the source fields, the KD-tree ``distance``
        and ``resultrow`` (position in ``targetdf``), and the matched target's
        coordinates, day-of-year, year and cluster reference.
    """
    columns = ['source_lat', 'source_long', 'source_year', 'source_doy',
               'distance', 'resultrow', 'targetlat', 'targetlong',
               'target_doy', 'target_year', 'target_clusterref']

    # Read each needed column once; `df.iloc[i]['col']` inside the loop would
    # materialise a throwaway row Series on every access.
    source_rows = zip(sourcedf['latitude'].tolist(), sourcedf['longitude'].tolist(),
                      sourcedf['year'].tolist(), sourcedf['doy'].tolist())
    target_lat = targetdf['latitude'].tolist()
    target_long = targetdf['longitude'].tolist()
    target_doy = targetdf['doy'].tolist()
    target_year = targetdf['year'].tolist()
    target_clusterref = targetdf['cluster_reference'].tolist()

    records = []
    for source_lat, source_long, source_year, source_doy in source_rows:
        # Running cluster find function:
        distance_location = find_centerpoint(source_lat, source_long, source_doy)
        # The query is a one-point batch, so both members of the result hold a
        # single element; take it explicitly instead of calling int() on an array.
        distance = distance_location[0][0]
        targetlocation = int(distance_location[1][0])

        records.append({
            'source_lat': source_lat,
            'source_long': source_long,
            'source_year': source_year,
            'source_doy': source_doy,
            'distance': distance,
            'resultrow': targetlocation,
            'targetlat': target_lat[targetlocation],
            'targetlong': target_long[targetlocation],
            'target_doy': target_doy[targetlocation],
            'target_year': target_year[targetlocation],
            'target_clusterref': target_clusterref[targetlocation],
        })

    # Build the frame once.  Appending a one-row frame per iteration re-copies
    # the accumulated result every time, and DataFrame.append is not available
    # in pandas 2.x.
    return pd.DataFrame(records, columns=columns)

In [18]:
nasa_center_match = find_cluster_ref(NASA_M6_xx,centerpoints_xx)
nasa_center_match.tail(8)

Unnamed: 0,source_lat,source_long,source_year,source_doy,distance,resultrow,targetlat,targetlong,target_doy,target_year,target_clusterref
119789,32.3796,-89.2364,2008,366,58.698376,4372,31.6486,-88.8592,357.0,2008,2008_4373
119790,32.3792,-89.2441,2008,366,59.123724,4372,31.6486,-88.8592,357.0,2008,2008_4373
119791,32.3772,-89.2501,2008,366,59.38668,4372,31.6486,-88.8592,357.0,2008,2008_4373
119792,32.7128,-88.6323,2008,366,28.725507,4925,32.2904,-88.486,366.0,2008,2008_4926
119793,32.715,-88.6197,2008,366,28.298676,4925,32.2904,-88.486,366.0,2008,2008_4926
119794,31.3379,-97.7039,2008,366,46.206879,4238,31.6874,-98.1335,362.0,2008,2008_4239
119795,31.3389,-97.6997,2008,366,46.532762,4238,31.6874,-98.1335,362.0,2008,2008_4239
119796,35.05,-118.4912,2008,366,81.503215,795,36.1117,-118.6904,346.0,2008,2008_795


In [65]:
print(nasa_center_match.shape)

(114471, 11)


In [78]:
# Count the matches whose KD-tree distance is below 150.
# NOTE(review): despite its name, `outside_range` holds the rows *under* the
# threshold (distance < 150) — confirm which side of the cut-off is wanted.
outside_range = nasa_center_match[(nasa_center_match.distance <150)]
print(outside_range.shape)

(30040, 11)


In [68]:
# Write the matches next to the other project data files.  The path previously read
# '..data/…' (missing slash), which does not point at the '../data/' folder used by
# the read_csv cells in this notebook.
nasa_center_match.to_csv('../data/nasa_to_cluster_wdoy_2008_May27.csv', encoding='utf-8')

---
### Turn euclidean distance to KMs and review. 
Determine distance range/threshold in order to select which NASA M6 lat/long rows are close     
enough to a cluster reference to be kept for model input. 

In [22]:
import numpy as np

def deg2rad(degree):
    """Convert an angle from degrees to radians (``degree * 2π / 360``)."""
    return degree * 2*np.pi / 360

def distToKM(x):
    """Convert a distance ``x`` into kilometres via an isosceles-triangle construction.

    ``x / (2 * R)`` is passed through ``deg2rad`` before the arcsine, i.e. the
    input is treated as being expressed in degrees.  While that value stays
    inside the arcsine domain, the sine undoes the arcsine and the result
    reduces to ``x * pi / 180`` (e.g. ``distToKM(150)`` is about 2.618).

    NOTE(review): the KD-trees in this notebook are built from ``cartesian``
    coordinates computed with a radius of 6371, not from degrees, so their
    distances may already be in the radius' unit — confirm this conversion is
    appropriate for them.  The radius here (6367) also differs from the 6371
    used by ``cartesian``.
    """
    R = 6367 # earth radius
    gamma = 2*np.arcsin(deg2rad(x/(2*R))) # compute the angle of the isosceles triangle
    # `math.sin` is named explicitly: a bare `sin` only resolves through the
    # notebook's `from math import *`.
    dist = 2*R*math.sin(gamma/2) # compute the side of the triangle
    return(dist)

In [23]:
distToKM(150)

2.6179938779914944

---

# Using K-D Tree to Find Closest Point in 1.88M Data Set. 

## Setting up Sample Data for the year 2011

In [26]:
### Pairing Down Cluster Data to select a single sample - Temp. 
pd.set_option('display.max_columns', 30)
FCECO2011 = centerpoints_0305[(centerpoints_0305.ECO > 600) & (centerpoints_0305.year == 2011)]
FCECO2011.head(5)

Unnamed: 0,cluster_ref,cluster_reference,id,year,doy,longitude,latitude,grid10k,covertype,fuelcode,area_burned,prefire_fuel,consumed_fuel,ECO2,ECO,ECH4,EPM2.5,cwd_frac,duff_frac,fuel_moisture_class,burn_source,burnday_source,BSEV,BSEV_flag
69993,740,2011_740,499367.0,2011,106.0,-95.6938,37.9568,63856.0,3.0,1800.0,62500.0,7600.018919,4799.992613,7459.188521,638.399018,35.903945,109.439832,0.117639,0.353563,4.0,1.0,81.0,2.0,0.0
70013,760,2011_760,529213.0,2011,203.0,-118.5411,36.2993,66420.0,3.0,1370.0,62500.0,7771.613688,4969.788814,7723.051818,660.981912,37.17402,113.311185,0.23521,0.249758,2.0,1.0,15.0,2.0,1.0
70056,803,2011_803,895703.0,2011,281.0,-85.4237,38.9977,71781.0,3.0,1800.0,62500.0,7600.018919,5624.943621,8741.162387,748.117502,42.074578,128.248715,0.144464,0.404291,2.0,4.0,78.0,2.0,1.0
70171,918,2011_918,618476.0,2011,267.0,-110.0384,43.8912,98316.0,3.0,1260.0,62500.0,6989.973251,5227.356234,8123.311587,695.238379,39.100625,119.183722,0.290946,0.222674,2.0,1.0,15.0,4.0,0.0
70190,937,2011_937,626692.0,2011,271.0,-110.2708,44.4695,101542.0,3.0,1280.0,62500.0,6594.534395,4680.337993,7273.245242,622.484953,35.008928,106.711706,0.253263,0.275338,3.0,1.0,81.0,4.0,0.0


In [27]:
FCECO2011.shape

(174, 24)

In [12]:
### Creating smaller DF for 2011 Unique Fires from USDA 1.88m fire set. 
# Keep only the 2011 records and discard the reporting/ownership/identifier columns.
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# pandas 2.x no longer accepts, and matches the other drop calls in this notebook.
usdafires2011 = usdafiredb_onemil[(usdafiredb_onemil.FIRE_YEAR == 2011)].drop(
    columns=['SOURCE_REPORTING_UNIT', 'SOURCE_REPORTING_UNIT_NAME', 'LOCAL_FIRE_REPORT_ID',
             'LOCAL_INCIDENT_ID', 'OWNER_CODE', 'OWNER_DESCR', 'X', 'Y', 'FOD_ID', 'FPA_ID',
             'SOURCE_SYSTEM_TYPE', 'SOURCE_SYSTEM']
)
usdafires2011.shape

(90552, 28)

In [13]:
usdafires2011.head(3)

Unnamed: 0,OBJECTID,NWCG_REPORTING_AGENCY,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,FIRE_CODE,FIRE_NAME,ICS_209_INCIDENT_NUMBER,ICS_209_NAME,MTBS_ID,MTBS_FIRE_NAME,...,CONT_DOY,CONT_TIME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,STATE,COUNTY,FIPS_CODE,FIPS_NAME
1453964,1453965,FS,USAKCGF,Chugach National Forest,EK3T,VFD BEAR CREEK #1,,,,,...,80.0,1618.0,0.1,A,60.246389,-149.349444,AK,,,
1453965,1453966,FS,USAKCGF,Chugach National Forest,F72E,CPR LNDG ORGANIC DMP,,,,,...,108.0,1156.0,0.1,A,60.475833,-149.7525,AK,,,
1453966,1453967,FS,USAKCGF,Chugach National Forest,EK7P,TOKLAT WAY DEBRIS,,,,,...,131.0,1331.0,0.1,A,60.514444,-149.4675,AK,,,


## Convert coordinates into Cartesian values. 6371 is the Earth's radius in km.

In [14]:
def cartesian(latitude, longitude, elevation = 0):
    """Project a latitude/longitude pair (degrees) onto 3-D Cartesian coordinates.

    The point is placed on a sphere of radius 6371 centred on the origin and
    returned as an ``(X, Y, Z)`` tuple in the same unit as that radius.
    ``elevation`` is accepted but does not affect the result.

    NOTE(review): this reuses the name of the earlier
    ``cartesian(latitude, longitude, doy)`` and replaces it once this cell runs.
    """
    to_radians = math.pi / 180
    lat_rad = latitude * to_radians
    lon_rad = longitude * to_radians

    R = 6371  # sphere radius, relative to the centre of the earth
    planar = R * math.cos(lat_rad)
    return (
        planar * math.cos(lon_rad),
        planar * math.sin(lon_rad),
        R * math.sin(lat_rad),
    )

## Place target data/coordinates into a Places List in order to change into Cartesian coordinates. 

In [17]:
# Project each 2011 USDA fire location with `cartesian`, keeping row order so a
# KD-tree position maps back to the same row of `usdafires2011`.
places = [
    cartesian(lat, lon)
    for lat, lon in zip(usdafires2011['LATITUDE'], usdafires2011['LONGITUDE'])
]

# Nearest-neighbour index over the projected fire locations.
tree = spatial.KDTree(places)
places[1]

(-2712.1402271451984, -1581.5148494831365, 5543.71239957171)

In [18]:
def find_firecluster(lat, lon):
    """Query the module-level KD-tree for the entry nearest to (lat, lon).

    Parameters
    ----------
    lat, lon
        Passed unchanged to ``cartesian`` to build the query point.

    Returns
    -------
    tuple
        The raw ``tree.query`` result for a one-point batch: ``(distances,
        positions)``, each holding a single element.
    """
    cartesian_coord = cartesian(lat, lon)
    # One-point batch query using the Euclidean (p=2) metric.
    return tree.query([cartesian_coord], p = 2)

### Testing inputs into function:

In [None]:
# Sample query point for trying out find_firecluster: a fixed latitude and the
# longitude of the first row of `cluster_group_temp3`.
# NOTE(review): `cluster_group_temp3` is not defined in the visible part of this
# notebook — confirm where it comes from before running this cell.
targetlat = 25
targetlong = cluster_group_temp3['longitude'].iloc[0]

print(targetlat, targetlong)

In [None]:
resultfire0 = find_firecluster(targetlat, targetlong)
resultfire0

In [None]:
resultfire = find_firecluster(38.4782, -120.3440)
resultfire

## Build new DataFrame to hold selection data. 

# One-row frame holding the query point and the KD-tree answer (distance and
# position of the match).  The longitude variable is `targetlong`; the former
# `targetlog` was never defined and raised a NameError.
clu1 = pd.DataFrame({'clusterlat': [targetlat], 'clusterlog': [targetlong], 'distance': resultfire[0], 'resultrow': resultfire[1]})
indexnum = resultfire[1]
clu1

### Testing to see how to pull out number from DF, to use as input: 
indexnum = clu1.loc[0]['resultrow']
indexnum = int(indexnum)
indexnum

usdafires2011.iloc[indexnum]

unique_lat = usdafires2011.iloc[indexnum]['LATITUDE']
unique_log = usdafires2011.iloc[indexnum]['LONGITUDE']
unique_firename = usdafires2011.iloc[indexnum]['FIRE_NAME']
unique_firesize = usdafires2011.iloc[indexnum]['FIRE_SIZE']
unique_fireclass = usdafires2011.iloc[indexnum]['FIRE_SIZE_CLASS']
unique_discdate = usdafires2011.iloc[indexnum]['DISCOVERY_DOY']
unique_contdate = usdafires2011.iloc[indexnum]['CONT_DOY']

In [None]:
clu2 = clu1.assign(unique_lat1 = unique_lat, unique_log = unique_log, unique_firename = unique_firename,unique_firesize = unique_firesize, unique_fireclass = unique_fireclass,unique_discdate = unique_discdate, unique_contdate = unique_contdate)

In [None]:
clu2

# Perform Function to locate Closest Coordinates and Pull data features from target DF to main DF. 

In [20]:
# clusterdf is the pandas DataFrame of cluster coordinates to cross-reference with targetdf.
def findclustermatch(clusterdf, targetdf):
    """Match every row of ``clusterdf`` to its nearest fire record in ``targetdf``.

    Each cluster row's latitude and longitude are handed to
    ``find_firecluster``; the position it returns selects the matched row of
    ``targetdf`` by position.  ``targetdf`` must therefore be the frame, in the
    same row order, from which the KD-tree used by ``find_firecluster`` was
    built.

    Parameters
    ----------
    clusterdf : pandas.DataFrame
        Must provide ``id``, ``year``, ``doy``, ``latitude`` and ``longitude``.
    targetdf : pandas.DataFrame
        Must provide ``LATITUDE``, ``LONGITUDE``, ``FIRE_NAME``, ``FIRE_SIZE``,
        ``FIRE_SIZE_CLASS``, ``FIRE_YEAR``, ``DISCOVERY_DOY`` and ``CONT_DOY``.

    Returns
    -------
    pandas.DataFrame
        One row per cluster row: its position (``cluster_index``) and fields,
        the KD-tree ``distance`` and ``resultrow`` (position in ``targetdf``),
        and the matched fire's coordinates, name, size, class, year and
        discovery/containment day-of-year.
    """
    columns = ['cluster_index', 'cluster_id', 'clusteryear', 'clusterdoy', 'clusterlat',
               'clusterlog', 'distance', 'resultrow', 'targetlat', 'targetlong', 'firename',
               'firesize', 'fireclass', 'fire_year', 'discovery_doy', 'contain_doy']

    # Read each needed column once; `df.iloc[i]['col']` inside the loop would
    # materialise a throwaway row Series on every access.
    cluster_rows = zip(clusterdf['id'].tolist(), clusterdf['year'].tolist(),
                       clusterdf['doy'].tolist(), clusterdf['latitude'].tolist(),
                       clusterdf['longitude'].tolist())
    target_lat = targetdf['LATITUDE'].tolist()
    target_long = targetdf['LONGITUDE'].tolist()
    target_firename = targetdf['FIRE_NAME'].tolist()
    target_firesize = targetdf['FIRE_SIZE'].tolist()
    target_fireclass = targetdf['FIRE_SIZE_CLASS'].tolist()
    target_year = targetdf['FIRE_YEAR'].tolist()
    target_discdoy = targetdf['DISCOVERY_DOY'].tolist()
    target_contdoy = targetdf['CONT_DOY'].tolist()

    records = []
    for clustindexnum, (clusterid, clusteryear, clusterdoy, clusterlat, clusterlong) in enumerate(cluster_rows):
        # Running cluster find function:
        distance_location = find_firecluster(clusterlat, clusterlong)
        # The query is a one-point batch, so both members of the result hold a
        # single element; take it explicitly instead of calling int() on an array.
        distance = distance_location[0][0]
        targetlocation = int(distance_location[1][0])

        records.append({
            'cluster_index': clustindexnum,
            'cluster_id': clusterid,
            'clusteryear': clusteryear,
            'clusterdoy': clusterdoy,
            'clusterlat': clusterlat,
            'clusterlog': clusterlong,
            'distance': distance,
            'resultrow': targetlocation,
            'targetlat': target_lat[targetlocation],
            'targetlong': target_long[targetlocation],
            'firename': target_firename[targetlocation],
            'firesize': target_firesize[targetlocation],
            'fireclass': target_fireclass[targetlocation],
            'fire_year': target_year[targetlocation],
            'discovery_doy': target_discdoy[targetlocation],
            'contain_doy': target_contdoy[targetlocation],
        })

    # Build the frame once.  Appending a one-row frame per iteration re-copies
    # the accumulated result every time, and DataFrame.append is not available
    # in pandas 2.x.
    return pd.DataFrame(records, columns=columns)

In [28]:
cluster_targetmatch = findclustermatch(FCECO2011,usdafires2011)
cluster_targetmatch.tail(50)

Unnamed: 0,cluster_index,cluster_id,clusteryear,clusterdoy,clusterlat,clusterlog,distance,resultrow,targetlat,targetlong,firename,firesize,fireclass,fire_year,discovery_doy,contain_doy
124,124,900615.0,2011,282.0,45.8792,-92.7197,0.112992,41383,45.8784,-92.7206,ST. CROIX,11.0,C,2011,282,282.0
125,125,900626.0,2011,280.0,45.9464,-94.7071,0.111497,43912,45.94578,-94.708234,,120.0,D,2011,279,279.0
126,126,900631.0,2011,103.0,45.9487,-93.0997,0.205871,44851,45.950034,-93.101546,,44.0,C,2011,102,102.0
127,127,900635.0,2011,253.0,43.3531,-122.4238,0.089856,2436,43.3525,-122.423056,CINDER,23.0,C,2011,253,261.0
128,128,900637.0,2011,313.0,44.3622,-75.0886,0.132431,18959,44.36118,-75.08774,DEGRASSE FIRE,29.3,C,2011,313,314.0
129,129,900784.0,2011,282.0,46.2177,-92.3794,0.196257,46486,46.216866,-92.381648,,67.0,C,2011,281,282.0
130,130,900792.0,2011,310.0,46.26,-92.3576,0.133738,44966,46.259959,-92.355861,,85.0,C,2011,309,310.0
131,131,900797.0,2011,290.0,46.2792,-92.4019,0.128563,46516,46.278205,-92.402751,,14.0,C,2011,289,289.0
132,132,900884.0,2011,129.0,46.0309,-85.0277,0.070727,319,46.030833,-85.028611,WORTH ROAD,10.1,C,2011,129,129.0
133,133,900919.0,2011,128.0,46.5376,-94.7702,0.179652,44160,46.537217,-94.767918,,27.0,C,2011,127,127.0


In [None]:
sns.set(style="ticks", color_codes=True)
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
sns.palplot(sns.color_palette(flatui))