In [81]:
import pandas as pd
from geopy.distance import great_circle
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

In [82]:
weather_2012 = pd.read_csv('filtered_weather_2012.csv')
wildfire_2012 = pd.read_csv('viirs_2012.csv')

In [83]:
wildfire_df= wildfire_2012[['latitude','longitude','acq_date','confidence']]
weather_df=weather_2012[['LATITUDE','LONGITUDE','DATE','PRCP','TMIN','TMAX']]

In [84]:
wildfire_df=wildfire_df[wildfire_df['confidence']=='h']

In [None]:
# Define the latitude and longitude boundaries of British Columbia
lat_min, lat_max = 48.3, 60.0
lon_min, lon_max = -139.05, -114.03

# Apply boundaries to dataset
wildfire_df = wildfire_df[
    (wildfire_df['latitude'] >= lat_min) & (wildfire_df['latitude'] <= lat_max) &
    (wildfire_df['longitude'] >= lon_min) & (wildfire_df['longitude'] <= lon_max)
]

In [86]:
wildfire_df

Unnamed: 0,latitude,longitude,acq_date,confidence
25,58.728439,-117.236771,2012-01-22,h
510,57.025085,-121.934845,2012-02-01,h
807,53.131313,-115.990082,2012-02-04,h
1170,50.940090,-114.169312,2012-02-08,h
1444,58.049400,-114.157494,2012-02-12,h
...,...,...,...,...
184053,49.603424,-120.053345,2012-11-20,h
184405,51.392712,-122.958649,2012-11-23,h
185073,52.061211,-122.916931,2012-12-04,h
185716,58.113380,-116.029503,2012-12-19,h


In [87]:
#wildfire_df = wildfire_df.sample(n=1000)

In [88]:
wildfire_df.reset_index(drop=True, inplace=True)


In [89]:
wildfire_df

Unnamed: 0,latitude,longitude,acq_date,confidence
0,58.728439,-117.236771,2012-01-22,h
1,57.025085,-121.934845,2012-02-01,h
2,53.131313,-115.990082,2012-02-04,h
3,50.940090,-114.169312,2012-02-08,h
4,58.049400,-114.157494,2012-02-12,h
...,...,...,...,...
3158,49.603424,-120.053345,2012-11-20,h
3159,51.392712,-122.958649,2012-11-23,h
3160,52.061211,-122.916931,2012-12-04,h
3161,58.113380,-116.029503,2012-12-19,h


In [None]:
# Convert the date columns to datetime
wildfire_df['acq_date'] = pd.to_datetime(wildfire_df['acq_date'])
weather_df['DATE'] = pd.to_datetime(weather_df['DATE'])

# Create GeoDataFrames from the dataframes
wildfire_gdf = gpd.GeoDataFrame(
    wildfire_df, 
    geometry=gpd.points_from_xy(wildfire_df.longitude, wildfire_df.latitude),
    crs="EPSG:4326" 
)

weather_gdf = gpd.GeoDataFrame(
    weather_df, 
    geometry=gpd.points_from_xy(weather_df.LONGITUDE, weather_df.LATITUDE),
    crs="EPSG:4326"
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  weather_df['DATE'] = pd.to_datetime(weather_df['DATE'])


In [None]:
# Perform an initial merge on the date column
merged_gdf = wildfire_gdf.merge(weather_gdf, left_on='acq_date', right_on='DATE', suffixes=('_fire', '_weather'))

# calculate the distance between each wildfire point and weather station point
merged_gdf['distance'] = merged_gdf.apply(
    lambda row: row['geometry_fire'].distance(row['geometry_weather']),
    axis=1
)

In [92]:
merged_gdf

Unnamed: 0,latitude,longitude,acq_date,confidence,geometry_fire,LATITUDE,LONGITUDE,DATE,PRCP,TMIN,TMAX,geometry_weather,distance
0,58.728439,-117.236771,2012-01-22,h,POINT (-117.23677 58.72844),50.9000,-119.9167,2012-01-22,0.31,9.0,12.0,POINT (-119.91670 50.90000),8.274447
1,58.728439,-117.236771,2012-01-22,h,POINT (-117.23677 58.72844),50.0667,-122.9500,2012-01-22,1.06,14.0,23.0,POINT (-122.95000 50.06670),10.376257
2,58.728439,-117.236771,2012-01-22,h,POINT (-117.23677 58.72844),50.6833,-121.9333,2012-01-22,,21.0,40.0,POINT (-121.93330 50.68330),9.315667
3,58.728439,-117.236771,2012-01-22,h,POINT (-117.23677 58.72844),50.2167,-121.5833,2012-01-22,0.31,28.0,39.0,POINT (-121.58330 50.21670),9.557302
4,58.728439,-117.236771,2012-01-22,h,POINT (-117.23677 58.72844),52.1167,-124.1333,2012-01-22,,-15.0,33.0,POINT (-124.13330 52.11670),9.553910
...,...,...,...,...,...,...,...,...,...,...,...,...,...
61508,58.114071,-116.030380,2012-12-19,h,POINT (-116.03038 58.11407),50.9000,-119.9167,2012-12-19,0.94,4.0,17.0,POINT (-119.91670 50.90000),8.194285
61509,58.114071,-116.030380,2012-12-19,h,POINT (-116.03038 58.11407),49.1333,-123.0000,2012-12-19,1.20,32.0,41.0,POINT (-123.00000 49.13330),11.367931
61510,58.114071,-116.030380,2012-12-19,h,POINT (-116.03038 58.11407),49.0500,-123.8667,2012-12-19,0.62,29.0,40.0,POINT (-123.86670 49.05000),11.981874
61511,58.114071,-116.030380,2012-12-19,h,POINT (-116.03038 58.11407),49.1833,-119.5500,2012-12-19,0.16,19.0,36.0,POINT (-119.55000 49.18330),9.599291


In [None]:
# Group by the unique identifiers of the wildfire events (date and location)
grouped = merged_gdf.groupby(['acq_date', 'geometry_fire'])

# Sort the groups by distance and take the first one, which is the closest weather station
closest_stations = grouped.apply(lambda x: x.sort_values('distance').head(1))

# Reset the index - because groupby + apply can result in a multi-index
closest_stations = closest_stations.reset_index(drop=True)

#closest_stations should contain only the closest weather station for each wildfire event

In [94]:
closest_stations

Unnamed: 0,latitude,longitude,acq_date,confidence,geometry_fire,LATITUDE,LONGITUDE,DATE,PRCP,TMIN,TMAX,geometry_weather,distance
0,58.728439,-117.236771,2012-01-22,h,POINT (-117.23677 58.72844),53.0667,-121.5167,2012-01-22,0.12,3.0,27.0,POINT (-121.51670 53.06670),7.097400
1,57.025085,-121.934845,2012-02-01,h,POINT (-121.93484 57.02508),53.0333,-122.5167,2012-02-01,0.08,21.0,38.0,POINT (-122.51670 53.03330),4.033969
2,53.131313,-115.990082,2012-02-04,h,POINT (-115.99008 53.13131),51.5833,-119.7833,2012-02-04,0.00,16.0,32.0,POINT (-119.78330 51.58330),4.096931
3,50.940090,-114.169312,2012-02-08,h,POINT (-114.16931 50.94009),51.5833,-119.7833,2012-02-08,0.00,9.0,30.0,POINT (-119.78330 51.58330),5.650715
4,58.049400,-114.157494,2012-02-12,h,POINT (-114.15749 58.04940),51.5833,-119.7833,2012-02-12,0.08,32.0,45.0,POINT (-119.78330 51.58330),8.570889
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3158,49.603424,-120.053345,2012-11-20,h,POINT (-120.05334 49.60342),49.7833,-119.7167,2012-11-20,0.19,43.0,47.0,POINT (-119.71670 49.78330),0.381687
3159,51.392712,-122.958649,2012-11-23,h,POINT (-122.95865 51.39271),50.3000,-122.7333,2012-11-23,0.62,31.0,36.0,POINT (-122.73330 50.30000),1.115707
3160,52.061211,-122.916931,2012-12-04,h,POINT (-122.91693 52.06121),52.3833,-122.3667,2012-12-04,0.00,26.0,53.0,POINT (-122.36670 52.38330),0.637570
3161,58.113380,-116.029503,2012-12-19,h,POINT (-116.02950 58.11338),53.0667,-121.5167,2012-12-19,0.41,-4.0,25.0,POINT (-121.51670 53.06670),7.455086


In [95]:
closest_stations['week_of_year'] = closest_stations['acq_date'].dt.isocalendar().week

In [96]:
closest_stations.to_csv("merged_2012.csv")