# Filtering flaring count data

In [91]:
import os
import geopandas as gpd
import pandas as pd
import datetime
from datetime import timedelta
base_dir = os.path.dirname(os.getcwd())

This notebook will take a `geojson` file and filter for a given number of flaring occurrences.

In [102]:
kurdistan_flaring_gdf_2_freq = gpd.read_file(f"{base_dir}/processed_data/kurdistan_data/Kurdistan_weekly_flaring_count.geojson", driver="GeoJSON")

In [103]:
kurdistan_flaring_gdf_2_freq

Unnamed: 0,Lat,Lon,Flaring_time_str,Flaring_timestamp,Count,geometry
0,34.76,45.25,2019-04,1.554073e+09,3,POINT (45.25000 34.76000)
1,34.76,45.22,2020-07,1.593558e+09,4,POINT (45.22000 34.76000)
2,34.76,45.22,2019-01,1.546301e+09,14,POINT (45.22000 34.76000)
3,34.76,45.22,2020-03,1.583021e+09,3,POINT (45.22000 34.76000)
4,34.76,45.22,2018-11,1.541030e+09,4,POINT (45.22000 34.76000)
...,...,...,...,...,...,...
19151,35.65,43.76,2019-05,1.556665e+09,2,POINT (43.76000 35.65000)
19152,35.65,43.75,2019-06,1.559344e+09,2,POINT (43.75000 35.65000)
19153,35.66,43.77,2019-12,1.575158e+09,2,POINT (43.77000 35.66000)
19154,35.66,43.77,2020-04,1.585696e+09,2,POINT (43.77000 35.66000)


In [105]:
kurdistan_flaring_gdf_4_freq = kurdistan_flaring_gdf_2_freq[kurdistan_flaring_gdf_2_freq["Count"]>=4]

In [70]:
kurdistan_flaring_gdf_4_freq.to_file(f"{base_dir}/processed_data/kurdistan_data/Kurdistan_weekly_flaring_count_4.geojson", driver="GeoJSON")

  pd.Int64Index,


# Merging the aggregated locations to original locations based on timestamp and lat lon values

### Merging to get higher resolution lat lon (to avoid the grid-like pattern in visuals)

In [71]:
# reading in raw lat lon values
kurdistan_flare_raw_locations = pd.read_csv(f"{base_dir}/processed_data/kurdistan_data/flaring_group_6dp.csv")

In [112]:
# merge on date first
kurdistan_flaring_gdf_4_freq['Flaring_date'] = kurdistan_flaring_gdf_4_freq['Flaring_timestamp'].apply(
    lambda x: datetime.datetime.fromtimestamp(int(x)).strftime('%Y-%m-%d'))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [127]:
kurdistan_flaring_gdf_4_freq

Unnamed: 0,Lat,Lon,Flaring_time_str,Flaring_timestamp,Count,geometry,Flaring_date
1,34.76,45.22,2020-07,1.593558e+09,4,POINT (45.22000 34.76000),2020-06-30
2,34.76,45.22,2019-01,1.546301e+09,14,POINT (45.22000 34.76000),2018-12-31
4,34.76,45.22,2018-11,1.541030e+09,4,POINT (45.22000 34.76000),2018-10-31
5,34.76,45.22,2019-08,1.564614e+09,5,POINT (45.22000 34.76000),2019-07-31
8,34.76,45.23,2019-08,1.564614e+09,5,POINT (45.23000 34.76000),2019-07-31
...,...,...,...,...,...,...,...
19087,35.61,43.62,2019-09,1.567292e+09,4,POINT (43.62000 35.61000),2019-08-31
19121,35.57,43.74,2020-06,1.590966e+09,4,POINT (43.74000 35.57000),2020-05-31
19122,35.57,43.73,2020-06,1.590966e+09,5,POINT (43.73000 35.57000),2020-05-31
19145,35.62,43.77,2020-06,1.590966e+09,4,POINT (43.77000 35.62000),2020-05-31


In [130]:
kurdistan_flaring_gdf_4_freq["week_beginning"] = kurdistan_flaring_gdf_4_freq.apply(
            lambda row: row.Flaring_date - timedelta(days=row.Flaring_date.weekday()),
            axis=1,
        )
kurdistan_flaring_gdf_4_freq

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Unnamed: 0,Lat,Lon,Flaring_time_str,Flaring_timestamp,Count,geometry,Flaring_date,week_beginning
1,34.76,45.22,2020-07,1.593558e+09,4,POINT (45.22000 34.76000),2020-06-30,2020-06-29
2,34.76,45.22,2019-01,1.546301e+09,14,POINT (45.22000 34.76000),2018-12-31,2018-12-31
4,34.76,45.22,2018-11,1.541030e+09,4,POINT (45.22000 34.76000),2018-10-31,2018-10-29
5,34.76,45.22,2019-08,1.564614e+09,5,POINT (45.22000 34.76000),2019-07-31,2019-07-29
8,34.76,45.23,2019-08,1.564614e+09,5,POINT (45.23000 34.76000),2019-07-31,2019-07-29
...,...,...,...,...,...,...,...,...
19087,35.61,43.62,2019-09,1.567292e+09,4,POINT (43.62000 35.61000),2019-08-31,2019-08-26
19121,35.57,43.74,2020-06,1.590966e+09,4,POINT (43.74000 35.57000),2020-05-31,2020-05-25
19122,35.57,43.73,2020-06,1.590966e+09,5,POINT (43.73000 35.57000),2020-05-31,2020-05-25
19145,35.62,43.77,2020-06,1.590966e+09,4,POINT (43.77000 35.62000),2020-05-31,2020-05-25


## Raw lcoations

In [131]:
kurdistan_flare_raw_locations = pd.read_csv(f"{base_dir}/processed_data/kurdistan_data/flaring_group_6dp.csv")

In [132]:
kurdistan_flare_raw_locations['Date'] = kurdistan_flare_raw_locations['Date_LTZ'].apply(
    lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S.%f').date())

In [147]:
kurdistan_flare_raw_locations["week_beginning"] = kurdistan_flare_raw_locations.apply(
            lambda row: row.Date - timedelta(days=row.Date.weekday()),
            axis=1,
        )
kurdistan_flare_raw_locations

Unnamed: 0,Lon,Lat,Date_LTZ,Date,geometry,week_beginning
0,42.797478,37.139473,2022-04-29 01:43:58.874,2022-04-29,POINT (42.79748 37.13947),2022-04-25
1,42.558681,37.075752,2022-04-29 01:44:00.653,2022-04-29,POINT (42.55868 37.07575),2022-04-25
2,43.386925,36.914726,2022-04-29 01:44:00.653,2022-04-29,POINT (43.38692 36.91473),2022-04-25
3,43.396919,36.913467,2022-04-29 01:44:00.653,2022-04-29,POINT (43.39692 36.91347),2022-04-25
4,43.385841,36.907597,2022-04-29 01:44:00.653,2022-04-29,POINT (43.38584 36.90760),2022-04-25
...,...,...,...,...,...,...
204547,45.160320,34.779396,2018-11-26 00:41:54.615,2018-11-26,POINT (45.16032 34.77940),2018-11-26
204548,45.121826,34.765419,2018-11-26 00:41:54.615,2018-11-26,POINT (45.12183 34.76542),2018-11-26
204549,45.194828,34.764164,2018-11-26 00:41:54.615,2018-11-26,POINT (45.19483 34.76416),2018-11-26
204550,44.989986,34.209972,2018-11-26 00:42:03.511,2018-11-26,POINT (44.98999 34.20997),2018-11-26


### Creating geodataframe from more accurate location points

In [148]:
kurdistan_flare_raw_locations_gdf = gpd.GeoDataFrame(
    kurdistan_flare_raw_locations, geometry=gpd.points_from_xy(kurdistan_flare_raw_locations.Lon, kurdistan_flare_raw_locations.Lat))

In [149]:
kurdistan_flare_raw_locations_gdf = kurdistan_flare_raw_locations_gdf.set_crs(4326)

In [150]:
kurdistan_flare_raw_locations_gdf["Flaring_time_str"] = kurdistan_flare_raw_locations_gdf['Date'].apply(lambda x: "-".join(str(x).split('-',2)[:2]))


In [152]:
kurdistan_flare_raw_locations_gdf["Lon_2dp"] = kurdistan_flare_raw_locations_gdf["Lon"].round(2)
kurdistan_flare_raw_locations_gdf["Lat_2dp"] = kurdistan_flare_raw_locations_gdf["Lat"].round(2)


In [153]:
kurdistan_flare_raw_locations_gdf

Unnamed: 0,Lon,Lat,Date_LTZ,Date,geometry,week_beginning,Flaring_time_str,Lon_2dp,Lat_2dp
0,42.797478,37.139473,2022-04-29 01:43:58.874,2022-04-29,POINT (42.79748 37.13947),2022-04-25,2022-04,42.80,37.14
1,42.558681,37.075752,2022-04-29 01:44:00.653,2022-04-29,POINT (42.55868 37.07575),2022-04-25,2022-04,42.56,37.08
2,43.386925,36.914726,2022-04-29 01:44:00.653,2022-04-29,POINT (43.38692 36.91473),2022-04-25,2022-04,43.39,36.91
3,43.396919,36.913467,2022-04-29 01:44:00.653,2022-04-29,POINT (43.39692 36.91347),2022-04-25,2022-04,43.40,36.91
4,43.385841,36.907597,2022-04-29 01:44:00.653,2022-04-29,POINT (43.38584 36.90760),2022-04-25,2022-04,43.39,36.91
...,...,...,...,...,...,...,...,...,...
204547,45.160320,34.779396,2018-11-26 00:41:54.615,2018-11-26,POINT (45.16032 34.77940),2018-11-26,2018-11,45.16,34.78
204548,45.121826,34.765419,2018-11-26 00:41:54.615,2018-11-26,POINT (45.12183 34.76542),2018-11-26,2018-11,45.12,34.77
204549,45.194828,34.764164,2018-11-26 00:41:54.615,2018-11-26,POINT (45.19483 34.76416),2018-11-26,2018-11,45.19,34.76
204550,44.989986,34.209972,2018-11-26 00:42:03.511,2018-11-26,POINT (44.98999 34.20997),2018-11-26,2018-11,44.99,34.21


In [158]:
kurdistan_flare_raw_locations_gdf["week_beginning"] = pd.to_datetime(kurdistan_flare_raw_locations_gdf["week_beginning"])

In [165]:
kurdistan_flare_raw_locations_gdf

Unnamed: 0,Lon,Lat,Date_LTZ,Date,geometry,week_beginning,Flaring_time_str,Lon_2dp,Lat_2dp
0,42.797478,37.139473,2022-04-29 01:43:58.874,2022-04-29,POINT (42.79748 37.13947),2022-04-25,2022-04,42.80,37.14
1,42.558681,37.075752,2022-04-29 01:44:00.653,2022-04-29,POINT (42.55868 37.07575),2022-04-25,2022-04,42.56,37.08
2,43.386925,36.914726,2022-04-29 01:44:00.653,2022-04-29,POINT (43.38692 36.91473),2022-04-25,2022-04,43.39,36.91
3,43.396919,36.913467,2022-04-29 01:44:00.653,2022-04-29,POINT (43.39692 36.91347),2022-04-25,2022-04,43.40,36.91
4,43.385841,36.907597,2022-04-29 01:44:00.653,2022-04-29,POINT (43.38584 36.90760),2022-04-25,2022-04,43.39,36.91
...,...,...,...,...,...,...,...,...,...
204547,45.160320,34.779396,2018-11-26 00:41:54.615,2018-11-26,POINT (45.16032 34.77940),2018-11-26,2018-11,45.16,34.78
204548,45.121826,34.765419,2018-11-26 00:41:54.615,2018-11-26,POINT (45.12183 34.76542),2018-11-26,2018-11,45.12,34.77
204549,45.194828,34.764164,2018-11-26 00:41:54.615,2018-11-26,POINT (45.19483 34.76416),2018-11-26,2018-11,45.19,34.76
204550,44.989986,34.209972,2018-11-26 00:42:03.511,2018-11-26,POINT (44.98999 34.20997),2018-11-26,2018-11,44.99,34.21


### Merging the two dataframes on the week beginning

In [170]:
new_df = pd.merge(kurdistan_flaring_gdf_4_freq, kurdistan_flare_raw_locations_gdf,  
    how='inner', 
    left_on=["Lon", "Lat","week_beginning"],
    right_on=["Lon_2dp","Lat_2dp","week_beginning"], 
)

In [173]:
new_gdf = new_df.set_geometry("geometry_y")

In [181]:
new_gdf[["Lat_x","Lon_x","Flaring_time_str_x","Count", "Flaring_date","Date_LTZ","geometry_y","Lon_2dp","Lat_2dp"]].to_file(f"{base_dir}/processed_data/kurdistan_data/Kurdistan_weekly_flaring_count_joined_week_beginning.geojson")

  pd.Int64Index,
