# Filtering flaring count data

In [None]:
import os
import geopandas as gpd
import pandas as pd
import datetime
from datetime import timedelta
# Parent directory of the current working directory; every data path below is
# built relative to it.
base_dir = os.path.split(os.getcwd())[0]

This notebook reads a `geojson` file of weekly flaring counts and keeps only the rows that have at least a given number of flaring occurrences.

In [None]:
# Load the weekly flaring count GeoJSON into a GeoDataFrame.
weekly_count_path = f"{base_dir}/processed_data/kurdistan_data/Kurdistan_weekly_flaring_count.geojson"
kurdistan_flaring_gdf_2_freq = gpd.read_file(weekly_count_path, driver="GeoJSON")

In [None]:
# Display the frame that was just loaded, as a visual sanity check.
kurdistan_flaring_gdf_2_freq

In [None]:
# Keep only the rows whose "Count" is at least MIN_FLARING_COUNT.
# `.copy()` makes the result an independent frame: later cells add columns to
# it, and doing that on a filtered slice triggers pandas'
# SettingWithCopyWarning.
MIN_FLARING_COUNT = 4
kurdistan_flaring_gdf_4_freq = kurdistan_flaring_gdf_2_freq.loc[
    kurdistan_flaring_gdf_2_freq["Count"] >= MIN_FLARING_COUNT
].copy()

In [None]:
# Write the filtered frame out as GeoJSON.
filtered_count_path = f"{base_dir}/processed_data/kurdistan_data/Kurdistan_weekly_flaring_count_4.geojson"
kurdistan_flaring_gdf_4_freq.to_file(filtered_count_path, driver="GeoJSON")

# Merging the aggregated locations to original locations based on timestamp and lat lon values

### Merging to get higher resolution lat lon (to avoid the grid-like pattern in visuals)

In [None]:
# Load the raw latitude/longitude records from CSV.
raw_locations_csv = f"{base_dir}/processed_data/kurdistan_data/flaring_group_6dp.csv"
kurdistan_flare_raw_locations = pd.read_csv(raw_locations_csv)

In [None]:
# First step towards merging on date: derive a 'YYYY-MM-DD' string from each
# row's 'Flaring_timestamp'.
# NOTE(review): datetime.fromtimestamp() converts using the machine's local
# time zone — confirm that is the intended zone for these timestamps.
def _epoch_to_date_str(epoch_seconds):
    """Format an epoch timestamp (cast to int) as a 'YYYY-MM-DD' string."""
    moment = datetime.datetime.fromtimestamp(int(epoch_seconds))
    return moment.strftime('%Y-%m-%d')


kurdistan_flaring_gdf_4_freq['Flaring_date'] = (
    kurdistan_flaring_gdf_4_freq['Flaring_timestamp'].apply(_epoch_to_date_str)
)


In [None]:
# Display the filtered frame to check the newly added 'Flaring_date' column.
kurdistan_flaring_gdf_4_freq

In [None]:
# 'Flaring_date' holds 'YYYY-MM-DD' strings, which have no .weekday() and
# cannot be combined with a timedelta. Parse them to datetimes first, then
# step back by the weekday number (Monday == 0) to reach the Monday of the
# same week.
# The result is datetime64, the same dtype the raw-locations frame is
# converted to for its "week_beginning" merge key.
flaring_dates = pd.to_datetime(kurdistan_flaring_gdf_4_freq["Flaring_date"])
kurdistan_flaring_gdf_4_freq["week_beginning"] = flaring_dates - pd.to_timedelta(
    flaring_dates.dt.weekday, unit="D"
)
kurdistan_flaring_gdf_4_freq

## Raw locations

In [None]:
# Repeats the earlier read of the same CSV, resetting the variable to the
# file's contents before the columns below are added.
raw_locations_csv = f"{base_dir}/processed_data/kurdistan_data/flaring_group_6dp.csv"
kurdistan_flare_raw_locations = pd.read_csv(raw_locations_csv)

In [None]:
def _timestamp_str_to_date(stamp):
    """Return the calendar date of a 'YYYY-MM-DD HH:MM:SS.ffffff' string."""
    parsed = datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S.%f')
    return parsed.date()


# Calendar date (time of day dropped) of each record's 'Date_LTZ' value.
kurdistan_flare_raw_locations['Date'] = (
    kurdistan_flare_raw_locations['Date_LTZ'].apply(_timestamp_str_to_date)
)

In [None]:
# Monday of the week each record's 'Date' falls in (weekday: Monday == 0).
# Uses vectorised datetime arithmetic instead of a row-wise apply; `.dt.date`
# keeps the values as datetime.date objects, which is what the row-wise
# version produced.
record_dates = pd.to_datetime(kurdistan_flare_raw_locations["Date"])
kurdistan_flare_raw_locations["week_beginning"] = (
    record_dates - pd.to_timedelta(record_dates.dt.weekday, unit="D")
).dt.date
kurdistan_flare_raw_locations

### Creating geodataframe from more accurate location points

In [None]:
# Build point geometries from the Lon (x) and Lat (y) columns and attach them
# to the raw records as a GeoDataFrame.
raw_points = gpd.points_from_xy(
    x=kurdistan_flare_raw_locations.Lon,
    y=kurdistan_flare_raw_locations.Lat,
)
kurdistan_flare_raw_locations_gdf = gpd.GeoDataFrame(
    kurdistan_flare_raw_locations,
    geometry=raw_points,
)

In [None]:
# Label the geometries as EPSG:4326 coordinates.
kurdistan_flare_raw_locations_gdf = kurdistan_flare_raw_locations_gdf.set_crs(epsg=4326)

In [None]:
# Year-month label: the first two '-'-separated fields of each date's string
# form, re-joined with '-'.
kurdistan_flare_raw_locations_gdf["Flaring_time_str"] = [
    "-".join(str(day).split('-', 2)[:2])
    for day in kurdistan_flare_raw_locations_gdf['Date']
]


In [None]:
# Coordinates rounded to two decimals; these columns are the raw frame's
# location keys in the merge further down.
lon_lat_2dp = kurdistan_flare_raw_locations_gdf[["Lon", "Lat"]].round(2)
kurdistan_flare_raw_locations_gdf["Lon_2dp"] = lon_lat_2dp["Lon"]
kurdistan_flare_raw_locations_gdf["Lat_2dp"] = lon_lat_2dp["Lat"]


In [None]:
# Display the GeoDataFrame to check the rounded coordinate columns.
kurdistan_flare_raw_locations_gdf

In [None]:
# Convert "week_beginning" to pandas datetimes; the column is used as a merge
# key below.
week_starts = pd.to_datetime(kurdistan_flare_raw_locations_gdf["week_beginning"])
kurdistan_flare_raw_locations_gdf["week_beginning"] = week_starts

In [None]:
# Display the GeoDataFrame again after the "week_beginning" conversion.
kurdistan_flare_raw_locations_gdf

### Merging the two dataframes on the week beginning

In [None]:
# Inner join of the aggregated frame with the raw records: the aggregated
# Lon/Lat are matched against the raw coordinates rounded to two decimals, and
# both sides must share the same "week_beginning".
# Non-key columns present in both frames get pandas' default suffixes
# (_x for the left frame, _y for the right).
aggregated_keys = ["Lon", "Lat", "week_beginning"]
raw_keys = ["Lon_2dp", "Lat_2dp", "week_beginning"]
new_df = pd.merge(
    left=kurdistan_flaring_gdf_4_freq,
    right=kurdistan_flare_raw_locations_gdf,
    how='inner',
    left_on=aggregated_keys,
    right_on=raw_keys,
)

In [None]:
# Make the right-hand (raw records) geometry column the active geometry of the
# merged frame.
new_gdf = new_df.set_geometry(col="geometry_y")

In [None]:
# Write the selected columns of the merged frame to file.
output_columns = [
    "Lat_x",
    "Lon_x",
    "Flaring_time_str_x",
    "Count",
    "Flaring_date",
    "Date_LTZ",
    "geometry_y",
    "Lon_2dp",
    "Lat_2dp",
]
joined_path = f"{base_dir}/processed_data/kurdistan_data/Kurdistan_weekly_flaring_count_joined_week_beginning.geojson"
new_gdf[output_columns].to_file(joined_path)