## Please provide your inputs as follows:

- **start_date**: The start date of your period of interest, in the format **dd mm YYYY** or **YYYY mm dd** (day, month and year may be separated by spaces, `/` or `-`)
- **end_date**: The end date of your period of interest, in the same format as **start_date**
- **region**: The region of interest. Please provide this in a **bounding box format** `[min_lon, min_lat, max_lon, max_lat]` (e.g. [10, -5, 25, 20]) or as a **country name**

In [None]:
# Period of interest. The date strings are normalised to YYYY-MM-DD by
# convert_format_date further down, which accepts several layouts.
start_date = "2023 08 20"
end_date = "2023 08 25"
# Region of interest: a country name, or a four-element bounding-box list.
region = "Australia"

## Library imports 

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import xarray as xr
import fsspec
import numpy as np
import xarray as xr
import planetary_computer
import pystac_client
import geopandas as gpd
import pandas as pd
import cartopy.crs as ccrs
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime, time
import json
import stackstac

## Starting up PySTAC client

In [None]:
# Initialize PySTAC client for data query.
# The subscription key is a credential, so it is read from the environment
# instead of being committed with the notebook. Set PC_SDK_SUBSCRIPTION_KEY
# before starting Jupyter; without it the client falls back to anonymous access.
import os

subscription_key = os.environ.get("PC_SDK_SUBSCRIPTION_KEY")
if subscription_key:
    planetary_computer.set_subscription_key(subscription_key)

catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    # Signs asset hrefs so the files can be downloaded later on.
    modifier=planetary_computer.sign_inplace,
)

## User Input

### Processing of user input

In [None]:
# Function to convert date format
def convert_format_date(input_date):
    """Normalise a user-supplied date string to ``YYYY-MM-DD``.

    The input may be written day-first or year-first, with the three parts
    separated by spaces, slashes or dashes. Leading and trailing whitespace
    is ignored.

    Args:
        input_date: Date string such as ``"20 08 2023"`` or ``"2023/08/20"``.

    Returns:
        The same date formatted as ``"YYYY-MM-DD"``.

    Raises:
        TypeError: If ``input_date`` is not a string.
        ValueError: If the string matches none of the accepted formats.
    """
    if not isinstance(input_date, str):
        raise TypeError(
            f"Date must be given as a string, got {type(input_date).__name__}"
        )

    # Tried in order; the first format that parses wins.
    accepted_formats = (
        "%d %m %Y", "%Y %m %d",
        "%d/%m/%Y", "%Y/%m/%d",
        "%d-%m-%Y", "%Y-%m-%d",
    )

    cleaned = input_date.strip()
    for format_str in accepted_formats:
        try:
            return datetime.strptime(cleaned, format_str).strftime("%Y-%m-%d")
        except ValueError:
            # Not this layout; try the next accepted format.
            continue

    raise ValueError(
        f"Invalid date format: {input_date!r} "
        "(expected day-month-year or year-month-day, separated by ' ', '/' or '-')"
    )

# Convert user start date format.
# A date that cannot be parsed stops the cell: carrying on would build
# date_period from the raw, unparsed input.
try:
    start_date = convert_format_date(start_date)
except ValueError as err:
    raise ValueError(
        "Invalid start date format. Please check the acceptable formats"
    ) from err

# Convert user end date format
try:
    end_date = convert_format_date(end_date)
except ValueError as err:
    raise ValueError(
        "Invalid end date format. Please check the acceptable formats"
    ) from err

# Both values are now YYYY-MM-DD strings, which sort chronologically.
if start_date > end_date:
    raise ValueError("start_date must not be later than end_date")

# "start/end" interval string passed to the catalog search as its datetime filter
date_period = f"{start_date}/{end_date}"
print(date_period)

In [None]:
# Daily timestamps from start_date up to, but not including, end_date
date_range = np.arange(start_date, end_date, dtype="datetime64[D]")
date_range

In [None]:
# Length of the requested period (end minus start) as a plain integer count
time_difference = np.subtract(
    np.datetime64(end_date),
    np.datetime64(start_date),
).astype(int)
time_difference

## Search for product

### Search based on country input

In [None]:
# Get the Natural Earth low-resolution country polygons.
# geopandas.datasets was deprecated in GeoPandas 0.14 and removed in 1.0, so
# fall back to downloading the equivalent 110m "admin 0 countries" layer.
try:
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
except AttributeError:
    world = gpd.read_file(
        "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
    ).rename(
        # The notebook filters on "name" and dissolves on "continent"; the
        # upstream layer spells these columns in upper case.
        columns={"NAME": "name", "CONTINENT": "continent"}
    )

In [None]:
# Resolve the region of interest (ROI) and the extent used for the maps.
if isinstance(region, (list, tuple)):
    # Bounding-box input: there is no country row to select, so keep an empty
    # frame and use the box itself as the extent.
    if len(region) != 4:
        raise ValueError(
            "A bounding box needs four values: [min_lon, min_lat, max_lon, max_lat]"
        )
    ROI = world.iloc[0:0]
    ROI_bbox = np.asarray(region, dtype=float)
else:
    # Country-name input: select the matching row(s). total_bounds returns
    # [minx, miny, maxx, maxy], the same ordering as a bounding box.
    ROI = world[world["name"] == region]
    ROI_bbox = ROI.total_bounds

In [None]:
def search_catalog(region, date_period, product_name="ch4"):
    """Search the Sentinel-5P L2 collection for offline-processed items.

    Relies on the module-level ``catalog`` (STAC client) and, for country
    names, on the module-level ``world`` GeoDataFrame.

    Args:
        region: Either a bounding box ``[min_lon, min_lat, max_lon, max_lat]``
            (list or tuple) or a country name matching ``world["name"]``.
        date_period: Datetime filter, e.g. ``"2023-08-20/2023-08-25"``.
        product_name: Value required for the ``s5p:product_name`` property.
            Defaults to ``"ch4"``, the asset the rest of the notebook reads.

    Returns:
        The item collection returned by the search.

    Raises:
        ValueError: If the bounding box has out-of-range coordinates or the
            country name is not found in ``world``.
    """
    search_parameters = {
        "collections": ["sentinel-5p-l2-netcdf"],
        "datetime": date_period,
        "query": {
            "s5p:processing_mode": {"eq": "OFFL"},
            "s5p:product_name": {"eq": product_name},
        },
    }

    if isinstance(region, (list, tuple)) and len(region) == 4:
        # bbox input
        min_long, min_lat, max_long, max_lat = region

        # -180 to 180 for longitudes, -90 to 90 for latitudes
        if not all(-180 <= value <= 180 for value in (min_long, max_long)):
            raise ValueError("Invalid longitudes in bbox")
        if not all(-90 <= value <= 90 for value in (min_lat, max_lat)):
            raise ValueError("Invalid latitudes in bbox")

        search_parameters["bbox"] = list(region)
    else:
        # Country-name input: look the name up from the argument itself so
        # the function does not depend on a separately computed global.
        matches = world[world["name"] == region] if isinstance(region, str) else None
        if matches is None or matches.empty:  # Raise error if invalid region input
            raise ValueError("Invalid country name")

        # Pass the GeoJSON geometry through unchanged so that its "type"
        # (Polygon or MultiPolygon) always agrees with its coordinates.
        geometry = json.loads(matches.to_json())["features"][0]["geometry"]
        search_parameters["intersects"] = geometry

    search = catalog.search(**search_parameters)
    items = search.item_collection()

    return items

# Run the catalog search; "region" carries either a bbox or a country name
result = search_catalog(region, date_period)

# Report how many items the search returned
print("Number of items for input: {}".format(len(result)))

## Process queried data into a dataset

In [None]:
# Collect the href of the "ch4" asset of every item found by the search
item_links = [found.assets['ch4'].href for found in result]
item_links

# Open each remote file; the handles stay in `f` for the dataset cells below
f = [remote_file.open() for remote_file in fsspec.open_files(item_links)]

In [None]:
# Open the PRODUCT group of every file handle as its own dataset
datasets = [
    xr.open_dataset(handle, group="PRODUCT", engine="h5netcdf")
    for handle in f
]

In [None]:
# Combine the PRODUCT group of all files into one dataset, stacked along "time";
# the outer join keeps every index value that occurs in any of the files.
d = xr.open_mfdataset(
    f,
    engine="h5netcdf",
    group="PRODUCT",
    combine='nested',
    concat_dim="time",
    join="outer",
)
d

In [None]:
# Group datasets with similar time
d_g = d.methane_mixing_ratio_bias_corrected.groupby("time")
d_g.groups

In [None]:
# Inspect the group for the first requested day. The key is derived from
# start_date (a YYYY-MM-DD string after conversion) rather than hard-coded,
# so the cell keeps working when the inputs change.
d_g[np.datetime64(start_date, "ns")]

In [None]:
import os

# Continent outlines are the same on every map, so dissolve them once.
continent_borders = world.dissolve(by='continent')

# savefig does not create missing folders.
os.makedirs("output", exist_ok=True)

for date, dataset in d_g:
    # Initialize the map
    fig, ax = plt.subplots(1, 1, figsize=(12, 8), subplot_kw={'projection': ccrs.PlateCarree()})
    continent_borders.boundary.plot(ax=ax, linewidth=1, color='black')
    ax.set_title(str(date))

    # One colour scale per map (1st-99th percentile of all values in the
    # group) so that every swath and the colour bar share the same mapping.
    vmin, vmax = np.nanpercentile(dataset.values, [1, 99])
    norm = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)

    colormesh = None
    # Iterating the grouped array yields one slice per entry along its first
    # dimension; each slice is drawn onto the same axes.
    for swath in dataset:
        colormesh = ax.pcolor(
            swath.longitude.values,
            swath.latitude.values,
            swath.values,
            cmap="Spectral_r",
            norm=norm,
            transform=ccrs.PlateCarree(),
            alpha=0.9,
            rasterized=True,
        )

    # Restrict the view to the region of interest after plotting.
    ax.set_xlim(ROI_bbox[0], ROI_bbox[2])
    ax.set_ylim(ROI_bbox[1], ROI_bbox[3])

    # The colour bar needs a drawn mesh, so it is added after the swaths.
    # NOTE(review): the label says mol/m2 - confirm this matches the units of
    # methane_mixing_ratio_bias_corrected.
    if colormesh is not None:
        fig.colorbar(colormesh, pad=0.05, shrink=0.35, label="methane (mol/m2)")

    # Save each finished map once, not once per swath.
    fig.savefig("output/concentration_map" + str(date) + ".jpg")

plt.show()

In [None]:
import glob
from PIL import Image

# glob returns paths in arbitrary order; the file names embed each map's
# date, so sorting them puts the frames in chronological order.
frame_paths = sorted(glob.glob("output/*.jpg"))
if not frame_paths:
    raise FileNotFoundError(
        "No frames found in output/*.jpg - run the map cell before building the GIF"
    )

frames = [Image.open(image) for image in frame_paths]
try:
    frame_one = frames[0]
    # append_images takes only the frames after the first one; passing the
    # whole list would show the first map twice.
    frame_one.save("time_series.gif", format="GIF", append_images=frames[1:],
                   save_all=True, duration=1500, loop=0)
finally:
    # Release the file handles held by the opened images.
    for frame in frames:
        frame.close()