# Atmospheric composition distribution by CAMS and TROPOMI

### Main goal:
* Compare TROPOMI and CAMS datasets for NO<sub>2</sub>, CO, O<sub>3</sub> and SO<sub>2</sub> (Level 2).

## Imports and supporting functions

In [None]:
import os 
from sentinelsat.sentinel import SentinelAPI, geojson_to_wkt
import cdsapi
import cfgrib
import xarray as xr
import pandas as pd
import numpy as np
from itertools import product
import scipy.interpolate
from copy import copy
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import cartopy.crs as ccrs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import cartopy.feature as cfeature
import math
import geojson
from sklearn.linear_model import LinearRegression
from scipy.spatial.distance import cdist
import datetime as dt
import geocoder
import seaborn as sns
from matplotlib.cbook import boxplot_stats  

In [None]:
# Run the helper notebooks before any of the analysis cells below.
# NOTE(review): presumably these define the helpers called later in this
# notebook (e.g. comparison_check, CAMS_download, sensor_read) — confirm.
%run functions/functions_general.ipynb
%run functions/functions_cams.ipynb
%run functions/functions_tropomi.ipynb

In [None]:
# Switch off the pandas chained-assignment warning for this session
pd.set_option('mode.chained_assignment', None)

## 1. Define datasets

In [None]:
# Component to compare
component_nom = 'O3'

# Sensor name and sensor type
sensor, sensor_type = 'tropomi', 'L2'

# Model short name and full name
model, model_full_name = 'cams', 'cams-global-atmospheric-composition-forecasts'

# Make sure this sensor / model / component combination can be compared
comparison_check(sensor, model, component_nom, model_full_name, sensor_type)

# Look up the component full name and molecular weight, together with the
# product type and sensor column that the later cells need
(component,
 component_mol_weight,
 product_type,
 sensor_column) = components_table(sensor, component_nom)

# Generate the folders for this model / sensor / component
generate_folders(model, sensor, component_nom)

In [None]:
# Extent limits: longitudes first, then latitudes
lon_min, lon_max = 12, 18.9
lat_min, lat_max = 48.5, 51.1

# Bounding box as two (lon, lat) corners: the minimum corner, then the maximum
bbox = ((lon_min, lat_min), (lon_max, lat_max))

# Search period: first and last date, then the dates derived from them
start_date, end_date = '2021-08-21', '2021-08-22'
dates = search_period(start_date, end_date, sensor, sensor_type)

## 2. Model data

### Download and read data as xarray

In [None]:
# Download the model data for the search period with model_level 'Multiple'
model_product_name, model_type = CAMS_download(
    dates,
    start_date,
    end_date,
    component,
    component_nom,
    model_full_name,
    model_level='Multiple',
)

In [None]:
# Read the downloaded model product; the second return value is not needed here
model_ds, _ = CAMS_read(model_product_name, component, component_nom, dates)
# Last expression of the cell, so the notebook displays the dataset
model_ds

### Collect 137 levels model data

In [None]:
# Table describing the 137 model levels; it is passed to generate_match_table later on
model_levels_df = CAMS_137_levels()
# Last expression of the cell, so the notebook displays the table
model_levels_df

### Convert CAMS data units (kg/kg to molecules/cm<sup>2</sup>)

In [None]:
# Convert the units of the model data; `units` is reused by the plotting,
# retrieval and scatter plot cells further down
model_ds, units = model_convert_units(
    model_ds,
    model,
    component_mol_weight,
    component,
    conversion_method='Simple',
)

### Download, read and subset original total columns data

In [None]:
# Download the model data again with model_level 'Single', read it and
# subset it to the bbox.
# NOTE(review): this rebinds `model_product_name` and `model_type` returned
# by the 'Multiple' download, so the plotting cells receive the value
# returned here — confirm this is intended.
model_product_name, model_type = CAMS_download(
    dates, start_date, end_date,
    component, component_nom, model_full_name,
    model_level='Single',
)
model_total_ds, _ = CAMS_read(model_product_name, component, component_nom, dates)
model_total_ds = subset(model_total_ds, bbox)

# Last expression of the cell, so the notebook displays the dataset
model_total_ds

### Convert CAMS total columns data units (kg/m<sup>2</sup> to molecules/cm<sup>2</sup>)

In [None]:
# Convert the units of the total columns data; the second return value
# is discarded because `units` was already set by the earlier conversion
model_total_ds, _ = model_convert_units(
    model_total_ds,
    model,
    component_mol_weight,
    component,
    conversion_method='Simple',
)

## 3. Sensor data

### Download and read data as xarray

In [None]:
# Download the sensor products for the given bbox and dates; the returned
# product names are what `sensor_read` receives afterwards
sensor_product_names = sensor_download(
    sensor,
    sensor_type,
    component_nom,
    bbox,
    dates,
    product_type,
)

In [None]:
# Read the downloaded sensor products; two supporting datasets are
# returned alongside the main one
(sensor_ds,
 support_input_ds,
 support_details_ds) = sensor_read(sensor, sensor_type, component_nom, sensor_product_names)

# Last expression of the cell, so the notebook displays the dataset
sensor_ds

### Convert TROPOMI data units (mol/m<sup>2</sup> to molecules/cm<sup>2</sup>)

In [None]:
# Convert the units of the sensor data (presumably of the `sensor_column` variable — confirm)
sensor_ds = sensor_convert_units(sensor_ds, sensor_column, sensor, component)
# Last expression of the cell, so the notebook displays the dataset
sensor_ds

## 4. Match data (and apply averaging kernels)

In [None]:
# Kernels method; options listed by the author:
# None, Interpolation or Nearest neighbours
kernels_method = None

# Build the table that matches sensor and model data within the bbox
match_table = generate_match_table(
    sensor_ds,
    model_ds,
    bbox,
    kernels_method,
    sensor,
    component_nom,
    sensor_column,
    model_levels_df,
)

# Last expression of the cell, so the notebook displays the table
match_table

## 5. Merge data and calculate difference

In [None]:
# Merge the matched sensor and model data into one table, which every
# later plotting and statistics cell works on
merge_table = generate_merge_table(
    match_table,
    sensor_ds,
    model_ds,
    kernels_method,
    sensor_column,
    sensor,
)

# Last expression of the cell, so the notebook displays the table
merge_table

In [None]:
# Descriptive statistics of the merged table
# (assumes merge_table is a pandas DataFrame — TODO confirm)
descr_statistics_table = merge_table.describe()
# Last expression of the cell, so the notebook displays the statistics
descr_statistics_table

## 6. Filtering

In [None]:
# Optional filtering step, currently disabled: uncomment both lines to run it
#filter_negative = True
#merge_table = filter_merge_table(merge_table, sensor_column, component_nom, units, filter_negative)

## 7. Show comparison plots

### Select plot dates

In [None]:
# Dates used by the plotting, retrieval and scatter plot cells below
plot_dates = plot_period(sensor_ds, sensor)

### Select plot extent

In [None]:
# Extent used by the maps and scatter plots, obtained from the search bbox
plot_bbox = plot_extent(bbox)

### Compare CAMS and TROPOMI total columns

In [None]:
# Plot options
range_type = 'equal'  # original or equal
distribution_type = 'individual'  # aggregated or individual
projection = ccrs.PlateCarree()
color_scale = 'coolwarm'

# NOTE(review): 20 and 0.80 are positional settings whose meaning is defined
# by visualize_model_vs_sensor — TODO document them here.
visualize_model_vs_sensor(
    model, sensor, component_nom, units, merge_table,
    plot_dates, plot_bbox, 20, 0.80,
    model_type, sensor_type, range_type, sensor_column,
    distribution_type, projection, color_scale,
)

### Compare CAMS original and averaged total columns

In [None]:
# Plot options
range_type = 'equal'  # original or equal
projection = ccrs.PlateCarree()
color_scale = 'coolwarm'

# NOTE(review): 20 and 1 are positional settings whose meaning is defined
# by visualize_model_original_vs_calculated — TODO document them here.
visualize_model_original_vs_calculated(
    model, component_nom, units, merge_table, model_total_ds,
    plot_dates, plot_bbox, 20, 1,
    model_type, range_type, projection, color_scale,
)

## 8. Retrieve nearest values to specific latitude and longitude

In [None]:
# Coordinates to look up, written two values per line (presumably one
# latitude/longitude pair per line — confirm the order against `pairwise`).
# NOTE(review): whichever value of a pair is the latitude, none of these
# pairs falls inside the bbox defined earlier (lon 12 to 18.9, lat 48.5 to
# 51.1) — confirm they are the intended search locations.
coords_search_list = (
    50, 60,
    4, 10,
    20, 30,
)
coords_search = pairwise(coords_search_list)

# Rows with missing values are dropped before the lookup
retrieval_table_all = retrieve_coords(
    merge_table.dropna(), coords_search, component_nom,
    sensor_column, sensor, model, plot_dates, units,
)

# Last expression of the cell, so the notebook displays the table
retrieval_table_all

## 9. Scatter plots by bbox

In [None]:
# Scatter plot settings
show_seasons = False
extent_definition = 'bbox'  # bbox or country
scatter_plot_type = 'aggregated'  # aggregated or individual

# Rows with missing values are dropped before plotting
summary = scatter_plot(
    merge_table.dropna(), component_nom, units, sensor_column, sensor,
    plot_dates, 1.05, extent_definition, show_seasons, scatter_plot_type,
    plot_bbox,
)

In [None]:
# Display the value returned by the scatter_plot call of the previous cell
summary

## 10. Scatter plots by season

In [None]:
# Scatter plot settings
show_seasons = True
extent_definition = 'bbox'  # bbox or country
scatter_plot_type = 'individual'  # aggregated or individual

# Rows with missing values are dropped before plotting
summary = scatter_plot(
    merge_table.dropna(), component_nom, units, sensor_column, sensor,
    plot_dates, 1.05, extent_definition, show_seasons, scatter_plot_type,
    plot_bbox,
)

In [None]:
# Display the value returned by the scatter_plot call of the previous cell
summary

## 11. Scatter plots by country (Google API required!)

In [None]:
# This cell is disabled: the code is wrapped in a string literal, so it is
# evaluated as a plain string and nothing inside it runs. Remove the triple
# quotes to enable it.
# NOTE(review): the call inside does not assign its result to `summary`,
# unlike the scatter plot cells of sections 9 and 10 — confirm before enabling.
"""
show_seasons = False
extent_definition = 'country' # bbox or country
scatter_plot_type = 'aggregated' # aggregated or individual
plot_countries = ['Czech Republic', 'Poland', 'Germany']
scatter_plot(merge_table.dropna(), component_nom, units, sensor_column, sensor, 
             plot_dates, 1.05, extent_definition, show_seasons, scatter_plot_type, plot_countries)
"""

In [None]:
# Disabled, presumably together with the country scatter plot cell above
#summary