# Compare radar precipitation data to era5 and station data

Radar data retrieved from the [CEDA Archive](https://catalogue.ceda.ac.uk/uuid/27dd6ffba67f667a18c62de5c3456350) has a much higher spatial resolution (1 km vs 10 km) and temporal resolution (5 min vs 1 hour) than era5. However, accuracy is crucial for this project. That is why we compare radar precipitation data with era5 and weather station data to decide whether it is more precise than the previously used era5 data.

We plot:

- maps of radar data with overlaid data from weather stations (also reduced to Tweets that are very close to weather stations)
- maps that show weather stations and Tweet data from weather stations to verify that Tweets were correctly tagged
- histograms of precipitation comparing era5 vs stations and radar vs stations

Radar data seems to exhibit systematic uncertainties, especially with rain that is not detected by weather stations, and shows an overall increased inaccuracy compared to era5 data. In conclusion, we abstain from using radar data for now (see also the notebook radar_era5_station_comparison_timeline for an analysis of precipitation time series near weather stations).

In [None]:
# Reload imported modules automatically before each cell is executed, so edits
# to external libraries are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import re
import glob
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import logging
import pathlib
import datetime
import tqdm
import xarray
import rioxarray
import convertbng
import pyproj
import h5py
import itertools
import functools
import collections
import plotly.express

logging.basicConfig(level=logging.INFO)


import a2.utils
import a2.dataset
import a2.plotting

In [None]:
# Root data folder, resolved by the project helper.
FOLDER_DATA = a2.utils.file_handling.get_folder_data()
# Tweets dataset. Judging by the file name it already carries era5, weather
# station and radar precipitation values -- NOTE(review): confirm.
FOLDER_TWEETS = FOLDER_DATA / "tweets/"
FILE_TWEETS = (
    FOLDER_TWEETS
    / "2017_2020_tweets_rain_sun_vocab_emojis_locations_bba_Tp_era5_no_bots_normalized_filtered_weather_stations_fix_predicted_simpledeberta_radar.nc"
)
# Weather station measurements (CSV), loaded into `df_stations` below.
FOLDER_WEATHER_STATIONS = FOLDER_DATA / "weather_stations/"
FILE_WEATHER_STATIONS = FOLDER_WEATHER_STATIONS / "weather_stations_hourly_rainfall_uk_2017-2020_station_number.csv"

# era5 precipitation file.
# NOTE(review): FILE_ERA5 is not referenced by any cell visible in this notebook.
FOLDER_ERA5 = FOLDER_DATA / "precipitation/"
FILE_ERA5 = FOLDER_ERA5 / "ds_prec_era5_uk_2017-2020_decum.nc"

# Folder with the radar data; handed to the radar map plot as `path_to_dapceda`.
FOLDER_RADAR_DAPCEDA = a2.utils.file_handling.get_folder_radar()

In [None]:
# Load the weather station records from FILE_WEATHER_STATIONS via the project loader.
df_stations = a2.dataset.load_dataset.load_weather_stations(FILE_WEATHER_STATIONS)

In [None]:
# Load the Tweets dataset from FILE_TWEETS via the project loader.
ds_tweets = a2.dataset.load_dataset.load_tweets_dataset(FILE_TWEETS)

In [None]:
# The same file opened directly with xarray (bypassing the project loader used
# for `ds_tweets`); this is faster for some xarray operations.
ds_t = xarray.open_dataset(FILE_TWEETS)

## Maps comparing station and radar precipitation estimates

In [None]:
# Maps of weather station precipitation together with Tweets, starting at
# 2017-05-27 12:30.
a2.plotting.weather_maps.plot_tp_station_tweets(
    ds_t,
    # reset_index() is applied before handing the stations over; the bare
    # `df_stations` was passed here previously.
    df_stations.reset_index(),
    # --- time stepping ---
    choice_type="increment_time",
    increment_time_value=np.datetime64("2017-05-27T12:30:00.000000000"),
    # --- layout and appearance ---
    grid_shape=(3, 2),
    fontsize=14,
    colormap="tab20c",
    vmin=0,
    vmax=1,
)

In [None]:
# Radar maps with Tweets overlaid, starting at 2019-10-09 11:00 and reading the
# radar files from FOLDER_RADAR_DAPCEDA.
f = a2.plotting.weather_maps.plot_radar_map_with_tweets(
    ds=ds_t,
    path_to_dapceda=FOLDER_RADAR_DAPCEDA,
    # --- Tweet selection (1 h window on the "time_radar" key) ---
    selection_key_twitter_time="time_radar",
    selection_delta_time=1,
    selection_delta_time_units="h",
    selector_use_limits=[True, False],
    # --- time stepping ---
    choice_type="increment_time",
    increment_time_value=np.datetime64("2019-10-09T11:00:00.000000000"),
    # --- radar field ---
    cumulative_radar=True,
    vmax=1,
    # --- figure layout and extent ---
    grid_shape=(4, 4),
    figsize=None,
    xlim=(-1.5, 1.5),
    ylim=(50, 53),
    # circle_size_constant=0.1,
)

## Histograms radar vs weather station

In [None]:
# Keep only Tweets that have both a station and a radar precipitation value:
# rows where either one is NaN are dropped, as are rows where the radar value
# is -inf.
ds_no_nan = ds_tweets.where(
    ~(
        a2.dataset.utils_dataset.is_nan(ds_tweets, "station_tp_mm")
        | a2.dataset.utils_dataset.is_nan(ds_tweets, "tp_mm_radar")
    )
    & (ds_tweets.tp_mm_radar != -np.inf),
    drop=True,
)
# Display the filtered dataset as the cell output.
ds_no_nan

In [None]:
# Preview only (result is displayed, not stored): entries with
# station_distance_km below 0.4.
ds_no_nan.where(ds_no_nan.station_distance_km < 0.4, drop=True)

In [None]:
# Entries with station_distance_km below 2; this subset feeds the radar maps below.
ds_close_station = ds_no_nan.where(ds_no_nan.station_distance_km < 2, drop=True)

## Checking out specific time series in radar data

In [None]:
# Hourly radar maps (cumulative radar field) with the near-station Tweets,
# starting at 2017-05-17 15:00.
f = a2.plotting.weather_maps.plot_radar_map_with_tweets(
    ds=ds_close_station,
    # --- Tweet selection (1 h window on the "time_radar" key) ---
    selection_key_twitter_time="time_radar",
    selection_delta_time=1,
    selection_delta_time_units="h",
    selector_use_limits=[True, False],
    # --- time stepping (1 h increments) ---
    choice_type="increment_time",
    increment_time_value=np.datetime64("2017-05-17T15:00:00.000000000"),
    increment_time_delta=1,
    increment_time_delta_units="h",
    # --- radar field ---
    cumulative_radar=True,
    colormap="magma_r",
    vmax=1,
    # --- Tweet markers ---
    circle_size_constant=0.1,
    circle_alpha=1.0,
    # --- figure layout and extent ---
    grid_shape=(5, 4),
    figsize=None,
    xlim=[-1.5, 1.5],
    ylim=[50, 53],
)

In [None]:
# 5 minute radar maps (non-cumulative radar field) with the near-station
# Tweets, starting at 2018-04-17 12:00.
f = a2.plotting.weather_maps.plot_radar_map_with_tweets(
    ds=ds_close_station,
    # --- Tweet selection (5 min window on the "time_radar" key) ---
    selection_key_twitter_time="time_radar",
    selection_delta_time=5,
    selection_delta_time_units="m",
    selector_use_limits=[True, False],
    # --- time stepping (5 min increments) ---
    choice_type="increment_time",
    increment_time_value=np.datetime64("2018-04-17T12:00:00.000000000"),
    increment_time_delta=5,
    increment_time_delta_units="m",
    # --- radar field ---
    cumulative_radar=False,
    colormap="magma_r",
    vmax=1,
    # --- Tweet markers ---
    circle_size_constant=0.1,
    circle_alpha=1.0,
    # --- figure layout and extent ---
    grid_shape=(5, 4),
    figsize=None,
    xlim=[-1.5, 1.5],
    ylim=[50, 53],
)

In [None]:
# 5 minute radar maps (non-cumulative radar field) with the near-station
# Tweets, starting at 2018-04-22 14:00.
f = a2.plotting.weather_maps.plot_radar_map_with_tweets(
    ds=ds_close_station,
    # --- Tweet selection (5 min window on the "time_radar" key) ---
    selection_key_twitter_time="time_radar",
    selection_delta_time=5,
    selection_delta_time_units="m",
    selector_use_limits=[True, False],
    # --- time stepping (5 min increments) ---
    choice_type="increment_time",
    increment_time_value=np.datetime64("2018-04-22T14:00:00.000000000"),
    increment_time_delta=5,
    increment_time_delta_units="m",
    # --- radar field ---
    cumulative_radar=False,
    colormap="magma_r",
    vmax=1,
    # --- Tweet markers ---
    circle_size_constant=0.1,
    circle_alpha=1.0,
    # --- figure layout and extent ---
    grid_shape=(5, 4),
    figsize=None,
    xlim=[-1.5, 1.5],
    ylim=[50, 53],
)

In [None]:
# Hourly maps of weather station precipitation together with Tweets, starting
# at 2020-01-14 03:30.
# NOTE(review): the first station map in this notebook passes
# `df_stations.reset_index()`, this call passes `df_stations` unchanged --
# confirm which form the plotting function expects.
a2.plotting.weather_maps.plot_tp_station_tweets(
    ds_t,
    df_stations,
    # --- Tweet selection (1 h window) ---
    selection_delta_time=1,
    selection_delta_time_units="h",
    selector_use_limits=[True, False],
    # --- time stepping (1 h increments) ---
    choice_type="increment_time",
    increment_time_value=np.datetime64("2020-01-14T03:30:00.000000000"),
    increment_time_delta=1,
    increment_time_delta_units="h",
    # --- layout and appearance ---
    grid_shape=(5, 4),
    fontsize=14,
    colormap="tab20c",
    vmin=0,
    vmax=1,
    xlim=[-5, 0],
    ylim=[51, 56],
);

## Histogram radar (era5) vs stations

In [None]:
# Keep the untouched station values in "station_tp_mm_h" and store the values
# scaled by 5 / 60 in "station_tp_mm" (presumably hourly -> 5 minute equivalent
# to match the radar resolution -- NOTE(review): confirm units).
#
# The guard makes the cell safe to re-run: without it, a second execution would
# copy the already scaled values into "station_tp_mm_h" and scale
# "station_tp_mm" a second time.
#
# NOTE(review): the histogram cells visible in this notebook read `ds_tweets`
# and `ds_no_nan`, not `ds_close_station` -- confirm where the rescaled values
# are meant to be used.
if "station_tp_mm_h" not in ds_close_station:
    ds_close_station["station_tp_mm_h"] = (["index"], ds_close_station["station_tp_mm"].values)
# Always derived from the preserved original, so repeated runs give the same result.
ds_close_station["station_tp_mm"] = (["index"], ds_close_station["station_tp_mm_h"].values * 5 / 60)

In [None]:
# Overlay the 1D distributions of station ("station_tp_mm") and radar
# ("tp_mm_radar") precipitation on one shared axis, restricted to [0, 2].
# NOTE(review): both histograms read the unfiltered `ds_tweets`, while the 2D
# histograms below use `ds_no_nan` limited to station_distance_km < 2 --
# confirm that the unfiltered dataset is intended here.
n_bins = 12
fig, ax = a2.plotting.utils_plotting.create_figure_axes()
a2.plotting.histograms.plot_histogram(
    ds=ds_tweets,
    x="station_tp_mm",
    xlim=[0, 2],
    n_bins=n_bins,
    alpha=0.5,
    fig=fig,
    ax=ax,
)
a2.plotting.histograms.plot_histogram(
    ds=ds_tweets,
    x="tp_mm_radar",
    xlim=[0, 2],
    n_bins=n_bins,
    alpha=0.5,
    fig=fig,
    ax=ax,
)

In [None]:
# NOTE(review): scratch check; its result is not used by any cell shown in this
# notebook and it looks like a debugging leftover.
isinstance("s", str)

In [None]:
# 2D histogram of radar ("tp_mm_radar") against station ("station_tp_mm")
# precipitation for entries with station_distance_km < 2, leaving out entries
# where the station value is exactly 0.
a2.plotting.histograms.plot_histogram_2d(
    ds=ds_no_nan.where(
        (ds_no_nan.station_tp_mm != 0) & (ds_no_nan.station_distance_km < 2),
        drop=True,
    ),
    x="station_tp_mm",
    xlim=[0, 2],
    y="tp_mm_radar",
    ylim=[0, 2],
    n_bins=11,
    norm=None,  # alternative: "log"
);

In [None]:
# 2D histogram of "tp_h_mm" (presumably the era5 precipitation -- NOTE(review):
# confirm) against station precipitation for entries with
# station_distance_km < 2, drawn with a logarithmic colour norm.
a2.plotting.histograms.plot_histogram_2d(
    ds=ds_no_nan.where(
        ds_no_nan.station_distance_km < 2,
        drop=True,
    ),
    x="station_tp_mm",
    xlim=[0, 2],
    y="tp_h_mm",
    ylim=[0, 2],
    n_bins=11,
    norm="log",
);