In [None]:
# Automatically reload imported modules before each cell runs, so edits to library code are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
%load_ext line_profiler

In [None]:
import numpy as np
import plotly.express
import matplotlib.pyplot as plt
import pandas as pd
import geopy.distance
import re
import functools
import logging
import pathlib

logging.basicConfig(level=logging.INFO)

import a2.dataset
import a2.plotting
import a2.utils

In [None]:
# Locations of inputs and outputs, given relative to the notebook's working directory.
FOLDERS_TWEETS = pathlib.Path("../../data/tweets/")
FOLDER_FIGURES = pathlib.Path("../../figures/data/weather_stations")
# Tweets dataset loaded by this notebook. FILE_TWEETS is assigned exactly once so that
# the path actually used is unambiguous.
FILE_TWEETS = (
    FOLDERS_TWEETS
    / "2017_2020_tweets_rain_sun_vocab_emojis_locations_bba_Tp_era5_no_bots_normalized_filtered_weather_stations_predicted_simpledeberta_radar.nc"
)
# Weather-station measurements (kept as a plain string, as before).
FILE_WEATHER_STATIONS = "../../data/weather_stations/weather_stations_hourly_rainfall_uk_2017-2020_reduced.csv"

In [None]:
# Read the tweets dataset from FILE_TWEETS.
ds = a2.dataset.load_dataset.load_tweets_dataset(FILE_TWEETS, open=False)

# Integer rain flag per tweet: 1 where `tp_h` exceeds 1e-8, otherwise 0.
is_raining = ds.tp_h.values > 1e-8
ds["raining"] = (["index"], is_raining.astype(int))

In [None]:
# Keep only tweets created on 2020-10-09, from 00:00 through 23:55 (both bounds inclusive).
window_start = np.datetime64("2020-10-09T00:00:00.000000000")
window_end = np.datetime64("2020-10-09T23:55:00.000000000")
in_window = (ds.created_at >= window_start) & (ds.created_at <= window_end)
ds_sel = ds.where(in_window, drop=True)

In [None]:
# Load the weather-station records from the path in FILE_WEATHER_STATIONS.
df_weather_stations = a2.dataset.load_dataset.load_weather_stations(FILE_WEATHER_STATIONS)

In [None]:
# Display the loaded weather-station records for a quick visual check.
df_weather_stations

In [None]:
# Attach a `station_number` column, then collect one (latitude, longitude, row count)
# record per distinct station number.
df_weather_stations = a2.dataset.stations.add_station_number(df_weather_stations)

station_summaries = []
for station_number in df_weather_stations.station_number.unique():
    station_frame = a2.dataset.stations.get_dataframe_from_station_number(df_weather_stations, station_number)
    station_summaries.append(
        (
            # The first row's coordinates are taken as the station's position.
            station_frame.latitude.values[0],
            station_frame.longitude.values[0],
            # Number of rows returned for this station.
            station_frame.shape[0],
        )
    )

# Unpack the per-station records into parallel arrays (same order as `unique()`).
n_measurement_per_station = np.array([count for _, _, count in station_summaries])
latitudes = np.array([latitude for latitude, _, _ in station_summaries])
longitudes = np.array([longitude for _, longitude, _ in station_summaries])

In [None]:
# Map of station positions, coloured by the fraction of measurements available per station.
# 35040 == 4 * 365 * 24.
# NOTE(review): presumably the number of hourly slots in 2017-2020; with the 2020 leap day
# that would be 35064 — confirm which value is intended.
n_measurements_reference = 35040
data_coverage = n_measurement_per_station / n_measurements_reference

fig, axes = a2.plotting.utils_plotting.create_figure_axes(aspect="equal", font_size=16)
scat = axes.scatter(longitudes, latitudes, c=data_coverage)
axes.set_xlabel("longitude")
axes.set_ylabel("latitude")

# Colour bar attached to the scatter axes, labelled on its own axis.
colorbar = plt.colorbar(scat, ax=axes)
colorbar.ax.set_ylabel("Data coverage")

a2.plotting.utils_plotting.save_figure(fig, FOLDER_FIGURES / "weather_station_data_coverage.png")

In [None]:
# Interactive version of the station map: one marker per station, coloured by the raw
# number of measurements, with a colour scale shown next to the plot.
station_markers = plotly.graph_objects.Scatter(
    x=longitudes,
    y=latitudes,
    mode="markers",
    marker={"color": n_measurement_per_station, "showscale": True},
)
fig = plotly.graph_objects.Figure()
fig.add_trace(station_markers)
# Tie the y axis to the x axis with ratio 1; the call is left as the last expression so
# its return value is shown as the cell output.
fig.update_yaxes(scaleanchor="x", scaleratio=1)

In [None]:
# Combine the tweets dataset with the weather-station measurements.
ds_stations = a2.dataset.stations.add_station_precipitation(ds, df_weather_stations)

# `tp_h` scaled by 1e3 (presumably metres -> millimetres, matching the `_mm` suffix — confirm).
tp_h_scaled = ds_stations.tp_h.values * 1e3
ds_stations["tp_h_mm"] = (["index"], tp_h_scaled)

In [None]:
# Write the station-augmented dataset to a NetCDF file.
# NOTE(review): the file name has no directory part, so it is written relative to the
# current working directory rather than under FOLDERS_TWEETS — confirm this is intended.
filename_stations_fix = "2017_2020_tweets_rain_sun_vocab_emojis_locations_bba_Tp_era5_no_bots_normalized_filtered_weather_stations_fix_predicted_simpledeberta_radar.nc"
ds_stations.to_netcdf(filename_stations_fix)

In [None]:
# Options for the precipitation / station / tweet map panels, gathered in one place.
map_options = {
    "grid_shape": (5, 2),
    "delta_time": 0.45,
    "colormap": "tab20c",
    "vmin": 0,
    "vmax": 1,
    "fontsize": 14,
    "xlim": [-5, 1],
    "ylim": [50, 56],
    "choice_type": "increment_time",
    "increment_time_value": np.datetime64("2020-10-09T07:30:00.000000000"),
}
# Left as the final expression so that whatever the plotting helper returns is displayed.
a2.plotting.weather_maps.plot_tp_station_tweets(ds_stations, df_weather_stations, **map_options)

In [None]:
# Derived per-tweet variables comparing station measurements with the dataset's own
# precipitation values.
station_tp_mm = ds_stations.station_tp_mm.values

# 1 where the station reports precipitation > 0, else 0.
# NOTE(review): NaN > 0 evaluates to False, so tweets without a station value are flagged
# as "not raining" here — confirm this is intended.
ds_stations["raining_station"] = (
    ["index"],
    (station_tp_mm > 0).astype(int),
)
# 1 where the `raining` flag and the station-based flag disagree, else 0.
ds_stations["inconsistent_rain"] = (
    ["index"],
    np.abs(ds_stations.raining.values - ds_stations.raining_station.values),
)
# Absolute difference between the two precipitation values. `tp_h_mm` (tp_h * 1e3) is used
# instead of the unscaled `tp_h` so that both operands are expressed in the same unit as
# `station_tp_mm`.
ds_stations["difference_tp"] = (
    ["index"],
    np.abs(station_tp_mm - ds_stations.tp_h_mm.values),
)

In [None]:
# Save the dataset, including the derived station variables, next to the other tweet
# files. The target directory comes from FOLDERS_TWEETS so that it stays in sync with the
# configuration cell; the resulting path is the same as the previous hard-coded one.
ds_stations.to_netcdf(
    FOLDERS_TWEETS
    / "2017_2020_tweets_rain_sun_vocab_emojis_locations_bba_Tp_era5_no_bots_normalized_filtered_weather_stations.nc"
)

In [None]:
# Flat table of the tweets that have a station precipitation value: rows with a missing
# `station_tp_mm` are dropped once, the index is renumbered, and columns are converted to
# pandas' best-fitting dtypes.
df = (
    ds_stations.to_dataframe()
    .dropna(subset=["station_tp_mm"])
    .reset_index(drop=True)
    .convert_dtypes()
)
df

In [None]:
# Station precipitation against the dataset's precipitation for every station-matched
# tweet, coloured by distance to the station (colour range clipped to 0-20).
# The y axis uses `tp_h_mm` (tp_h * 1e3) rather than the unscaled `tp_h`, so both axes are
# in the same unit as `station_tp_mm` and can be compared one-to-one.
hover_keys = ["text_normalized"]
fig = plotly.express.scatter(
    data_frame=df,
    x="station_tp_mm",
    y="tp_h_mm",
    color="station_distance_km",
    range_color=[0, 20],
    hover_data=hover_keys,
    color_continuous_scale=plotly.express.colors.sequential.Viridis,
)
fig.show()

In [None]:
# Absolute precipitation difference against distance to the station for all tweets in
# `ds_stations`, coloured by the station-based rain flag; the tweet text is shown on hover.
hover_keys = ["text_normalized"]
df_all_tweets = ds_stations.to_dataframe()
scatter_options = {
    "x": "difference_tp",
    "y": "station_distance_km",
    "color": "raining_station",
    "hover_data": hover_keys,
    "color_continuous_scale": "Aggrnyl",
}
fig = plotly.express.scatter(data_frame=df_all_tweets, **scatter_options)
fig.show()

In [None]:
# %lprun -f add_station_precipitation add_station_precipitation(ds.sel(index=slice(1000)), df)

In [None]:
# Display the station-matched tweet table.
df

In [None]:
# Display the `ob_end_time` column of the station-matched tweet table.
df["ob_end_time"]

In [None]:
def str_to_delta_time(string):
    """Split a string such as ``"30h"`` into its numeric value and its unit.

    The unit is the run of ASCII letters at the very end of ``string``; everything in
    front of it is converted with ``float``.

    Args:
        string: Text consisting of a number followed by a letters-only unit.

    Returns:
        Tuple ``(value, units)`` with ``value`` as ``float`` and ``units`` as ``str``.

    Raises:
        ValueError: If ``string`` does not end in letters, or if the part before the
            unit cannot be converted to ``float``.
    """
    unit_match = re.search("([a-zA-Z]+)$", string)
    if unit_match is None:
        raise ValueError(f"Expected a number followed by a unit (e.g. '30h'), got {string!r}: no unit found")
    time = string[: unit_match.start()]
    units = unit_match.group(1)
    try:
        value = float(time)
    except ValueError:
        # Re-raise with the full input so the offending string is visible to the caller.
        raise ValueError(
            f"Expected a number followed by a unit (e.g. '30h'), got {string!r}: {time!r} is not a number"
        ) from None
    return value, units


str_to_delta_time("30hasd")