# Assign radar precipitation to Tweets dataset

This notebook was used to assign radar data from NIMROD files that were converted to netCDF files. Storing the converted files requires an extremely large amount of storage (~50TB, only possible on an HPC cluster), which is why an alternative function exists that doesn't save the extracted netCDF files to disk but only uses this format temporarily to assign radar data to the Twitter dataset (see `a2.dataset.radar.assign_radar_to_tweets`).

In [None]:
# Automatically reload imported modules before executing code, so that edits to external libraries take effect without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
import datetime
import glob
import logging
import math
import os
import pathlib
import re
import sys

# Add the source folder given by LIBRARY_PATH to the module search path.
# NOTE(review): presumably this is what makes the `a2` package imported below
# importable, so it has to run before the `import a2.*` lines — confirm.
LIBRARY_PATH = "/p/home/jusers/ehlert1/juwels/a2/src/"
sys.path.append(LIBRARY_PATH)

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tqdm
import xarray

import a2.dataset
import a2.plotting
import a2.utils
import pyproj


# Configure the root logger so that messages of level INFO and above are shown.
logging.basicConfig(level=logging.INFO)

In [None]:
# Paths used throughout the notebook. Earlier revisions assigned developer-local
# paths first and immediately overwrote them; only the effective (last) values
# are kept here. Point these to a local copy of the data when not running on
# the cluster file system.

# Folder containing the tweets datasets.
FOLDER_TWEETS = pathlib.Path("/p/scratch/deepacf/maelstrom/maelstrom_data/ap2/data/tweets")

# Tweets dataset (netCDF) that the radar precipitation is assigned to.
FILE_TWEETS = (
    FOLDER_TWEETS
    / "2017_2020_tweets_rain_sun_vocab_emojis_locations_bba_Tp_era5_no_bots_normalized_filtered_weather_stations_predicted_simpledeberta.nc"
)

# Folder containing the radar precipitation data.
FOLDER_RADAR = pathlib.Path("/p/scratch/deepacf/maelstrom/maelstrom_data/ap2/data/precipitation/radar/badc")

In [None]:
# Load the tweets dataset from disk.
ds_tweets = xarray.open_dataset(FILE_TWEETS)

# Restrict to tweets created on 2020-10-09 between 00:00 and 23:55 (inclusive);
# entries outside this window are dropped from the selection.
selection_start = np.datetime64("2020-10-09T00:00:00.000000000")
selection_end = np.datetime64("2020-10-09T23:55:00.000000000")
created_in_window = (ds_tweets.created_at >= selection_start) & (ds_tweets.created_at <= selection_end)
ds_tweets_sel = ds_tweets.where(created_in_window, drop=True)

In [None]:
# Options forwarded to `a2.dataset.radar.assign_radar_to_tweets`.
# NOTE(review): the meaning of these options is defined by that function, which
# is not visible here. Presumably the `round_ngt_*` values snap tweet
# coordinates onto the radar grid, `round_time_to_base` snaps timestamps to the
# radar time step and `processes=-1` uses all available cores — confirm.
radar_assignment_options = dict(
    round_ngt_offset=500,
    round_ngt_decimal=-3,
    round_time_to_base=5,
    base_folder="/p/scratch/deepacf/maelstrom/maelstrom_data/ap2/data/precipitation/radar",
    processes=-1,
)

# Replace the tweets dataset with the result of the radar assignment.
ds_tweets = a2.dataset.radar.assign_radar_to_tweets(ds_tweets, **radar_assignment_options)

In [None]:
# Write the dataset returned by the radar assignment to a new netCDF file
# (input file name with a "_radar" suffix) in the tweets folder.
file_tweets_radar = (
    FOLDER_TWEETS
    / "2017_2020_tweets_rain_sun_vocab_emojis_locations_bba_Tp_era5_no_bots_normalized_filtered_weather_stations_predicted_simpledeberta_radar.nc"
)
ds_tweets.to_netcdf(file_tweets_radar)

In [None]:
# Re-select the day of interest from `ds_tweets` *after* the radar assignment.
# The earlier `ds_tweets_sel` was derived from the dataset before
# `assign_radar_to_tweets` ran, so it presumably lacks the "tp_mm_radar"
# variable and indexing it would raise a KeyError.
ds_tweets_sel = ds_tweets.where(
    (ds_tweets.created_at >= np.datetime64("2020-10-09T00:00:00.000000000"))
    & (ds_tweets.created_at <= np.datetime64("2020-10-09T23:55:00.000000000")),
    drop=True,
)

# Scatter plot of the "tp_mm_radar" value of each selected tweet against its
# creation time.
plt.scatter(ds_tweets_sel.created_at.values, ds_tweets_sel["tp_mm_radar"].values)