# Visualising geomagnetic data

In [None]:
# Install latest development version of viresclient
!pip install --upgrade git+https://github.com/ESA-VirES/VirES-Python-Client.git@staging

# %load_ext autoreload
# %autoreload 2

import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from viresclient import SwarmRequest

## Use to make interactive matplotlib plots
##  - doesn't work smoothly
##  - investigating using plotly/bokeh instead
# %matplotlib widget

%load_ext watermark
%watermark -i -v -p numpy,pandas,xarray,matplotlib,ipywidgets,viresclient

## Fetching & loading INTERMAGNET data

Here we use [VirES](https://earth.esa.int/eogateway/tools/vires-for-swarm) to access some ground observatory data [(see here for more)](https://swarm.magneticearth.org/notebooks/04c2_geomag-ground-data-vires). The following code downloads the data for a given observatory over a chosen range of years, and loads them as a pandas DataFrame.

We will select data from 2003 measured at [the Eskdalemuir observatory (ESK)](http://www.geomag.bgs.ac.uk/operations/eskdale.html) in Scotland, which has a latitude of 55.314° N. If you wish you can select data from a different year or observatory (check [this map of INTERMAGNET observatories](https://intermagnet.github.io/metadata/map) to find the three-letter code that specifies the observatory).

NB: Data is provided in the NEC (North, East, Centre) geocentrically-defined frame, in contrast to [INTERMAGNET data](https://intermagnet.github.io/) from other sources where the geodetic frame is used. This results in a small rotation in the North (X) and Centre (Z) vectors and a slightly different "latitude" for the observatory location.

In [None]:
def fetch_obs_data_for_years(
    observatory="ESK", year_start=2003, year_end=2003, cadence="M",
    use_xarray=False, **kwargs
):
    """Fetch given years of observatory data from VirES, at minute (M) or hour (H) cadence

    Args:
        observatory (str): 3-letter IAGA code
        year_start (int): Chosen year to start
        year_end (int): Year to end with (inclusive)
        cadence (str): "M" for minute, or "H" for hour
        use_xarray (bool): Return xarray.Dataset instead
        **kwargs: Passed on unchanged to SwarmRequest.get_between()

    Returns:
        DataFrame: with the "Spacecraft" column removed and the B_NEC
            components expanded and renamed to X, Y, Z columns;
            or, if use_xarray is True, an xarray.Dataset with the
            "Spacecraft" variable removed (B_NEC is left unexpanded)
    """
    # Fetch data from VirES
    request = SwarmRequest()
    # The collection name encodes both the cadence and the observatory
    request.set_collection(f"SW_OPER_AUX_OBS{cadence}2_:{observatory}", verbose=False)
    request.set_products(measurements=["B_NEC", "IAGA_code"])
    # The request runs from 1 January of year_start
    #   up to 1 January of the year following year_end
    data = request.get_between(
        dt.datetime(year_start, 1, 1),
        dt.datetime(year_end+1, 1, 1),
        **kwargs
    )
    if use_xarray:
        # drop_vars is the current xarray spelling; Dataset.drop is deprecated
        return data.as_xarray().drop_vars("Spacecraft")
    # Load data in Pandas Dataframe with X, Y, Z columns
    df = data.as_dataframe(expand=True).drop(columns="Spacecraft")
    return df.rename(columns={f"B_NEC_{i}": j for i, j in zip("NEC", "XYZ")})

# Download one year of minute-cadence data for Eskdalemuir (ESK);
#   obs_min is used as the default dataset by the plotting cells below
obs_min = fetch_obs_data_for_years(
    observatory="ESK",
    year_start=2003,
    year_end=2003,
    cadence="M",
)
# Preview the first few rows
obs_min.head()

## Plotting 1-minute data and their hourly means

This next code cell creates an interactive element that lets you view the data loaded above.

```{tip}
If you are viewing this on the web, you will not be able to interact with it - you will need to be in an active JupyterLab.
```

In [None]:
def plot_subset_timeseries(start_date, end_date, hourly_mean=False, show_annual_mean=False, df=obs_min):
    """Configurably plot a subset of the data

    Args:
        start_date (datetime): Start of the selection (date or datetime)
        end_date (datetime): End of the selection (date or datetime)
        hourly_mean (bool): Evaluate and plot the hourly means instead
        show_annual_mean (bool): Show offset from annual mean
        df (DataFrame): Assumed to be of form output from fetch_obs_data_for_years

    Returns:
        Figure

    Raises:
        ValueError: If either date is None (e.g. a cleared date picker)
    """
    if start_date is None or end_date is None:
        raise ValueError("start_date and end_date must both be set")
    # Use explicit Timestamps: slicing a DatetimeIndex with bare
    #   datetime.date objects is deprecated in recent pandas
    start, end = pd.Timestamp(start_date), pd.Timestamp(end_date)
    components = list("XYZ")
    # Take the station code from the data actually being plotted
    #   (not from the global obs_min), before any subsetting
    observatory = df["IAGA_code"].iloc[0]
    # Only the field components are averaged and plotted; this also keeps
    #   the string IAGA_code column out of the mean() calls
    field = df[components]
    # Evaluate annual means to use later,
    #   reindexed with the ending index points for each year
    years = field.index.year
    annual_mean = field.groupby(years).mean()
    annual_mean.index = pd.DatetimeIndex(
        field.index.to_series().groupby(years).max()
    )
    # Subset dataframe to selection
    field = field.loc[start:end]
    title = f"Minute data from {observatory}"
    if hourly_mean:
        field = field.resample("1h").mean()
        title += ": averaged over each hour"
    if show_annual_mean:
        title += "\nshowing offset from annual mean"
        # Broadcast each year's mean back over every plotted time stamp
        #   so we can use it directly in ax.fill_between.
        #   Backfilling works because each mean is labelled with the
        #   last time stamp of its year.
        annual_mean = annual_mean.reindex(index=field.index, method="backfill")
    fig, axes = plt.subplots(nrows=3, figsize=(10, 7), sharex=True)
    for ax, cpt in zip(axes, components):
        if show_annual_mean:
            ax.fill_between(field.index, field[cpt], annual_mean[cpt])
        else:
            ax.plot(field[cpt])
        ax.set_ylabel(f"{cpt} (nT)")
        ax.grid()
    fig.suptitle(title)
    axes[2].set_xlabel("Date")
    fig.tight_layout()
    return fig

def make_widgets():
    """Use ipywidgets to interact with plot_subset_timeseries

    Reads the available date range from the global obs_min.

    Returns:
        VBox: a label, the date pickers and checkboxes, and the plot output
    """
    mini, maxi = obs_min.index.min().date(), obs_min.index.max().date()
    # Default selection is the first month of data. DateOffset copes with
    #   December (year rollover) and with days that do not exist in the
    #   following month (e.g. 31 January); the result is kept inside the
    #   available range and handed over as a plain date
    one_month_on = (pd.Timestamp(mini) + pd.DateOffset(months=1)).date()
    default_end = min(one_month_on, maxi)
    start_date = widgets.DatePicker(
        value=mini,
        description='Start Date',
    )
    end_date = widgets.DatePicker(
        value=default_end,
        description='End Date',
    )
    hourly_mean = widgets.Checkbox(
        value=True,
        description='Hourly mean',
    )
    show_annual_mean = widgets.Checkbox(
        value=True,
        description='Show annual mean',
    )
    # Keys must match the keyword arguments of plot_subset_timeseries
    controls = {
        'start_date': start_date,
        'end_date': end_date,
        'hourly_mean': hourly_mean,
        'show_annual_mean': show_annual_mean,
    }
    return widgets.VBox(
        [widgets.Label(f"Select dates within range: {mini}, {maxi}"),
         widgets.HBox([start_date, end_date, widgets.VBox([hourly_mean, show_annual_mean])]),
         widgets.interactive_output(plot_subset_timeseries, controls)]
    )

# Build the interactive controls; as the last expression in the cell,
#   the returned widget container is displayed by the notebook
make_widgets()

```{note}
What signals can you see in this data?
```

```{toggle}
- Daily oscillation: due to the rotation of the Earth driving ionospheric change through the day/night - this is the Sq variation ("solar quiet-day" variation)
- Shift in baseline over the year: due to the change in the main magnetic field from the core - this is the secular variation (SV)
- More random variations due to geomagnetic activity
```

## Daily, seasonal and solar variations in declination

Let's now fetch the hourly dataset - these data are specially processed to improve data quality, over the straightforward hourly means calculated above from the minute data. For more information, see [Macmillan, S., Olsen, N. Observatory data and the Swarm mission. Earth Planet Sp 65, 15 (2013). https://doi.org/10.5047/eps.2013.07.011](https://doi.org/10.5047/eps.2013.07.011)

In [None]:
# Download the hourly-cadence dataset for Eskdalemuir (ESK), 1900 to 2020
obs_hourly = fetch_obs_data_for_years(
    observatory="ESK",
    year_start=1900,
    year_end=2020,
    cadence="H",
    # additional kwargs for viresclient.SwarmRequest.get_between()
    asynchronous=False,  # Make synchronous requests (faster)
                         #  - only works for smaller data chunks
                         #  - implicitly disables "Processing" progress bar
    show_progress=False,        # Disable intermediate progress bars
#     leave_intermediate_progress_bars=False,  # Clean up bars as we go
#     show_progress_chunks=False  # Disable "Processing chunks" progress bar
)

We will evaluate the declination angle, $D = \arctan(Y/X)$, which is the horizontal deviation of the field from geographic North [(what are the geomagnetic components?)](https://intermagnet.github.io/faq/10.geomagnetic-comp.html).

Next we summarise the data further by aggregating measurements over each month, evaluating the mean values over hourly intervals. For example, the mean declination at 10am across all days in January, the mean at 11am, and so on, repeated for each time of day and for each month. We then evaluate the offset of these declinations from the mean over the whole of each month - this is stored in `D_variation` in the resulting dataframe.

In [None]:
def monthly_means(df):
    """Return MultiIndex DataFrame of monthly means over each hourly interval

    Args:
        df (DataFrame): Time-indexed data containing a "D" column.
            NB: modified in place - "t_hour" and "t_year" columns are added

    Returns:
        DataFrame: Indexed by (Year, Month, Hour), holding the mean of each
            numeric column of df, plus "D_variation": the mean D for that
            hour of day minus the mean D over the whole of that month
    """
    stamps = df.index
    # Append hour of day, and approx fractional year, to use for plotting
    df["t_hour"] = stamps.hour + stamps.minute / 60
    df["t_year"] = stamps.year + (stamps.month - 0.5) / 12
    # Only numeric columns can be averaged
    #   (string columns such as the observatory code are left out)
    numeric = df.select_dtypes(include="number")
    # Calculate the monthly mean for each hourly interval of the day
    monthly = numeric.groupby([stamps.year, stamps.month, stamps.hour]).mean()
    monthly.index.names = ["Year", "Month", "Hour"]
    # Calculate the monthly mean over all hourly intervals
    monthly_all = numeric.groupby([stamps.year, stamps.month]).mean()
    monthly_all.index.names = ["Year", "Month"]
    # Calculate the daily declination variations:
    #   the monthly average of the hourly intervals minus the total monthly mean.
    #   The total is looked up by each row's (Year, Month), so months with
    #   missing hours of day stay correctly aligned
    month_of_row = monthly.index.droplevel("Hour")
    monthly["D_variation"] = (
        monthly["D"].values - monthly_all["D"].reindex(month_of_row).values
    )
    return monthly

# Declination in degrees, from the East (Y) and North (X) columns
obs_hourly["D"] = np.rad2deg(np.arctan2(obs_hourly["Y"], obs_hourly["X"]))
# Aggregate by (year, month, hour of day); note that monthly_means also
#   adds t_hour and t_year columns to obs_hourly itself
obs_monthly = monthly_means(obs_hourly)
obs_monthly

In [None]:
def plot_dec_variation(df, year_start=1986, year_end=1997):
    """Draw the declination variation as a 3D surface over year and hour (UT)

    Args:
        df (DataFrame): Must hold "t_year", "t_hour" and "D_variation" columns,
            with the year as the first index level
        year_start (int): First year to include
        year_end (int): Last year to include

    Returns:
        tuple: (Figure, Axes)
    """
    # Keep only the requested span of years (first level of the index)
    selection = df.loc[year_start:year_end, :]
    fig, ax = plt.subplots(
        1, 1, figsize=(10, 10), subplot_kw={"projection": "3d"}
    )
    # Fixed colour limits so the colour scale does not depend on the selection
    surface_style = dict(cmap=plt.cm.jet, vmin=-0.15, vmax=0.15, antialiased=True)
    ax.plot_trisurf(
        selection["t_year"],
        selection["t_hour"],
        selection["D_variation"],
        **surface_style,
    )
    ax.set_xlabel("Year")
    ax.set_ylabel("Hour (UT)")
    ax.set_zlabel("D variations (degrees)")
    ax.view_init(elev=60, azim=210)
    return fig, ax

# Plot the years 1986 to 2008; the trailing semicolon stops the notebook
#   from echoing the returned (fig, ax) tuple
plot_dec_variation(obs_monthly, 1986, 2008);