In [None]:
import pandas as pd
import xarray as xr
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from shapely.geometry import Point

In [None]:
def get_variable_from_netcfd(df, lon, lat, var):
    """Return the half-hourly series of ``var`` at the grid point nearest (lon, lat).

    Args:
        df: Container indexed by variable name whose entries support
            ``.sel(longitude=..., latitude=..., method="nearest")`` and
            ``.to_dataframe()`` (presumably an ``xarray.Dataset``, despite
            the parameter name).
        lon: Longitude of the requested location.
        lat: Latitude of the requested location.
        var: Name of the variable to extract.

    Returns:
        A DataFrame indexed by ``valid_time`` with the single column ``var``,
        resampled to 1800-second (30 minute) bins; the bins introduced by the
        resampling are filled by ``interpolate()`` with its default settings.
    """
    # Pick the grid cell closest to the requested coordinates.
    nearest_cell = df[var].sel(longitude=lon, latitude=lat, method="nearest")

    # Flatten to a table keyed by forecast validity time, keeping only `var`.
    by_valid_time = nearest_cell.to_dataframe().set_index("valid_time")
    half_hourly_bins = by_valid_time[[var]].resample("1800s")

    return half_hourly_bins.interpolate()


def get_ssrd_from_netcfd(df, lon, lat):
    """Return per-step radiation derived from the ``ssrd`` variable at (lon, lat).

    The half-hourly ``ssrd`` series is differenced step to step (the values
    are presumably an accumulated quantity -- TODO confirm against the NWP
    documentation). Negative differences are clipped to zero and the first
    step, which has no predecessor, is set to zero.

    Args:
        df: Dataset-like object passed straight to ``get_variable_from_netcfd``.
        lon: Longitude of the requested location.
        lat: Latitude of the requested location.

    Returns:
        A DataFrame indexed by ``valid_time`` with the single column
        ``radiation``.
    """
    accumulated = get_variable_from_netcfd(df, lon, lat, "ssrd")["ssrd"]

    # Difference between each value and the previous one.
    step_increase = accumulated - accumulated.shift(1)

    # Negative steps become 0; the leading NaN (no previous value) becomes 0.
    radiation = step_increase.clip(lower=0).fillna(0)
    return radiation.to_frame(name="radiation")

In [None]:
# Load the generation data, keeping only the timestamp and the target column.
# The raw `dtm` strings are parsed into a datetime `valid_time` column and then
# dropped, leaving the columns: solar_generation_MW, valid_time.
training = (
    pd.read_csv("data/training_data.csv")[["dtm", "solar_generation_MW"]]
    .assign(valid_time=lambda frame: pd.to_datetime(frame["dtm"]))
    .drop(columns=["dtm"])
)

In [None]:
# Read the NWP NetCDF file into `nwp`; later cells read its `latitude` and
# `longitude` coordinates and extract variables from it by name.
nwp = xr.load_dataset("data/hres_1day_south_scotland_202101_202306.nc")

In [None]:
# Names of additional NWP variables of interest (presumably ECMWF short names
# for temperature, dew point, cloud cover layers and precipitation -- TODO
# confirm). In the cells visible here they are only referenced by a
# commented-out block and by the final, unfinished loop.
solar_variables = ["t2m", "d2m", "lcc", "mcc", "hcc", "tp"]

In [None]:
# Coarse 4 x 4 sampling of the NWP domain.
# NOTE(review): latitude spans the FIRST and LAST coordinate values (so the
# range follows the storage order of the file), whereas longitude spans the
# minimum and maximum. Both names are reassigned to the full coordinate lists
# in the following cell, so this coarse grid is not used afterwards.

lat_range = np.linspace(*nwp.latitude.values[[0, -1]].tolist(), num=4)
lon_range = np.linspace(nwp.longitude.min(), nwp.longitude.max(), num=4)

In [None]:
# Use every latitude / longitude coordinate of the NWP grid (as plain Python
# lists), replacing any previously defined ranges.
lat_range, lon_range = (
    nwp[axis].values.tolist() for axis in ("latitude", "longitude")
)

In [None]:
# Boundary geometries read from `lad.json`; used below to keep only the grid
# points that fall inside at least one geometry.
scotland_gdf = gpd.read_file("lad.json")

In [None]:
# Every (latitude, longitude) combination of the grid, latitude varying
# slowest. shapely points take (x, y) == (longitude, latitude).
points = [Point(lon, lat) for lat in lat_range for lon in lon_range]

# Keep the points contained in at least one geometry of `scotland_gdf`.
inside_points = [
    candidate for candidate in points if scotland_gdf.contains(candidate).any()
]

# Back to an array of [latitude, longitude] rows for the extraction loop.
cross_array = np.array([[kept.y, kept.x] for kept in inside_points])

In [None]:
# Extract the radiation series for every retained grid point and tag each
# frame with its (rounded) coordinates.
#
# The per-point frames are collected in a list and concatenated ONCE:
# calling `pd.concat` inside the loop re-copies everything accumulated so far
# on each iteration (quadratic cost) and, because the accumulator starts as an
# empty DataFrame, also triggers pandas' deprecation warning about
# concatenating empty entries.
ssrd_frames = []

for lat, lon in cross_array:
    weather = get_ssrd_from_netcfd(nwp, lon, lat)
    # Rounded coordinates are used as grouping keys downstream.
    weather["latitude"] = round(lat, 1)
    weather["longitude"] = round(lon, 2)
    ssrd_frames.append(weather)

# `valid_time` is the index of each frame; turn it back into a column.
ssrd = pd.concat(ssrd_frames).reset_index()

# Mark the timestamps as UTC so they can be matched against the training data
# (assumes the NWP timestamps are timezone-naive UTC -- TODO confirm).
ssrd["valid_time"] = ssrd["valid_time"].dt.tz_localize("UTC")

In [None]:
# Inner join of observed generation with the per-grid-point radiation.
# `valid_time` is the only column the two frames share, so it is spelled out
# as the join key rather than left to be inferred.
training_radation = pd.merge(training, ssrd, on="valid_time", how="inner")

In [None]:
# Correlation between generation and radiation, computed separately for each
# grid point. `groupby(...).corr()` yields a 2 x 2 matrix per group; the entry
# in row "solar_generation_MW", column "radiation" is the value of interest.
per_point_corr = training_radation.groupby(["latitude", "longitude"])[
    ["solar_generation_MW", "radiation"]
].corr()

# Result columns: latitude, longitude, radiation (the correlation value).
correlations = (
    per_point_corr.xs("solar_generation_MW", level=-1)["radiation"].reset_index()
)

In [None]:
# Map of the per-grid-point correlation between radiation and generation,
# drawn over the boundary geometries, with the NWP domain outlined in red.
latitudes = nwp.latitude.values.tolist()
longitudes = nwp.longitude.values.tolist()

# Plain Python floats for the domain bounds (matplotlib patches expect
# numbers, not 0-d DataArrays).
lon_min, lon_max = min(longitudes), max(longitudes)
lat_min, lat_max = min(latitudes), max(latitudes)
margin = 0.05  # padding so the outline does not sit on top of the edge points

fig, ax = plt.subplots(figsize=(10, 10))

scotland_gpd_df = gpd.read_file("lad.json")
scotland_plot = scotland_gpd_df.plot(ax=ax)

rect = Rectangle(
    (lon_min - margin, lat_min - margin),
    (lon_max - lon_min) + 2 * margin,
    (lat_max - lat_min) + 2 * margin,
    linewidth=1,
    edgecolor="r",
    facecolor="none",
)

correlation_points = scotland_plot.scatter(
    correlations["longitude"],
    correlations["latitude"],
    c=correlations["radiation"],
    s=50,
    cmap="coolwarm",
)
# Without a colorbar the colour encoding cannot be read off the figure.
fig.colorbar(
    correlation_points,
    ax=ax,
    label="Correlation of radiation with solar generation",
)

# Zoom to the region around the NWP domain. These are the only limits that
# take effect, so they are set exactly once.
ax.set_ylim(54.5, 56.6)
ax.set_xlim(-6, -1.8)

ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.set_title("Radiation / generation correlation per NWP grid point")

ax.add_patch(rect)

plt.show()

In [None]:
# ssrd = pd.DataFrame()

# for lat, lon in cross_array:
#     weather_feature_list = []
#     for var in solar_variables:
#         weather_feature_list.append(get_variable_from_netcfd(nwp, lon, lat, var))
#     weather_feature_list.append(get_ssrd_from_netcfd(nwp, lon, lat))
#     weather = pd.concat(weather_feature_list, axis=1)
#     weather["latitude"] = round(lat, 1)
#     weather["longitude"] = round(lon, 2)
#     ssrd = pd.concat([ssrd, weather])

In [None]:
# Distribution of the per-grid-point correlation values.
correlations["radiation"].plot.hist(bins=100)

In [None]:
# Calendar features derived from the timestamp column.
# NOTE(review): `solar_mean` is not defined in any cell visible in this
# notebook -- confirm where it comes from, otherwise this cell fails on a
# fresh kernel.
timestamps = solar_mean["valid_time"].dt
solar_mean["days_since_start_of_year"] = timestamps.dayofyear
solar_mean["hour"] = timestamps.hour

In [None]:
# Cyclic (sine / cosine) encodings of day-of-year and hour-of-day, so that the
# end of a cycle sits next to its start.
# NOTE(review): the yearly period is fixed at 365, so day 366 of a leap year
# wraps slightly past a full cycle.
day_angle = 2 * np.pi * solar_mean["days_since_start_of_year"] / 365
hour_angle = 2 * np.pi * solar_mean["hour"] / 24

solar_mean["sin_days"] = np.sin(day_angle)
solar_mean["cos_days"] = np.cos(day_angle)
solar_mean["sin_hour"] = np.sin(hour_angle)
solar_mean["cos_hour"] = np.cos(hour_angle)

In [None]:
# FIXME: `DataFrame.merge` requires the frame to merge with (`right`) as an
# argument; called with no arguments this raises a TypeError. The intended
# right-hand frame and join keys still need to be supplied.
solar_mean.merge()

In [None]:
for col in solar_variables:
    