In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import xarray as xr

In [2]:
submission_data = pd.read_csv("data/submission_data_Stochastic_Parrots.csv")
training_data = pd.read_csv("data/training_data.csv")
nwp = xr.load_dataset("data/hres_1day_south_scotland_202101_202306.nc")
wf_capacity = pd.read_csv("data/Central Scotland Wind Farms.csv")
gb_wind_farms = pd.read_csv("data/onshore_wind_farms.csv", encoding="cp1252")

In [3]:
training_data = training_data.set_index("dtm")
training_data.head()

Unnamed: 0_level_0,wind_generation_MW,solar_generation_MW,total_generation_MW
dtm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-01T00:00:00Z,544.598,0.0,544.598
2021-01-01T00:30:00Z,534.354,0.0,534.354
2021-01-01T01:00:00Z,482.512,0.0,482.512
2021-01-01T01:30:00Z,426.26,0.0,426.26
2021-01-01T02:00:00Z,346.434,0.0,346.434


In [4]:
wf_capacity

Unnamed: 0,Name,Installed Capacity,Latitude,Longitude
0,Auchrobert Wind Farm,36,55.6239,-3.98417
1,Andershaw Wind Farm,35,55.53008,-3.799647
2,Black Law,134,55.77787,-3.707898
3,Black Law Extension Phase 1,69,55.77787,-3.707898
4,Braes of Doune,74,56.269829,-4.058936
5,Clyde Central,196,55.419488,-3.634978
6,Clyde North,198,55.468552,-3.595056
7,Clyde South,128,55.419488,-3.634978
8,Douglas West Dalquhandy,45,55.5651,-3.87285
9,Galawhistle Wind Farm,56,55.5,-4.0


### Step 1 - Disaggregate Over Wind Farm Sites

In [5]:
gb_wind_farms_dict = {
    "Auchrobert Wind Farm": "Auchrobert Wind Farm",
    "Harestanes": "Harestanes",
    "Black Law": "Black Law",
    "Andershaw": "Andershaw Wind Farm",
    "Black Law Extension Phase 1": "Black Law Extension - 1a",
    "Braes of Doune	": "Braes O'Doune",
    "Clyde Central": "Clyde Extension",
    "Dalquhandy": "Dalquhandy Windfarm",
    "Galawhistle Wind Farm": "Galawhistle",
    "Kype Muir": "Kype Muir Wind Farm",
    "Middle Muir": "Middle Muir Wind Farm",
    "Tullo": "Tullo Farm",
    "Tullo Wind Farm Ext": "Tullo Wind Farm South (Ext.)",
    "Whitelee 1": "Whitelee",
    "Whitelee 2": "Whitelee Extension"}

gb_wind_farms.loc[gb_wind_farms["Site Name"].isin(list(gb_wind_farms_dict.values()))]

Unnamed: 0,Ref ID,Operator (or Applicant),Site Name,Technology Type,Installed Capacity (MWelec),Turbine Capacity (MW),No. of Turbines,Height of Turbines (m),X-coordinate,Y-coordinate
368,3116,Scottish Power Renewables,Black Law,Wind Onshore,124.0,2.3,54.0,,289500.0,653500.0
371,3119,Greencoat UK Wind,Braes O'Doune,Wind Onshore,72.0,2.0,36.0,,272590.0,710500.0
674,3489,CRE Energy/ Scottish Power,Whitelee,Wind Onshore,322.0,2.3,140.0,,256800.0,645435.0
784,4422,Eneco,Tullo Wind Farm South (Ext.),Wind Onshore,10.0,2.5,4.0,,375388.0,770733.0
1243,4119,Scottish Power Renewables,Harestanes,Wind Onshore,136.0,2.0,68.0,,300000.0,595000.0
1311,4189,Lantern Bidco,Dalquhandy Windfarm,Wind Onshore,45.0,2.0,15.0,150.0,214836.0,591232.0
1370,4253,Scottish Power Renewables,Black Law Extension - 1a,Wind Onshore,48.4,1.67,29.0,,289500.0,653500.0
1449,4337,Falck Renewables,Auchrobert Wind Farm,Wind Onshore,36.0,2.5,12.0,,275154.0,638500.0
1478,4368,Ventient (formerly Infinis),Galawhistle,Wind Onshore,66.0,3.0,22.0,,275440.0,629090.0
1544,4440,Banks Renewables,Middle Muir Wind Farm,Wind Onshore,51.0,3.4,15.0,,286178.0,625826.0


#### Derived Features

In [6]:
nwp["ws100"] = pow(pow(nwp.u100,2) + pow(nwp.v100,2), 0.5)
nwp["ws10"] = pow(pow(nwp.u100,2) + pow(nwp.v100,2), 0.5)

nwp["angle_10"] = np.arctan2(nwp.v10, nwp.u10)
nwp["angle_100"] = np.arctan2(nwp.v100, nwp.u100)

In [7]:
def get_variable_from_netcfd(lon, lat, var, nwp=nwp):

    return (nwp[var].sel(longitude=lon, latitude = lat, method="nearest")
            .to_dataframe()
            .set_index("valid_time")[[var]]
            .resample("1800s")
            .interpolate())

get_variable_from_netcfd(-3.5, 55.5, "ws100")

Unnamed: 0_level_0,ws100
valid_time,Unnamed: 1_level_1
2021-01-02 00:00:00,5.347208
2021-01-02 00:30:00,5.537805
2021-01-02 01:00:00,5.728402
2021-01-02 01:30:00,5.920444
2021-01-02 02:00:00,6.112487
...,...
2023-06-30 21:00:00,7.465930
2023-06-30 21:30:00,7.930463
2023-06-30 22:00:00,8.394996
2023-06-30 22:30:00,8.745025


In [50]:
test = pd.DataFrame(index = training_data.index)

In [28]:
total_training_dict = {}

for var in ["ws10", "ws100", "angle_10", "angle_100"]:
    for i, row in wf_capacity.iterrows():
        lon = row["Longitude"]
        lat = row["Latitude"]
        total_training_dict.update({f"wf_{i}_{var}": get_variable_from_netcfd(lon, lat, var)[var]})

total_training_df = pd.concat(total_training_dict, axis=1)

In [31]:
total_training_df.T.drop_duplicates().T

Unnamed: 0_level_0,wf_0_ws10,wf_1_ws10,wf_2_ws10,wf_4_ws10,wf_5_ws10,wf_6_ws10,wf_8_ws10,wf_9_ws10,wf_10_ws10,wf_13_ws10,...,wf_1_angle_100,wf_2_angle_100,wf_4_angle_100,wf_5_angle_100,wf_6_angle_100,wf_8_angle_100,wf_9_angle_100,wf_10_angle_100,wf_13_angle_100,wf_15_angle_100
valid_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-02 00:00:00,5.173418,4.861964,4.311080,4.596360,7.417416,5.016754,4.951170,4.581340,7.488032,6.141513,...,-0.854470,-0.563900,-0.838560,-1.126116,-0.926497,-0.716577,-0.774386,-1.206763,-0.733394,-0.644166
2021-01-02 00:30:00,5.375004,5.179488,4.547919,4.723401,7.523379,5.304816,5.224018,4.784647,7.398266,6.014855,...,-0.835943,-0.518919,-0.817929,-1.102399,-0.900692,-0.707300,-0.761499,-1.175030,-0.686655,-0.627349
2021-01-02 01:00:00,5.576590,5.497012,4.784760,4.850442,7.629343,5.592878,5.496865,4.987955,7.308500,5.888198,...,-0.817417,-0.473938,-0.797299,-1.078683,-0.874888,-0.698022,-0.748612,-1.143297,-0.639916,-0.610533
2021-01-02 01:30:00,5.780569,5.874023,4.901818,5.110626,7.722732,5.845023,5.767793,5.339160,7.242871,5.868638,...,-0.833953,-0.506174,-0.841580,-1.070812,-0.886910,-0.719722,-0.778644,-1.131067,-0.635723,-0.655463
2021-01-02 02:00:00,5.984549,6.251035,5.018877,5.370809,7.816121,6.097167,6.038720,5.690365,7.177241,5.849078,...,-0.850488,-0.538410,-0.885861,-1.062940,-0.898932,-0.741423,-0.808677,-1.118836,-0.631529,-0.700393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-30 21:00:00,9.278943,8.188334,7.933685,5.745156,6.746830,7.503975,8.546464,9.238339,5.284083,8.411113,...,0.316818,0.477187,0.633484,0.286291,0.321420,0.362754,0.299100,0.490260,0.726969,0.488759
2023-06-30 21:30:00,9.428005,8.717640,8.113911,5.930095,7.333937,8.011408,8.789968,9.570379,5.534676,8.487860,...,0.305976,0.466847,0.613847,0.252488,0.301557,0.356926,0.302887,0.386780,0.736645,0.474355
2023-06-30 22:00:00,9.577068,9.246946,8.294136,6.115033,7.921045,8.518841,9.033474,9.902420,5.785268,8.564607,...,0.295133,0.456508,0.594209,0.218685,0.281695,0.351098,0.306674,0.283299,0.746322,0.459951
2023-06-30 22:30:00,9.585122,9.445045,8.437646,6.368641,8.274918,8.836954,9.050447,10.095955,5.797058,8.504137,...,0.285750,0.437280,0.546439,0.195880,0.266166,0.333190,0.292860,0.278181,0.728308,0.436951


In [21]:
pd.DataFrame(total_training_dict, index=training_data.index)

KeyboardInterrupt: 