In [55]:
import numpy as np
import pandas as pd
import netCDF4
import glob
from scipy import interpolate

In [56]:
# Turbine catalogue and sector-level power output.
# 'Unnamed: 0.1' is dropped right after loading (presumably a leftover index
# column from an earlier CSV export — TODO confirm).
turbines = pd.read_csv('windturbines_fillna_shiftlon_xy.csv').drop(columns=['Unnamed: 0.1'])

sector_power = pd.read_csv('windpower_task0_updated.csv')

In [57]:
# Preview the first rows of the turbine table as a quick sanity check of the load.
turbines.head()

Unnamed: 0.1,Unnamed: 0,Wind turbine ID,Terrain height [m],Nacelle height [m],Rotor diameter [m],Max power [MW],Price region,Installation date,Longitude,Latitude,point,closest,indices,x,y,"(y,x)"
0,0,1781-V-010-001,65.12,30.0,14.0,0.02,SE3,1881-12-31,14.133793,59.253088,"(59.25308804, 14.13379311)","(59.228135647012415, 14.079305883038023)",4609,27,46,"(46, 27)"
1,1,1785-V-006-001,59.07,16.0,11.0,0.01,SE3,1881-12-31,12.802235,59.380151,"(59.380151299999994, 12.80223545)","(59.392550713030694, 12.840249433484077)",3428,20,48,"(48, 20)"
2,2,1880-V-020-001,45.05,16.0,90.0,0.02,SE3,1881-12-31,15.075022,59.178468,"(59.17846848, 15.07502163)","(59.14169787003992, 15.131470469888283)",5622,33,45,"(45, 33)"
3,3,2321-V-002-001,413.0,98.0,90.0,0.02,SE2,1881-12-31,13.376852,63.437566,"(63.43756636, 13.37685237)","(63.446103743945535, 13.340484662552742)",4149,24,93,"(93, 24)"
4,4,2321-V-001-001,413.58,98.0,90.0,0.1,SE2,1881-12-31,13.376548,63.436982,"(63.43698165, 13.37654786)","(63.446103743945535, 13.340484662552742)",4149,24,93,"(93, 24)"


In [58]:
# Columns of the turbine table that are carried into the per-region frames.
turbine_features = [
    'Terrain height [m]',
    'Nacelle height [m]',
    'Rotor diameter [m]',
    'Max power [MW]',
    'Price region',
    'Installation date',
    'Longitude',
    'Latitude',
    'x',
    'y',
    '(y,x)',
]

In [85]:
# One frame per price region, restricted to the selected feature columns.
# reset_index() is called without drop=True, so each frame keeps the row's
# original label in an extra leading 'index' column and gets a fresh 0..n-1 index.
turbines_SE1, turbines_SE2, turbines_SE3, turbines_SE4 = (
    turbines.loc[turbines['Price region'] == region_code, turbine_features].reset_index()
    for region_code in ('SE1', 'SE2', 'SE3', 'SE4')
)

In [86]:
# Plain NumPy copies of the four per-region turbine frames.
turbines_SE1_np, turbines_SE2_np, turbines_SE3_np, turbines_SE4_np = (
    region_frame.to_numpy()
    for region_frame in (turbines_SE1, turbines_SE2, turbines_SE3, turbines_SE4)
)

In [99]:
# All NetCDF weather files under /data.
# glob.glob() returns matches in arbitrary (filesystem-dependent) order, so the
# list is sorted to make the file order — and the time axis built from it —
# reproducible between runs and machines.
# NOTE(review): lexicographic order is chronological only if the filenames embed
# a sortable date — confirm against the actual file naming.
listoffiles = sorted(glob.glob("/data/*.nc"))

In [101]:
# Collect the decoded time axis of every weather file: one entry per file,
# appended in the order the files appear in `listoffiles`.
timestamp_list = []

for file in listoffiles:
    # NOTE(review): the dataset is opened but never closed, so one handle per
    # file stays open. It cannot simply be closed here: a later cell reads
    # `var` after this loop, which needs the last dataset to remain open.
    weather = netCDF4.Dataset(file, 'r')
    var = weather.variables['time']
    # Convert the numeric time values to datetime objects using the
    # variable's own `units` attribute.
    Timestamp = netCDF4.num2date(var[:], units=var.units)
    timestamp_list.append(Timestamp)

In [112]:
# Flatten the per-file time axes into a single list, keeping file order.
# Exactly the first 24 entries of each file are taken; the position of a
# timestamp in this list is later used as its row index.
timestamp_list_reordered = [
    file_timestamps[step]
    for file_timestamps in timestamp_list
    for step in range(24)
]
        

In [114]:
# Total number of time steps collected (24 per file).
len(timestamp_list_reordered)

10152

In [116]:
# Decode the first time step of whichever dataset `var` is currently bound to.
# `var` is not defined in this cell: it is left over from the last iteration of
# an earlier file loop, so the result depends on which file was processed last
# and on that dataset still being open.
b = netCDF4.num2date(var[0], units=var.units)
b

cftime.DatetimeGregorian(2001-02-28 00:00:00)

In [117]:
# Position of that timestamp in the flattened list (first match; linear search).
timestamp_list_reordered.index(b)

10128

In [62]:
# Names of the NetCDF variables extracted for every turbine location.
weather_features = [
    'Temperature',
    'RelativeHumidity',
    'Wind_U',
    'Wind_V',
    'Pressure',
    'CloudCover',
    'WindGustSpeed',
]

In [None]:
# Build, for each price region, an array of ensemble-mean weather values sampled
# at every turbine's grid cell:
#     weather_at_turbine_locations_SEn[time, turbine, feature]
listoffiles=(glob.glob("/data/*.nc"))

weather_features = ['Temperature', 'RelativeHumidity', 'Wind_U', 'Wind_V', 'Pressure', 'CloudCover', 'WindGustSpeed']


n_times = len(timestamp_list_reordered)

weather_at_turbine_locations_SE1 = np.zeros(shape=(n_times, len(turbines_SE1.index), len(weather_features)))
weather_at_turbine_locations_SE2 = np.zeros(shape=(n_times, len(turbines_SE2.index), len(weather_features)))
weather_at_turbine_locations_SE3 = np.zeros(shape=(n_times, len(turbines_SE3.index), len(weather_features)))
weather_at_turbine_locations_SE4 = np.zeros(shape=(n_times, len(turbines_SE4.index), len(weather_features)))

# Pair every output array with the grid indices of that region's turbines so a
# whole region can be filled with one vectorised lookup instead of a
# per-turbine Python loop.
region_targets = [
    (weather_at_turbine_locations_SE1, turbines_SE1['y'].to_numpy(), turbines_SE1['x'].to_numpy()),
    (weather_at_turbine_locations_SE2, turbines_SE2['y'].to_numpy(), turbines_SE2['x'].to_numpy()),
    (weather_at_turbine_locations_SE3, turbines_SE3['y'].to_numpy(), turbines_SE3['x'].to_numpy()),
    (weather_at_turbine_locations_SE4, turbines_SE4['y'].to_numpy(), turbines_SE4['x'].to_numpy()),
]

for file in listoffiles:
    # The context manager closes each file once it has been processed, so
    # handles do not accumulate across the whole archive.
    with netCDF4.Dataset(file, 'r') as ds:
        var = ds.variables['time']

        # Only the first 24 time steps of each file are used, matching how
        # timestamp_list_reordered was built.
        for t in range(24):

            # Row of the output arrays that belongs to this timestamp.
            i = timestamp_list_reordered.index(netCDF4.num2date(var[t], units=var.units))

            for k, feature in enumerate(weather_features):

                data = ds.variables[feature][t,:,:,:]
                # Mean over axis 0 (the ensemble members, per the original
                # author's note), leaving a 2-D field indexed as [y, x].
                m = data.mean(axis=0)

                for target, grid_y, grid_x in region_targets:
                    # netCDF4 may hand back masked arrays; fill masked cells
                    # with NaN so the raw fill value is never written out.
                    target[i, :, k] = np.ma.filled(m[grid_y, grid_x], np.nan)
            
   
            

In [120]:
# Check the (time, turbine, feature) dimensions of the SE1 array.
# NOTE(review): the saved output shows a first dimension of 24, which does not
# match the n_times-sized allocation — presumably stale from an earlier
# version of the cell; re-run to refresh.
weather_at_turbine_locations_SE1.shape

(24, 472, 7)