In [1]:
# Basics
import pandas as pd
import numpy as np
from datetime import datetime


# Visualization
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns
from windrose import WindroseAxes


# Preprocessing 
from sklearn.preprocessing import StandardScaler
from sklearn.impute import KNNImputer

**Dataset description:**

1) Windfarm's data:
    - 1 dataset from 2013 to 2016; data recorded every 10 min.
    - 1 dataset from 2017 to January 2018; data recorded every 10 min.

2) Meteorological station data:
    - 1 dataset from 2013 to January 2018; data recorded every 3 hrs.



### 1) Load Windfarm's data 
    - 1.1 Preliminary analysis
    - 1.2 Concatenate windfarm dataframes

In [2]:
# Read the two windfarm CSV exports; both files use ';' as the field separator.

# First file: windfarm records covering 2013 through 2016
df_13_16 = pd.read_csv('la-haute-borne-data-2013-2016.csv', sep=';')

# Second file: windfarm records from 2017 onwards
# NOTE(review): the file name says 2017-2020, while this notebook describes the
# set as 2017 to January 2018 -- confirm which period the file really covers.
df_17_18 = pd.read_csv('la-haute-borne-data-2017-2020.csv', sep=';')

**1.1 Preliminary analysis**

In [3]:
# Remove the cap on displayed columns (a global pandas option, so it also
# affects every later cell), then preview the first five rows of df_13_16
# with all of its columns visible.
pd.options.display.max_columns = None
df_13_16.head(5)

Unnamed: 0,Wind_turbine_name,Date_time,Ba_avg,Ba_min,Ba_max,Ba_std,Rt_avg,Rt_min,Rt_max,Rt_std,DCs_avg,DCs_min,DCs_max,DCs_std,Cm_avg,Cm_min,Cm_max,Cm_std,P_avg,P_min,P_max,P_std,Q_avg,Q_min,Q_max,Q_std,S_avg,S_min,S_max,S_std,Cosphi_avg,Cosphi_min,Cosphi_max,Cosphi_std,Ds_avg,Ds_min,Ds_max,Ds_std,Db1t_avg,Db1t_min,Db1t_max,Db1t_std,Db2t_avg,Db2t_min,Db2t_max,Db2t_std,Dst_avg,Dst_min,Dst_max,Dst_std,Gb1t_avg,Gb1t_min,Gb1t_max,Gb1t_std,Gb2t_avg,Gb2t_min,Gb2t_max,Gb2t_std,Git_avg,Git_min,Git_max,Git_std,Gost_avg,Gost_min,Gost_max,Gost_std,Ya_avg,Ya_min,Ya_max,Ya_std,Yt_avg,Yt_min,Yt_max,Yt_std,Ws1_avg,Ws1_min,Ws1_max,Ws1_std,Ws2_avg,Ws2_min,Ws2_max,Ws2_std,Ws_avg,Ws_min,Ws_max,Ws_std,Wa_avg,Wa_min,Wa_max,Wa_std,Va1_avg,Va1_min,Va1_max,Va1_std,Va2_avg,Va2_min,Va2_max,Va2_std,Va_avg,Va_min,Va_max,Va_std,Ot_avg,Ot_min,Ot_max,Ot_std,Nf_avg,Nf_min,Nf_max,Nf_std,Nu_avg,Nu_min,Nu_max,Nu_std,Rs_avg,Rs_min,Rs_max,Rs_std,Rbt_avg,Rbt_min,Rbt_max,Rbt_std,Rm_avg,Rm_min,Rm_max,Rm_std,Pas_avg,Pas_min,Pas_max,Pas_std,Wa_c_avg,Wa_c_min,Wa_c_max,Wa_c_std,Na_c_avg,Na_c_min,Na_c_max,Na_c_std
0,R80711,2013-01-07T01:20:00+01:00,41.16,-1.0,44.990002,11.27,12.65,12.0,13.0,0.41,204.71001,125.76,987.83002,140.62,-6.15,-6.39,-5.89,0.08,-1.38,-5.62,-0.7,0.89,0.0,-0.79,0.13,0.03,1.38,0.7,5.62,0.89,1.0,1.0,1.0,0.0,202.28,123.62,985.67999,140.58,28.33,28.0,28.5,0.1,25.99,25.799999,26.200001,0.12,32.580002,32.049999,32.950001,0.2,36.950001,36.75,37.200001,0.08,37.110001,37.099998,37.200001,0.02,37.779999,37.599998,38.049999,0.05,38.43,38.200001,38.650002,0.1,18.75,18.75,18.75,0.0,20.57,20.5,20.6,0.04,3.36,1.84,5.54,0.53,3.74,2.35,5.41,0.51,3.55,2.16,5.28,0.5,22.77,336.81,72.489998,10.74,,,,,,,,,4.04,-41.950001,53.73,10.74,5.25,5.2,5.3,0.05,49.98,49.959999,50.009998,0.01,705.81,704.90997,706.41998,0.34,1.91,1.17,9.34,1.32,18.299999,18.299999,18.299999,0.0,-69.540001,-311.60999,-15.66,30.809999,,,,,37.77,,,,33.75,,,
1,R80711,2013-01-05T22:20:00+01:00,-1.0,-1.0,-0.87,0.01,12.95,12.0,13.0,0.16,1157.03,968.13,1370.16,106.87,1556.76,834.54999,2278.9399,344.91,161.86,47.68,301.62,60.209999,20.17,10.61,25.120001,2.97,163.41,50.060001,302.07999,59.549999,0.99,1.0,0.91,0.02,1155.52,966.42999,1368.72,106.94,29.58,28.9,30.299999,0.33,28.09,27.5,28.700001,0.34,42.130001,40.450001,43.650002,0.69,53.970001,53.200001,54.650002,0.37,53.459999,52.599998,54.099998,0.5,43.459999,43.0,43.950001,0.26,44.369999,43.849998,44.700001,0.22,264.92999,259.12,285.45999,10.53,20.610001,20.5,20.799999,0.1,5.07,3.47,6.92,0.58,5.54,3.84,7.57,0.62,5.3,3.68,7.24,0.58,298.35001,242.8,326.35001,10.9,,,,,,,,,12.88,-39.16,49.709999,15.27,4.77,4.7,4.9,0.08,50.02,50.0,50.049999,0.01,703.96002,701.51001,705.13,0.77,11.0,9.19,13.06,1.02,20.709999,20.5,20.9,0.12,1302.11,470.04001,2110.98,389.28,,,,,313.35001,,,,279.92999,,,
2,R80711,2013-01-06T08:30:00+01:00,-1.0,-1.0,-1.0,0.0,13.74,13.0,14.0,0.35,1021.18,970.07001,1090.63,34.23,1142.24,759.06,1384.63,120.2,91.400002,47.220001,125.49,16.01,22.74,12.86,29.74,2.39,94.32,52.43,127.89,15.54,0.96,0.99,0.91,0.01,1019.75,968.70001,1089.1899,34.25,42.52,41.75,43.200001,0.36,41.549999,40.900002,42.0,0.28,64.589996,63.150002,66.150002,0.52,53.66,53.299999,54.099998,0.16,51.779999,51.299999,52.299999,0.26,45.049999,44.599998,45.75,0.23,46.009998,45.700001,46.25,0.11,231.67999,231.67999,231.67999,0.0,23.24,18.9,24.0,1.32,4.64,3.12,6.25,0.51,4.82,3.29,6.38,0.52,4.73,3.21,6.27,0.49,226.59,173.67,258.26999,12.87,,,,,,,,,-5.09,-58.009998,26.59,12.87,5.84,5.8,5.9,0.04,49.98,49.959999,50.0,0.01,701.56,700.03998,703.15997,0.43,9.7,9.21,10.37,0.33,22.0,22.0,22.1,0.01,851.96002,465.51999,1101.86,122.77,,,,,241.59,,,,246.67999,,,
3,R80711,2013-01-05T21:10:00+01:00,44.990002,44.990002,44.990002,0.0,12.14,12.0,13.0,0.27,122.29,79.279999,175.2,22.84,-6.09,-6.42,-5.86,0.09,-0.92,-1.41,-0.58,0.13,0.0,-0.19,0.1,0.01,0.92,0.58,1.41,0.13,1.0,1.0,1.0,0.0,119.91,76.730003,172.60001,22.82,29.049999,28.9,29.200001,0.05,26.280001,26.1,26.4,0.1,32.990002,32.700001,33.400002,0.15,39.470001,39.400002,39.5,0.04,39.919998,39.799999,40.0,0.04,40.93,40.849998,41.349998,0.07,41.73,41.599998,41.900002,0.05,259.12,259.12,259.12,0.0,21.09,20.9,21.1,0.04,2.47,1.28,3.85,0.53,2.85,1.2,4.24,0.53,2.66,1.26,4.01,0.52,265.75,229.00999,308.95001,12.62,,,,,,,,,6.64,-30.110001,49.830002,12.62,5.36,5.3,5.5,0.09,49.959999,49.919998,50.009998,0.02,698.96997,698.03998,700.88,0.51,1.14,0.73,1.63,0.22,19.4,19.4,19.6,0.02,-76.339996,-141.82001,-39.889999,19.17,,,,,280.75,,,,274.12,,,
4,R80711,2013-01-06T11:20:00+01:00,-0.76,-1.0,0.0,0.4,13.78,13.0,14.0,0.35,1041.27,960.59998,1157.36,64.559998,1052.1,249.23,1557.42,374.07001,87.900002,0.78,159.72,46.110001,75.080002,21.92,109.34,32.619999,124.47,39.889999,183.75,32.650002,0.77,1.0,0.03,0.28,1039.77,959.04999,1155.9399,64.589996,44.32,43.599998,44.849998,0.24,43.43,43.0,43.799999,0.18,64.919998,63.400002,66.0,0.48,56.849998,56.25,57.450001,0.31,55.709999,54.849998,56.5,0.45,49.860001,49.25,50.450001,0.23,50.950001,50.400002,51.200001,0.09,278.87,278.87,278.87,0.0,22.59,17.700001,25.299999,2.92,4.66,2.66,6.47,0.68,4.86,3.01,6.55,0.7,4.76,2.84,6.47,0.67,278.01001,205.60001,336.29999,16.93,,,,,,,,,-0.86,-73.269997,57.419998,16.93,6.57,6.5,6.7,0.09,49.98,49.959999,50.009998,0.01,700.78003,699.06,702.03003,0.66,9.9,9.11,11.01,0.62,23.700001,23.6,23.799999,0.02,785.60999,7.49,1321.53,381.57999,,,,,293.01001,,,,293.87,,,


In [4]:
# Print the (rows, columns) shape of each windfarm dataframe.
for label, frame in (
    ('Shape 2013-2016:', df_13_16),
    ('Shape 2017-2018:', df_17_18),
):
    print(label, frame.shape)

Shape 2013-2016: (840380, 138)
Shape 2017-2018: (217588, 138)


In [5]:
# Count the records per wind turbine in the 2013-2016 data; each distinct
# name in the result is one turbine (the recorded output lists 4 turbines).
df_13_16.loc[:, 'Wind_turbine_name'].value_counts()

R80711    210095
R80721    210095
R80736    210095
R80790    210095
Name: Wind_turbine_name, dtype: int64

In [6]:
# Count the records per wind turbine in the 2017-2018 data; each distinct
# name in the result is one turbine (the recorded output lists 4 turbines,
# with R80790 showing fewer records than the other three).
df_17_18.loc[:, 'Wind_turbine_name'].value_counts()

R80721    54433
R80736    54433
R80711    54433
R80790    54289
Name: Wind_turbine_name, dtype: int64