# Exploratory Data Analysis

 - wind forecast and wind power from __2009/07/01 to 2011/01/01__, the initial train phase
 - wind forecast and wind power on 36 hours phases between each of the 157 test periods on which you can retrain you models

In [1]:
import pandas as pd
import seaborn as sns
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
training_data_1 = pd.read_csv("Data/Initial/train.csv")
testing_data_1 = pd.read_csv("Data/Initial/test.csv")

wp_1_forecast = pd.read_csv("Data/Initial/wp1.csv")
wp_2_forecast = pd.read_csv("Data/Initial/wp2.csv")
wp_3_forecast = pd.read_csv("Data/Initial/wp3.csv")
wp_4_forecast = pd.read_csv("Data/Initial/wp4.csv")
wp_5_forecast = pd.read_csv("Data/Initial/wp5.csv")
wp_6_forecast = pd.read_csv("Data/Initial/wp6.csv")

In [4]:
wp_1_forecast.head(48)

Unnamed: 0,date,hors,u,v,ws,wd
0,2009070100,1,3.14,-3.62,4.79,139.09
1,2009070100,2,2.96,-3.56,4.63,140.26
2,2009070100,3,2.89,-3.46,4.51,140.15
3,2009070100,4,2.87,-3.33,4.4,139.26
4,2009070100,5,2.8,-3.19,4.25,138.73
5,2009070100,6,2.57,-3.07,4.0,140.0
6,2009070100,7,2.14,-2.97,3.66,144.27
7,2009070100,8,1.67,-2.86,3.32,149.72
8,2009070100,9,1.43,-2.7,3.06,152.18
9,2009070100,10,1.56,-2.46,2.91,147.59


In [5]:
testing_data_1.tail()

Unnamed: 0,date
7435,2012062420
7436,2012062421
7437,2012062422
7438,2012062423
7439,2012062500


In [6]:
test_start_date = '2011-01-01 01:00:00'
test_end_date = '2012-06-25 00:00:00'

# Functions

In [7]:
def integer2date(s):
    s = str(s)
    return  datetime(year=int(s[0:4]), month=int(s[4:6]), day=int(s[6:8]), hour=int(s[8:]))

def adding_hours(h):
    return timedelta(hours = h)

def date_conversion(df):
    df['date_conv'] = df.date.apply(lambda x: integer2date(x))
    df['hours_added'] = df.hors.apply(lambda x: adding_hours(x))
    df['date'] = df['date_conv'] + df['hours_added']
    df = df.drop(columns = ['date_conv', 'hours_added', 'hors'], axis = 1)
    return df

In [8]:
def finding_batch_forecast(df):
    df['forecast_nb'] = 0
    df['gap'] = 0
    for i in range(int(len(df)/12)):
        nb = (i+1) % 4
        if nb == 0:
            nb = 4
        df.iloc[12*i:12*(i+1), 5] = nb
        for j in range(12):
            df.iloc[12*i+j, 6] = j+1
        
    df1 = df[df.forecast_nb == 1].set_index('date')
    df2 = df[df.forecast_nb == 2].set_index('date')
    df3 = df[df.forecast_nb == 3].set_index('date')
    df4 = df[df.forecast_nb == 4].set_index('date') 
    
    df1['u_1'] = df1['u']
    df1['v_1'] = df1['v']
    df1['wd_1'] = df1['wd']
    df1['ws_1'] = df1['ws']    
    
    df1['u_2'] = df2['u']
    df1['v_2'] = df2['v']
    df1['wd_2'] = df2['wd']
    df1['ws_2'] = df2['ws']

    df1['u_3'] = df3['u']
    df1['v_3'] = df3['v']
    df1['wd_3'] = df3['wd']
    df1['ws_3'] = df3['ws']

    df1['u_4'] = df4['u']
    df1['v_4'] = df4['v']
    df1['wd_4'] = df4['wd']
    df1['ws_4'] = df4['ws']
    
    df1 = df1.drop(columns = ['u', 'v', 'wd', 'ws', 'forecast_nb'], axis = 1)
    
    return df1

# Training data 

In [9]:
training_data_1['date'] = training_data_1.date.apply(lambda x: integer2date(x))

# WP1 analysis

In [10]:
wp_1_training = training_data_1[['date', 'wp1']]

In [11]:
wp_1_forecast = date_conversion(wp_1_forecast)
wp_1_forecast = finding_batch_forecast(wp_1_forecast)

In [13]:
wp_1_forecast = wp_1_forecast.reset_index()
wp_1_forecast.head(24)

Unnamed: 0,index,date,gap,u_1,v_1,wd_1,ws_1,u_2,v_2,wd_2,ws_2,u_3,v_3,wd_3,ws_3,u_4,v_4,wd_4,ws_4
0,0,2009-07-01 01:00:00,1,3.14,-3.62,139.09,4.79,,,,,,,,,,,,
1,1,2009-07-01 02:00:00,2,2.96,-3.56,140.26,4.63,,,,,,,,,,,,
2,2,2009-07-01 03:00:00,3,2.89,-3.46,140.15,4.51,,,,,,,,,,,,
3,3,2009-07-01 04:00:00,4,2.87,-3.33,139.26,4.4,,,,,,,,,,,,
4,4,2009-07-01 05:00:00,5,2.8,-3.19,138.73,4.25,,,,,,,,,,,,
5,5,2009-07-01 06:00:00,6,2.57,-3.07,140.0,4.0,,,,,,,,,,,,
6,6,2009-07-01 07:00:00,7,2.14,-2.97,144.27,3.66,,,,,,,,,,,,
7,7,2009-07-01 08:00:00,8,1.67,-2.86,149.72,3.32,,,,,,,,,,,,
8,8,2009-07-01 09:00:00,9,1.43,-2.7,152.18,3.06,,,,,,,,,,,,
9,9,2009-07-01 10:00:00,10,1.56,-2.46,147.59,2.91,,,,,,,,,,,,


In [11]:
wp_1_forecast

Unnamed: 0,date,gap,u_1,v_1,wd_1,ws_1,u_2,v_2,wd_2,ws_2,u_3,v_3,wd_3,ws_3,u_4,v_4,wd_4,ws_4
0,2009-07-01 01:00:00,1,3.14,-3.62,139.09,4.79,,,,,,,,,,,,
1,2009-07-01 02:00:00,2,2.96,-3.56,140.26,4.63,,,,,,,,,,,,
2,2009-07-01 03:00:00,3,2.89,-3.46,140.15,4.51,,,,,,,,,,,,
3,2009-07-01 04:00:00,4,2.87,-3.33,139.26,4.40,,,,,,,,,,,,
4,2009-07-01 05:00:00,5,2.80,-3.19,138.73,4.25,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26119,2012-06-23 08:00:00,8,-0.20,-1.37,188.45,1.38,-0.62,-1.52,202.15,1.64,0.23,-2.02,173.53,2.03,0.13,-2.16,176.60,2.17
26120,2012-06-23 09:00:00,9,-0.01,-1.11,180.61,1.11,-0.69,-1.25,208.73,1.43,0.35,-1.94,169.70,1.97,0.30,-2.13,172.02,2.15
26121,2012-06-23 10:00:00,10,0.04,-1.00,177.91,1.00,-0.85,-0.96,221.45,1.28,0.38,-1.80,168.12,1.84,0.34,-2.05,170.55,2.08
26122,2012-06-23 11:00:00,11,0.07,-0.96,175.68,0.97,-0.92,-0.72,232.04,1.16,0.39,-1.63,166.44,1.68,0.36,-1.93,169.44,1.97


In [16]:
wp_1_forecast[(wp_1_forecast.date >= test_start_date) & (wp_1_forecast.date <= test_end_date)].head(48)

Unnamed: 0,date,gap,u_1,v_1,wd_1,ws_1,u_2,v_2,wd_2,ws_2,u_3,v_3,wd_3,ws_3,u_4,v_4,wd_4,ws_4
13176,2011-01-01 01:00:00,1,2.27,7.04,17.85,7.4,0.94,7.4,7.23,7.46,1.17,7.71,8.6,7.8,0.52,7.2,4.1,7.21
13177,2011-01-01 02:00:00,2,1.95,6.93,15.69,7.19,0.99,7.18,7.82,7.25,0.89,7.74,6.55,7.79,0.4,7.11,3.25,7.12
13178,2011-01-01 03:00:00,3,1.49,7.09,11.87,7.24,1.14,7.1,9.09,7.19,0.65,7.8,4.75,7.82,0.32,7.22,2.52,7.23
13179,2011-01-01 04:00:00,4,0.95,7.43,7.27,7.5,1.3,7.24,10.14,7.36,0.44,7.88,3.22,7.89,0.26,7.52,2.0,7.52
13180,2011-01-01 05:00:00,5,0.37,7.84,2.67,7.85,1.28,7.49,9.67,7.6,0.19,7.99,1.33,7.99,0.1,7.86,0.71,7.86
13181,2011-01-01 06:00:00,6,-0.22,8.17,358.46,8.17,0.87,7.69,6.42,7.74,-0.23,8.13,358.4,8.14,-0.35,8.06,357.51,8.07
13182,2011-01-01 07:00:00,7,-0.76,8.33,354.78,8.36,-0.04,7.73,359.67,7.73,-0.84,8.3,354.24,8.35,-1.16,8.03,351.79,8.11
13183,2011-01-01 08:00:00,8,-1.18,8.38,351.96,8.46,-1.13,7.77,351.7,7.85,-1.42,8.51,350.55,8.62,-1.99,7.97,345.98,8.21
13184,2011-01-01 09:00:00,9,-1.41,8.43,350.49,8.55,-1.97,7.99,346.14,8.23,-1.67,8.74,349.2,8.9,-2.4,8.16,343.59,8.51
13185,2011-01-01 10:00:00,10,-1.42,8.57,350.61,8.68,-2.24,8.54,345.3,8.83,-1.4,9.0,351.17,9.11,-2.12,8.81,346.48,9.06


# WP2 analysis

In [13]:
wp_2_training = training_data_1[['date', 'wp2']]

In [14]:
wp_2_forecast = date_conversion(wp_2_forecast)
wp_2_forecast = finding_batch_forecast(wp_2_forecast)