In [None]:
import pandas as pd
import numpy as np
import os

In [None]:
# Folder that holds the raw measurement files.
DATA_PATH = '/home/chrisams/Documents/datasets/data_TAIM/'

# Names of the mean wind-speed columns measured at 20 m and 10 m.
V20 = 'Velocidad de viento en 20.0 metros [mean,m/s]'
V10 = 'Velocidad de viento en 10.0 metros [mean,m/s]'

# Columns kept from every raw file: the timestamp first, then both speeds.
COLUMNS = ['Fecha Hora (YYYY-MM-DD HH:MM)', V20, V10]

In [None]:
# Read all the raw data files.
# Only regular files located directly inside DATA_PATH are read.  Walking the
# whole tree would also visit sub-directories (the processed output is saved
# in a folder below DATA_PATH) and rebuild the list for each of them, so the
# raw frames would end up replaced by the content of the last directory
# visited.  Sorting the names makes the plant numbering reproducible.
raw_file_names = sorted(
    name for name in os.listdir(DATA_PATH)
    if os.path.isfile(os.path.join(DATA_PATH, name))
)
eolic_plants_df = [
    pd.read_csv(os.path.join(DATA_PATH, name)) for name in raw_file_names
]

In [None]:
# Clean data and keep only useful columns and rows.
#
# For every plant the two wind-speed columns are indexed by timestamp, the
# power-law exponent is estimated from the mean 10 m and 20 m speeds, and the
# 20 m series is extrapolated to 85 m.  Afterwards every series is cut to the
# period shared by all plants and standardised (zero mean, unit std).

# Bounds of the common period: the latest first timestamp and the earliest
# last timestamp seen so far (None until the first plant is processed).
min_date = None
max_date = None

for i, raw_plant in enumerate(eolic_plants_df):
    timestamps = pd.DatetimeIndex(raw_plant['Fecha Hora (YYYY-MM-DD HH:MM)'])
    # After re-indexing, drop the first column (the timestamp text).
    plant = raw_plant[COLUMNS].set_index(timestamps).iloc[:, 1:]

    v10_mean = plant[V10].mean()
    v20_mean = plant[V20].mean()
    # v10 / v20 = (10 / 20) ** alpha  =>  alpha = log(v10 / v20) / log(10 / 20)
    alpha = np.log(v10_mean / v20_mean) / np.log(10 / 20)

    # Build the 85 m series directly instead of adding a column to the sliced
    # frame, which can trigger pandas' SettingWithCopyWarning.
    speed_85m = (plant[V20] * np.power(85 / 20, alpha)).rename('85m_speed')

    current_min_date = speed_85m.index.min()
    current_max_date = speed_85m.index.max()

    # Explicit None checks: do not rely on the truthiness of a timestamp.
    if min_date is None or min_date < current_min_date:
        min_date = current_min_date
    if max_date is None or max_date > current_max_date:
        max_date = current_max_date

    eolic_plants_df[i] = speed_85m

for i, speed_85m in enumerate(eolic_plants_df):
    # Label-based slice; both bounds are inclusive.
    common = speed_85m.loc[min_date:max_date]
    eolic_plants_df[i] = (common - common.mean()) / common.std()

In [None]:
# Output folder for the processed per-plant series; the saving cell writes
# one file per plant into it.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
SAVE_DATA_PATH = '/home/chrisams/Documents/datasets/data_TAIM/processed/'

In [None]:
# Write every processed series to SAVE_DATA_PATH as 'plant_<index>'.
# The folder is created on demand so the cell also works on a fresh checkout.
os.makedirs(SAVE_DATA_PATH, exist_ok=True)
for i, plant in enumerate(eolic_plants_df):
    # header=False is passed explicitly: the cells that read these files back
    # supply their own column names, so a header row would be parsed as data.
    # The default of Series.to_csv for `header` differs between pandas versions.
    plant.to_csv(os.path.join(SAVE_DATA_PATH, 'plant_{}'.format(i)), header=False)

In [None]:
# Read the saved file 'plant_1' back: the first column becomes the parsed
# date index and the values are stored under '85m_speed'.
plant1 = pd.read_csv(
    os.path.join(SAVE_DATA_PATH, 'plant_1'),
    names=['85m_speed'],
    index_col=0,
    parse_dates=True,
)

In [None]:
# From here on DATA_PATH points at the processed files.
# NOTE(review): this rebinds the name used by the raw-data loading cell at the
# top of the notebook; re-running that cell after this one reads the processed
# folder instead of the raw files.
DATA_PATH = '/home/chrisams/Documents/datasets/data_TAIM/processed/'

In [None]:
# Print the number of samples stored in every processed file.
for path, _, file_names in os.walk(DATA_PATH):
    for file_name in file_names:
        # Join with the directory currently being walked (not DATA_PATH) so
        # files found in sub-directories resolve to their real location.
        t = pd.read_csv(os.path.join(path, file_name), index_col=0, names=['85m_speed'], parse_dates=True)
        print(t['85m_speed'].values.shape)
        

In [None]:
def load_data(data_path, n_plants, p, resample_rule=None):
    """
    Load the processed series of several plants and build the lag matrix.

    data_path: directory where the data is saved. Only regular files located
        directly inside it are read, in sorted name order; sub-directories
        are ignored.
    n_plants: number of plants to load (K).
    p: number of past steps stacked in every column of X. With p == 0 no lag
        matrix is built and X is the loaded data matrix itself.
    resample_rule: resample rule for data aggregation. When given, every
        series is resampled with this rule, averaged, and missing values are
        filled by time interpolation.

    Returns a tuple (data, X):
        data: array of shape (n_plants, T - p) holding the series without
            their first p steps (T is the length of the loaded series).
        X: array of shape (n_plants * p, T - p). Rows
            [t * n_plants, (t + 1) * n_plants) of column j hold the values of
            all plants t + 1 steps before data[:, j].

    Raises ValueError when p is negative, when fewer than n_plants files are
    available, or when the series are shorter than p. np.stack raises
    ValueError when the loaded series differ in length.
    """
    if p < 0:
        raise ValueError('p must be non-negative, got {}'.format(p))

    # Sorted top-level files only: deterministic plant order, and nothing
    # stored in a sub-directory can overwrite an already loaded plant.
    file_names = sorted(
        name for name in os.listdir(data_path)
        if os.path.isfile(os.path.join(data_path, name))
    )[:n_plants]
    if len(file_names) < n_plants:
        raise ValueError(
            'Expected {} data files in "{}" but found only {}'.format(
                n_plants, data_path, len(file_names)))

    series = []
    for file_name in file_names:
        print('File "{}" loaded!'.format(file_name))
        frame = pd.read_csv(os.path.join(data_path, file_name),
                            index_col=0, names=['85m_speed'], parse_dates=True)
        if resample_rule:
            frame = frame.resample(resample_rule).mean().interpolate(method='time')

        values = frame['85m_speed'].values
        print('nans: {}'.format(int(np.isnan(values).sum())))
        print(values.shape)
        series.append(values)

    data = np.stack(series, axis=0)
    n_steps = data.shape[1]
    if p > n_steps:
        raise ValueError(
            'p ({}) is larger than the number of time steps ({})'.format(p, n_steps))

    if p > 0:
        X = np.zeros((n_plants * p, n_steps - p))
        for t in range(p):
            # Block t is the data shifted t + 1 steps back in time.
            X[t * n_plants:(t + 1) * n_plants, :] = data[:, p - 1 - t:n_steps - 1 - t]
    else:
        X = data

    # Drop the first p steps: they have no complete history in X.
    data = data[:, p:]

    return data, X

# Testing

In [None]:
# Load 6 plants resampled with rule 'H' and build regressors from 3 lags.
Y0, X = load_data(DATA_PATH, n_plants=6, p=3, resample_rule='H')

In [None]:
# NOTE(review): `test` is not assigned by any visible cell; its only other
# mentions are commented-out lines inside load_data. Presumably a leftover
# from debugging — confirm, then restore its definition or drop this cell.
test[0]['2018-01-16':]

In [None]:
# Read the saved file 'plant_1' back with a parsed date index and average it
# over 'H' resampling bins.
plant1 = (
    pd.read_csv(
        os.path.join(SAVE_DATA_PATH, 'plant_1'),
        names=['85m_speed'],
        index_col=0,
        parse_dates=True,
    )
    .resample('H')
    .mean()
)

In [None]:
# Shape of the first array returned by load_data.
Y0.shape

In [None]:
# Shape of the lag matrix returned by load_data.
X.shape

In [None]:
# Report the position of every NaN left in X.
# np.argwhere lists the matching (row, column) pairs in row-major order, the
# same order the element-by-element scan would visit them.
for i, j in np.argwhere(np.isnan(X)):
    print(i, j, X[i, j])

In [None]:
# Report the position of every NaN left in Y0.
# np.argwhere lists the matching (row, column) pairs in row-major order, the
# same order the element-by-element scan would visit them.
for i, j in np.argwhere(np.isnan(Y0)):
    print(i, j, Y0[i, j])

In [None]:
# NOTE(review): `test` is not assigned by any visible cell (only mentioned in
# commented-out lines inside load_data) — confirm where it should come from.
test.shape

In [None]:
# Compare the first column of Y0 and of X with the leading columns of `test`.
# NOTE(review): `test` is not assigned by any visible cell (only mentioned in
# commented-out lines inside load_data) — confirm where it should come from.
print(Y0[:, 0])
print(X[:, 0])
print(test[:, :6])

In [None]:
# Print columns 1 and 2 of Y0 next to column 2 of X for a visual comparison.
print(Y0[:,1], Y0[:,2])
print(X[:, 2])

In [None]:
# Scratch check: range(1, 2) contains only the value 1.
list(range(1, 2))

In [None]:
# Toy example: 3 plants with 5 time steps each, filled row by row with 1..15.
n_plants = 3
p = 3
data = np.arange(1, 16).reshape(n_plants, 5)
print(data)

In [None]:
# Lag-matrix variant that also stacks the current step: column `col` holds
# the values at step `newest` followed by the p - 1 steps before it.
if p > 1:
    n_windows = data.shape[1] - p + 1
    X = np.zeros((n_plants * p, n_windows))
    for col, newest in enumerate(range(p - 1, data.shape[1])):
        for lag in range(p):
            rows = slice(lag * n_plants, (lag + 1) * n_plants)
            X[rows, col] = data[:, newest - lag]

In [None]:
# Display the lag matrix built from the toy data.
X