In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

%matplotlib inline

In [None]:
# Location of the raw dataset. Set the WEATHER_DATASET_PATH environment variable
# to point the notebook at another copy; the original cluster path stays the default.
PATH = os.path.abspath(
    os.environ.get(
        "WEATHER_DATASET_PATH",
        "/nfs/homedirs/ursulean/project-4/datasets/weather/40000_5_100_2_0.npy",
    )
)

In [None]:
# The file unpacks into three objects: the measurement array and two config objects.
# NOTE(review): allow_pickle=True unpickles arbitrary objects - only load trusted files.
archive = np.load(PATH, allow_pickle=True)
data, configs, config_indices = archive
print(data.shape)

In [None]:
def plot_values(m, n_examples=2, rand=True):
    """Plot the first two features of a few examples, one column of axes per example.

    The top row shows feature 0 and the bottom row feature 1. Every entry along
    axis 1 of ``m`` is drawn as its own line over the third axis.

    Parameters
    ----------
    m : array
        Indexed as ``m[example, atom, step, feature]`` with at least two features.
    n_examples : int
        Number of examples (columns) to draw.
    rand : bool
        If True, example indices are drawn at random (with replacement) from
        ``m``; otherwise the first ``n_examples`` examples are used.
    """
    # squeeze=False keeps the axes array 2-D so that n_examples=1 also works.
    fig, axarr = plt.subplots(ncols=n_examples, nrows=2, figsize=(30, 10), squeeze=False)
    if rand:
        # Sample from the array that was passed in, not from a notebook global.
        indices = np.random.randint(0, len(m), size=n_examples)
    else:
        indices = range(n_examples)
    for col, index in enumerate(indices):
        axarr[0, col].set_title(f"Example {index}")
        for atom_i in range(m.shape[1]):
            axarr[0, col].plot(m[index, atom_i, :, 0])
            axarr[1, col].plot(m[index, atom_i, :, 1])

In [None]:
# Raw (un-normalised) values of the first examples.
plot_values(m=data, rand=False)

In [None]:
# One figure per feature, each on its own explicit axes.
# sns.distplot is deprecated, so histplot is used whenever the installed seaborn
# provides it; older releases fall back to distplot.
for feat_i, feat_title in enumerate(("Avg. temperature distribution", "2nd. feature distribution")):
    fig, ax = plt.subplots()
    feat_values = data[:, :, :, feat_i].flatten()
    if hasattr(sns, "histplot"):
        sns.histplot(feat_values, bins=50, kde=True, stat="density", ax=ax)
    else:
        sns.distplot(feat_values, ax=ax)
    ax.set_title(feat_title)

In [None]:
# Standardise the first two features to zero mean / unit variance, using statistics
# taken over the whole array.
# NOTE(review): the variable names suggest feature 0 is temperature and feature 1
# humidity - confirm against the dataset description.
temp_raw = data[..., 0]
humid_raw = data[..., 1]
mean_temp, std_temp = temp_raw.mean(), temp_raw.std()
mean_humid, std_humid = humid_raw.mean(), humid_raw.std()

# Work on a copy so the raw array stays available to the other cells.
data_norm = data.copy()
data_norm[..., 0] = (temp_raw - mean_temp) / std_temp
data_norm[..., 1] = (humid_raw - mean_humid) / std_humid
plot_values(m=data_norm, n_examples=3, rand=False)

In [None]:
print("Stations interactions for first Example")
# Feature 0 of the first example, transposed so that each entry along axis 1 becomes
# a column and each step a row. `.T` replaces the deprecated DataFrame.swapaxes(0, 1).
sns.pairplot(pd.DataFrame(data_norm[0, :, :, 0]).T)

In [None]:
def exp_moving_avg(data, beta=0.3):
    """Smooth ``data`` along axis 2 with an exponential moving average.

    For every step ``t >= 1``::

        smoothed[t] = beta * data[t] + (1 - beta) * smoothed[t - 1]

    Step 0 is copied unchanged. The input is not modified.

    Parameters
    ----------
    data : array
        4-D array; axis 2 is the axis being smoothed.
    beta : float
        Weight of the current observation; larger values smooth less.

    Returns
    -------
    numpy.ndarray
        Array of the same shape. Floating-point (and complex) input keeps its
        dtype; any other dtype is converted to float so that the smoothed
        values are not truncated when stored.
    """
    data = np.asanyarray(data)
    if np.issubdtype(data.dtype, np.inexact):
        smoothed = data.copy()
    else:
        # An integer copy would silently drop the fractional part of every update.
        smoothed = data.astype(float)
    timesteps = data.shape[2]
    for step in range(1, timesteps):
        smoothed[:, :, step, :] = beta * data[:, :, step, :] + (1 - beta) * smoothed[:, :, step - 1, :]
    return smoothed

In [None]:
# Exponential moving average with the default beta, shown for the first three examples.
data_smoothed = exp_moving_avg(data=data_norm)
plot_values(m=data_smoothed, n_examples=3, rand=False)

In [None]:
def moving_avg(data, window_size=3):
    """Smooth ``data`` along axis 2 with a centred moving average.

    Each step that has ``window_size // 2`` neighbours on both sides is replaced
    by the mean of its window. The first and last ``window_size // 2`` steps do
    not have a full window and are copied unchanged. The input is not modified.
    A short message with the start index and window size is printed.

    Parameters
    ----------
    data : array
        4-D array; axis 2 is the axis being smoothed.
    window_size : int
        Positive odd window length.

    Returns
    -------
    numpy.ndarray
        Array of the same shape. Floating-point (and complex) input keeps its
        dtype; any other dtype is converted to float so that the averages are
        not truncated when stored.
    """
    assert window_size % 2 == 1, "Only symmetric windows allowed."
    # -1, -3, ... also satisfy the parity check above but make no sense as a window.
    assert window_size > 0, "Window size must be positive."
    data = np.asanyarray(data)
    if np.issubdtype(data.dtype, np.inexact):
        smoothed = data.copy()
    else:
        # An integer copy would silently drop the fractional part of every mean.
        smoothed = data.astype(float)
    timesteps = data.shape[2]
    start_i = window_size // 2
    print(f"Starting at {start_i} with window size {window_size}")
    for step in range(start_i, timesteps - start_i):
        first = step - start_i
        val = sum(data[:, :, first + offset] for offset in range(window_size)) / window_size
        smoothed[:, :, step, :] = val
    return smoothed

In [None]:
# Small triangle signal of shape (1, 1, 7, 1) for checking moving_avg by hand.
x = np.asanyarray([[[0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0]]])[..., np.newaxis]
x_smoothed = moving_avg(data=x, window_size=3)

In [None]:
# Sanity checks: a window of 1 must leave the data untouched,
# while a window of 3 must change at least one value.
print(np.array_equal(moving_avg(data, window_size=1), data))
print(not np.array_equal(moving_avg(data, window_size=3), data))

In [None]:
# Centred moving average over 3 steps, shown for the first three examples.
data_smoothed = moving_avg(data=data_norm, window_size=3)
plot_values(m=data_smoothed, n_examples=3, rand=False)

In [None]:
# Same as the previous cell but with a wider window of 9 steps.
data_smoothed = moving_avg(data=data_norm, window_size=9)
plot_values(m=data_smoothed, n_examples=3, rand=False)

In [None]:
# The FFT routines come from numpy (already imported as np). The previous
# `from scipy import fft, ifft` fails on current SciPy, where `scipy.fft` is a
# module and the top-level `ifft` alias no longer exists.


def fft_low_pass(data, percentage_cut=0.2):
    """Low-pass filter ``data`` along its second-to-last axis.

    The real FFT of every series is taken, the highest ``percentage_cut``
    fraction of its frequency bins is set to zero, and the series is
    transformed back. The input is not modified.

    Parameters
    ----------
    data : array
        Real-valued array whose second-to-last axis is the one being filtered
        and whose last axis holds the features.
    percentage_cut : float
        Fraction in ``[0, 1]`` of the frequency bins to remove, counted from
        the highest frequency downwards. ``0`` returns the data unchanged up to
        floating-point round-off; ``1`` removes everything, including the mean.

    Returns
    -------
    numpy.ndarray
        Real array with the same shape as ``data``. Floating-point input keeps
        its dtype; other dtypes are returned as float.

    Raises
    ------
    ValueError
        If ``percentage_cut`` lies outside ``[0, 1]``.
    """
    if not 0 <= percentage_cut <= 1:
        raise ValueError(f"percentage_cut must be in [0, 1], got {percentage_cut}")
    data = np.asanyarray(data)
    n_timesteps = data.shape[-2]
    # rfft keeps only the non-negative frequencies, ordered from the mean (bin 0)
    # upwards, so the highest frequencies sit at the end of the axis. A full fft
    # stores the negative frequencies there instead; zeroing that tail leaves a
    # complex result with the remaining components at half amplitude.
    spectrum = np.fft.rfft(data, axis=-2)
    n_cut = int(percentage_cut * spectrum.shape[-2])
    if n_cut > 0:
        # Guarded because a "-0:" slice would select, and wipe, every bin.
        spectrum[..., -n_cut:, :] = 0
    # n restores the original length, which is ambiguous for odd-length series.
    smoothed = np.fft.irfft(spectrum, n=n_timesteps, axis=-2)
    if np.issubdtype(data.dtype, np.floating):
        smoothed = smoothed.astype(data.dtype, copy=False)
    return smoothed

In [None]:
# FFT low-pass with percentage_cut=.85, shown for the first three examples.
data_smoothed = fft_low_pass(data=data_norm, percentage_cut=.85)
plot_values(m=data_smoothed, n_examples=3, rand=False)

In [None]:
def save_dataset(filename, data):
    """Save ``data`` to ``filename`` with ``np.save`` (pickling allowed).

    Parameters
    ----------
    filename : str or path-like
        Target file.
    data : array or sequence
        An ndarray is written as is. Any other object is converted with
        ``np.asanyarray``; when that fails because the items have different
        shapes, the items are stored one per slot in a 1-D object array, so
        loading the file with ``allow_pickle=True`` and unpacking it yields the
        items back.
    """
    print(f"Saving smoothed data under name {filename}")
    if not isinstance(data, np.ndarray):
        try:
            data = np.asanyarray(data)
        except ValueError:
            # Recent NumPy refuses to build an array from ragged items implicitly,
            # so the object array is filled slot by slot.
            items = list(data)
            packed = np.empty(len(items), dtype=object)
            for slot, item in enumerate(items):
                packed[slot] = item
            data = packed
    np.save(filename, data, allow_pickle=True)

def smooth_and_save(path, smoothing_function):
    """Load a dataset, standardise and smooth it, and save it next to the original.

    The file at ``path`` must unpack into three objects: the data array and two
    further objects that are written back unchanged. Features 0 and 1 of the
    data are standardised to zero mean and unit variance, ``smoothing_function``
    is applied to the result, and everything is saved as
    ``<path without a trailing ".npy">_smoothed.npy``.

    Parameters
    ----------
    path : str
        Path of the source ``.npy`` file.
    smoothing_function : callable
        Called with the standardised array; its return value is saved.
    """
    print(f"Loaded data from path {path}")
    # NOTE(review): allow_pickle=True unpickles arbitrary objects - only use trusted files.
    data, configs, configs_i = np.load(path, allow_pickle=True)

    # Standardize data (first two features only, statistics over the whole array)
    data_norm = data.copy()
    for feat in (0, 1):
        values = data[:, :, :, feat]
        data_norm[:, :, :, feat] = (values - values.mean()) / values.std()

    # Perform smoothing
    # functools.partial objects have no __name__, so fall back to their repr.
    smoother_name = getattr(smoothing_function, "__name__", repr(smoothing_function))
    print(f"Smoothing data with {smoother_name}")
    data_smoothed = smoothing_function(data_norm)

    # Strip only a trailing ".npy"; splitting on the first occurrence would cut
    # the path short whenever a directory name happens to contain ".npy".
    extension = ".npy"
    stem = path[:-len(extension)] if path.endswith(extension) else path
    new_filename = stem + "_smoothed.npy"

    # Pack the three objects into an explicit object array: recent NumPy raises
    # when asked to build an array from items of different shapes implicitly.
    bundle = np.empty(3, dtype=object)
    for slot, item in enumerate((data_smoothed, configs, configs_i)):
        bundle[slot] = item
    save_dataset(new_filename, bundle)

In [None]:
# Write the exponentially smoothed copy of the dataset next to the original file.
smooth_and_save(path=PATH, smoothing_function=exp_moving_avg)