### FOBSS Preprocessing Script
---
Notebook for trying first data preprocessing scripts

### Selecting relevant part of Ri Jumps 25A Profile

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Load the three ';'-separated measurement tables of the "Ri Jumps 25A" run and
# keep only the leading rows of each (all columns are retained at this point).
cur_master = np.loadtxt('../data/fobss_data/data/Ri Jumps 25A/battery/Battery_Current.csv', delimiter=';')[:100000]

cur_inv = np.loadtxt('../data/fobss_data/data/Ri Jumps 25A/inverter/Inverter_Current.csv', delimiter=';')[:350000]

volt_slave_0_cell_4 = np.loadtxt('../data/fobss_data/data/Ri Jumps 25A/cells/Slave_0_Cell_Voltages.csv', delimiter=';')[:100000]

In [None]:
# Plot column 1 of the battery-current table; the trailing expression makes the
# notebook display the shape of the (already truncated) table.
plt.plot(cur_master[:,1]) 
cur_master.shape

In [None]:
# Plot column 4 of the slave-0 cell-voltage table; the trailing expression makes
# the notebook display the shape of the (already truncated) table.
plt.plot(volt_slave_0_cell_4[:,4])
volt_slave_0_cell_4.shape

### Data Subsample Function

In [None]:
def subsample(sequence, d_t):
    """Subsample a sequence by keeping the most frequent value of each window.

    The sequence is cut into consecutive, non-overlapping windows of ``d_t``
    elements (the last window may be shorter) and every window contributes its
    most frequent value to the output. When several values are equally
    frequent, the smallest one is chosen.

    Args:
        sequence: 1-D array-like of values.
        d_t: window length, must be >= 1.

    Returns:
        1-D float numpy array with one element per window.

    Raises:
        ValueError: if ``d_t`` is smaller than 1.
    """
    if d_t < 1:
        raise ValueError('subsample: d_t must be a positive integer')

    sequence = np.asarray(sequence)
    modes = []

    # step by the full window length so windows neither overlap nor skip data;
    # slicing past the end simply yields a shorter last window
    for start in range(0, len(sequence), d_t):
        window = sequence[start:start + d_t]

        # the count index refers to the sorted unique values, not to the window
        values, counts = np.unique(window, return_counts=True)
        modes.append(values[counts.argmax()])

    return np.array(modes, dtype=float)

In [None]:
def smoothing(sequence, d_t):
    """Smooth a sequence by replacing each window with its most frequent value.

    The sequence is cut into consecutive, non-overlapping windows of ``d_t``
    elements (the last window may be shorter). Every element of a window is
    replaced by the window's most frequent value, so the output has exactly the
    same length as the input. When several values are equally frequent, the
    smallest one is chosen.

    Args:
        sequence: 1-D array-like of values.
        d_t: window length, must be >= 1.

    Returns:
        1-D float numpy array of length ``len(sequence)``.

    Raises:
        ValueError: if ``d_t`` is smaller than 1.
    """
    if d_t < 1:
        raise ValueError('smoothing: d_t must be a positive integer')

    sequence = np.asarray(sequence)
    pieces = []

    # step by the full window length; otherwise d_t values would be emitted for
    # every d_t - 1 input samples and the output would outgrow the input
    for start in range(0, len(sequence), d_t):
        window = sequence[start:start + d_t]

        # the count index refers to the sorted unique values, not to the window
        values, counts = np.unique(window, return_counts=True)
        pieces.append(np.full(len(window), values[counts.argmax()], dtype=float))

    if not pieces:
        return np.array([])
    return np.concatenate(pieces)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Load the "Profile 10A" battery current and slave-0 cell voltages and keep
# rows 500..999 of each table as a short excerpt for the experiments below.
cur_master = np.loadtxt('../data/fobss_data/data/Profile 10A/battery/Battery_Current.csv', delimiter=';')[500:1000]

volt_slave_0_cell_4 = np.loadtxt('../data/fobss_data/data/Profile 10A/cells/Slave_0_Cell_Voltages.csv', delimiter=';')[500:1000]

In [None]:
# Plot column 4 of the voltage excerpt as the reference for the subsampled and
# smoothed variants below; the trailing expression displays the excerpt's shape.
plt.plot(volt_slave_0_cell_4[:,4])
volt_slave_0_cell_4.shape

In [None]:
# Keep the result under its own name: assigning it to `subsample` would replace
# the function with an array and make this cell fail when it is run again.
volt_subsampled = subsample(volt_slave_0_cell_4[:,4], 10)
plt.plot(volt_subsampled)
volt_subsampled.shape

In [None]:
# Keep the result under its own name: assigning it to `smoothing` would replace
# the function with an array and make this cell fail when it is run again.
volt_smoothed = smoothing(volt_slave_0_cell_4[:,4], 10)
plt.plot(volt_smoothed)
volt_smoothed.shape

### Subsampling and Smoothing using libraries

In [None]:
from scipy import ndimage 

def subsample(sequence, d_sample):
    """Return every ``d_sample``-th element of ``sequence``, starting with the first."""
    stride = slice(None, None, d_sample)
    return sequence[stride]

def smooth(sequence, sigma):
    """Smooth ``sequence`` with a Gaussian filter of standard deviation ``sigma``.

    Args:
        sequence: the input array to be smoothed
        sigma: standard deviation of the Gaussian kernel

    Returns:
        The filtered array, same shape as the input.
    """
    # `scipy.ndimage.filters` is a deprecated alias namespace; the public
    # function lives directly in `scipy.ndimage`.
    return ndimage.gaussian_filter(sequence, sigma)

# ------------------------------------------- Prepare Test Data -------------------------------------------
# Rows 80000..99999 of the "Ri Jumps 25A" run: column 1 of the battery current
# and column 4 of the slave-0 cell voltages.
test_cur_master = np.loadtxt('../data/fobss_data/data/Ri Jumps 25A/battery/Battery_Current.csv', delimiter=';')[80000:100000, 1]
test_volt_slave_0_cell_4 = np.loadtxt('../data/fobss_data/data/Ri Jumps 25A/cells/Slave_0_Cell_Voltages.csv', delimiter=';')[80000:100000, 4]

# The current is deliberately left untouched here; uncomment to process it too.
# test_cur_master = subsample(test_cur_master, 10)
# test_cur_master = smooth(test_cur_master, 10)

# Keep every 10th voltage sample, then apply the Gaussian filter with sigma 10.
test_volt_slave_0_cell_4 = smooth(subsample(test_volt_slave_0_cell_4, 10), 10)

# Show a 100-sample excerpt of the processed voltage.
plt.plot(test_volt_slave_0_cell_4[600:700])

### Align two arrays of different length

In [None]:
from scipy import ndimage 

# Load column 1 of the inverter current and column 4 of the slave-0 cell
# voltages of the "Profile 10A Run 040618_2" run.
train_cur_inv = np.loadtxt('../data/fobss_data/data/Profile 10A Run 040618_2/inverter/Inverter_Current.csv', delimiter=';')
train_cur_inv = train_cur_inv[:,1]
train_volt_slave_0_cell_4 = np.loadtxt('../data/fobss_data/data/Profile 10A Run 040618_2/cells/Slave_0_Cell_Voltages.csv', delimiter=';')
train_volt_slave_0_cell_4 = train_volt_slave_0_cell_4[:,4]
# `scipy.ndimage.filters` is a deprecated alias namespace; call the function
# from `scipy.ndimage` directly.
train_volt_slave_0_cell_4 = ndimage.gaussian_filter(train_volt_slave_0_cell_4, 10)

# Ratio of current samples per voltage sample; used to pick, for every voltage
# sample, the current sample at the proportionally same position.
factor = train_cur_inv.shape[0] / train_volt_slave_0_cell_4.shape[0]

train_cur_inv_ = [
    train_cur_inv[int(np.round(i * factor))]
    for i in range(len(train_volt_slave_0_cell_4))
]

train_cur_inv_resized = np.array(train_cur_inv_)

# plot graph
# Create both panels with a single subplots() call: combining plt.subplots()
# with plt.subplot() first creates a full-figure Axes that then overlaps the
# two panels.
fig, (ax_current, ax_voltage) = plt.subplots(2, 1, figsize=(15, 15))
ax_current.plot(train_cur_inv, label="Inverter Current")
ax_current.legend()
ax_voltage.plot(train_volt_slave_0_cell_4, label="Run 1", color='red')
ax_voltage.legend()

### Select multiple simple profiles

In [None]:
# ------------------------------------------- Utility Functions -------------------------------------------
def subsequences(sequence_X, sequence_y, n_steps):
    """ Creates sliding-window subsequences of the original sequence to fit LSTM structure

    Args:
        sequence_X: the input sequence, cut into overlapping windows of length n_steps
            (consecutive windows are shifted by one element)
        sequence_y: the label sequence; for each window the element at the
            position of the window's last element is taken as its label
        n_steps: the amount of time steps used as an input into the LSTM for prediction

    Returns:
        A tuple of 2 numpy arrays (X, y), one entry per window.
        For 1-D inputs and 1 <= n_steps <= len(sequence_X):

        X.shape = (len(sequence_X) - n_steps + 1, n_steps)
        y.shape = (len(sequence_X) - n_steps + 1,)

    Raises:
        Exception: if n_steps exceeds the length of sequence_X
    """
    total = len(sequence_X)
    if n_steps > total:
        raise Exception('subsequences: n_steps should not exceed the sequence length')

    windows, targets = [], []
    start = 0
    # advance one element at a time until the window would run past the end
    while start < total and start + n_steps <= total:
        stop = start + n_steps
        windows.append(sequence_X[start:stop])
        targets.append(sequence_y[stop - 1])
        start += 1

    return np.array(windows), np.array(targets)


def subsample(sequence, d_sample):
    """ Subsample array to decrease the amount of data

    Args:
        sequence: the input array to be subsampled
        d_sample: sampling step, meaning every d_sample'th element (starting
            with the first) will be part of the output

    Returns:
        The subsampled array
    """
    stride = slice(None, None, d_sample)
    return sequence[stride]


def smooth(sequence, sigma):
    """ Smooth array to decrease measurement noise

    Args:
        sequence: the input array to be smoothed
        sigma: standard deviation of the Gaussian kernel used for filtering

    Returns:
        The smoothed array, same shape as the input
    """
    # `scipy.ndimage.filters` is a deprecated alias namespace; the public
    # function lives directly in `scipy.ndimage`.
    return ndimage.gaussian_filter(sequence, sigma)


def align(sequence_1, sequence_2):
    """ Aligns two sequences

    In this context this means subsampling the first array so that it
    afterwards has the same length as the second array. For every position i
    of the second array, the element of the first array at index
    round(i * len(sequence_1) / len(sequence_2)) is picked.

    Args:
        sequence_1: array to be aligned (must not be shorter than sequence_2)
        sequence_2: array to be aligned to

    Returns:
        The aligned array, with len(sequence_2) elements

    Raises:
        Exception: if sequence_1 is shorter than sequence_2
    """
    if len(sequence_1) < len(sequence_2):
        raise Exception('align: missmatch of sequence lengths')

    # how many samples of sequence_1 correspond to one sample of sequence_2
    ratio = sequence_1.shape[0] / sequence_2.shape[0]

    picked = [
        sequence_1[int(np.round(pos * ratio))]
        for pos in range(len(sequence_2))
    ]
    return np.array(picked)


def prepare_data(input_sequence, label_sequence, aligned, d_sample, n_steps, sigma):
    """ Prepares the data for input into the LSTM

    Preparation includes:
    aligning differently sized sequences, subsampling, smoothing, windowing
    the sequences into the requested format and min-max scaling to [0, 1]

    Args:
        input_sequence: the input feature sequence
        label_sequence: the output/ground truth sequence
        aligned: indicates if input and label sequence are of equal size (True)
            or if the input still needs to be aligned to the labels (False)
        d_sample: sampling step, every d_sample'th element is kept
        n_steps: the amount of time steps used as an input into the LSTM for prediction
        sigma: parameter for the data smoothing

    Returns:
        A tuple of 3 values: the scaled input X reshaped to
        (n_windows, n_steps, 1), the scaled labels y of shape (n_windows, 1)
        and the scaler fitted on y. The scaler is needed afterwards to scale
        the output back to the original value range.
    """
    # NOTE(review): MinMaxScaler is not imported in the visible cells —
    # presumably sklearn.preprocessing.MinMaxScaler; confirm the import exists.

    # bring the input to the length of the labels when required
    features = input_sequence if aligned else align(input_sequence, label_sequence)

    # thin out first, then smooth the remaining samples
    features = smooth(subsample(features, d_sample), sigma)
    labels = smooth(subsample(label_sequence, d_sample), sigma)

    # sliding windows for X, one label column for y
    X, y = subsequences(features, labels, n_steps)
    y = y.reshape(-1, 1)

    # separate scalers so that y can be mapped back independently of X
    scaler_X = MinMaxScaler(feature_range = (0, 1))
    scaler_X.fit(X)
    X_scaled = scaler_X.transform(X)

    scaler_y = MinMaxScaler(feature_range = (0, 1))
    scaler_y.fit(y)
    y_scaled = scaler_y.transform(y)

    # add the trailing feature axis: (n_windows, n_steps, 1)
    n_windows, window_len = X_scaled.shape[0], X_scaled.shape[1]
    X_scaled = X_scaled.reshape(n_windows, window_len, 1)

    return X_scaled, y_scaled, scaler_y

def prepare_data_unscaled(input_sequence, label_sequence, aligned, d_sample, n_steps, sigma):
    """ Prepares the data for input into the LSTM without scaling the values

    Preparation includes:
    aligning differently sized sequences, subsampling, smoothing and windowing
    the sequences into the requested format

    Args:
        input_sequence: the input feature sequence
        label_sequence: the output/ground truth sequence
        aligned: indicates if input and label sequence are of equal size (True)
            or if the input still needs to be aligned to the labels (False)
        d_sample: sampling step, every d_sample'th element is kept
        n_steps: the amount of time steps used as an input into the LSTM for prediction
        sigma: parameter for the data smoothing

    Returns:
        A tuple of 2 values: the input X reshaped to (n_windows, n_steps, 1)
        and the labels y of shape (n_windows, 1).
    """
    # align data if not of equal size
    if not aligned:
        input_sequence = align(input_sequence, label_sequence)

    # subsample and smooth data
    # use the `sigma` argument: the former reference to `gauss_sigma` only
    # resolved if a global of that name happened to exist in the kernel
    input_sequence_ = subsample(input_sequence, d_sample)
    input_sequence_ = smooth(input_sequence_, sigma)

    label_sequence_ = subsample(label_sequence, d_sample)
    label_sequence_ = smooth(label_sequence_, sigma)

    # convert into X and y sequences
    X, y = subsequences(input_sequence_, label_sequence_, n_steps)
    y = np.reshape(y, (-1, 1))

    # reshape into correct format
    X = X.reshape(X.shape[0], X.shape[1], 1)

    return X, y

In [None]:
# For each of the four simple profiles load column 1 of the inverter current
# and column 4 of the slave-0 cell voltages.

# Profile 10A
cur_profile_1 = np.loadtxt('../data/fobss_data/data/Profile 10A/inverter/Inverter_Current.csv', delimiter=';')[:, 1]
volt_profil_1 = np.loadtxt('../data/fobss_data/data/Profile 10A/cells/Slave_0_Cell_Voltages.csv', delimiter=';')[:, 4]

# Profile -10A
cur_profile_2 = np.loadtxt('../data/fobss_data/data/Profile -10A/inverter/Inverter_Current.csv', delimiter=';')[:, 1]
volt_profil_2 = np.loadtxt('../data/fobss_data/data/Profile -10A/cells/Slave_0_Cell_Voltages.csv', delimiter=';')[:, 4]

# Profile 25A
cur_profile_3 = np.loadtxt('../data/fobss_data/data/Profile 25A/inverter/Inverter_Current.csv', delimiter=';')[:, 1]
volt_profil_3 = np.loadtxt('../data/fobss_data/data/Profile 25A/cells/Slave_0_Cell_Voltages.csv', delimiter=';')[:, 4]

# Profile -25A
cur_profile_4 = np.loadtxt('../data/fobss_data/data/Profile -25A/inverter/Inverter_Current.csv', delimiter=';')[:, 1]
volt_profil_4 = np.loadtxt('../data/fobss_data/data/Profile -25A/cells/Slave_0_Cell_Voltages.csv', delimiter=';')[:, 4]

# Bring the inverter current of profile 4 to the length of its voltage trace.
cur_profile_4_resized = align(cur_profile_4, volt_profil_4)

# Only the first 2000 samples of both traces are inspected.
cur_profile_4_resized = cur_profile_4_resized[:2000]
volt_profil_4 = volt_profil_4[:2000]

# Create both panels with a single subplots() call: combining plt.subplots()
# with plt.subplot() first creates a full-figure Axes that then overlaps the
# two panels.
fig, (ax_current, ax_voltage) = plt.subplots(2, 1, figsize=(5, 5))
ax_current.plot(cur_profile_4_resized)
ax_voltage.plot(volt_profil_4, color='red')