# Analysis of the Voltage Signal post-capacitor installation

In [None]:
import os

import altair as alt
import numpy as np
import pandas as pd

from src.data_processing import calibrate_data, create_data_from_entry, unstable_seconds, flatten_data
from src.data_processing import average_data, time_period
from src.vizualization import generate_stability_rules, generate_line_chart

# Render Altair charts inline by enabling the 'notebook' renderer
alt.renderers.enable('notebook')

# Store the Altair chart JSON separately and not in the notebook
# to reduce notebook size
alt.data_transformers.enable('json')

## Load and clean data

First we load, clean and calibrate the raw data, which was obtained by camonitoring the PV and writing the output to a file.

In [None]:
def clean_camonitored_data(data, n_elements=100):
    """
    Cleans file produced from camonitoring a PV.

    The input is expected to have one row per camonitor update with the
    columns, in order: PV name, date, time, NELM, followed by the
    ``n_elements`` values of the array. The date and time columns are
    combined into a single "Datetime" column and the PV name and NELM
    columns are dropped.

    The input dataframe is left untouched; all work is done on a copy.

    Args:
        data (pandas Dataframe): Pandas Dataframe to be parsed.
        n_elements (int): Number of array elements in each row
            (defaults to 100).
    Returns:
        cleaned_dataframe (Dataframe): Cleaned dataframe with the columns
            "Datetime", 1, 2, ..., n_elements.
    Raises:
        ValueError: If the number of columns in ``data`` is not
            ``4 + n_elements``.
    """
    metadata_columns = ["PV name", "Date", "Time", "NELM"]
    element_columns = list(range(1, n_elements + 1))

    expected_width = len(metadata_columns) + len(element_columns)
    if data.shape[1] != expected_width:
        raise ValueError(
            "Expected {} columns (4 metadata columns + {} elements), got {}".format(
                expected_width, n_elements, data.shape[1]
            )
        )

    # Work on a copy so the caller's dataframe keeps its original column
    # labels and does not gain a "Datetime" column as a side effect.
    labelled = data.copy()
    labelled.columns = metadata_columns + element_columns

    labelled["Datetime"] = pd.to_datetime(labelled["Date"] + ' ' + labelled["Time"])

    cleaned_dataframe = labelled[["Datetime"] + element_columns]

    return cleaned_dataframe

# Load the first 600 rows of the whitespace-separated camonitor output.
# sep=r"\s+" is the documented replacement for the deprecated
# delim_whitespace=True option of read_csv.
raw_data_path = os.path.join(os.getcwd(), "..", "data", "raw", "2018-11-09-voltage-data.txt")
data = pd.read_csv(raw_data_path, sep=r"\s+", header=None, nrows=600)

# Label/trim the columns, then calibrate; the meaning of the second
# argument (20) is defined by calibrate_data in src.data_processing.
data = clean_camonitored_data(data)
calibrated_data = calibrate_data(data, 20)
calibrated_data.head()

We can now save this cleaned file to load later.

In [None]:
# Write the calibrated data to the processed-data folder so it can be reloaded later.
processed_path = os.path.join(
    os.getcwd(), "..", "data", "processed", "post-capacitor-cleand-calibrated-voltage-data.csv"
)
calibrated_data.to_csv(processed_path, index=None)

## Analysis of 706 seconds of signal

### Raw data Analysis

Now we flatten each array of data by assigning a time stamp to each element of the array in the following way:

- Given a row, find the time delta between this row and the next row.
- Divide this time delta by the number of elements in an array (100 in this case) to get the time delta between readings.
- For each element of the array, multiply the time delta between readings by the position of the element in the array and add this to the timestamp of the row.

Note that this assumes the readings within each array are equally spaced in time.

In [None]:
# Flatten the calibrated arrays with flatten_data and preview the first rows.
flatten_raw_data = flatten_data(calibrated_data)
flatten_raw_data.head()

In [None]:
# Display the dimensions of the flattened data.
flatten_raw_data.shape

In [None]:
# Layers for the raw-signal plot: the stability rules derived from the
# flattened data and a base line chart with fixed axis titles and y-range.
stability_rules_layer = generate_stability_rules(flatten_raw_data)
base_line = generate_line_chart(
    x_title="Time (h:m:s)",
    y_title="Voltage (kV)",
    y_scale=(89, 97),
    time_unit="hoursminutesseconds",
)

# The title reports the time span returned by time_period for the plotted data.
raw_chart_title = "Voltage before averaging over {} seconds".format(time_period(flatten_raw_data))

alt.layer(
    base_line,
    stability_rules_layer,
    data=flatten_raw_data,
    title=raw_chart_title,
    width=875,
    config={"background": "white"},
)

In [None]:
# Mean of the flattened raw data; kept in raw_mean for the
# unstable-seconds calculation further down.
raw_mean = np.mean(flatten_raw_data)
raw_mean

In [None]:
# Standard deviation of the flattened raw data.
raw_std = np.std(flatten_raw_data)
raw_std

The graph above shows how often the signal is outside of the stability parameters set at $\pm1kV$ from the mean. We also see that the signal has a standard deviation of $2.383439kV$ from a mean of $93.177092kV$.

In [None]:
# Unstable seconds for the raw data. raw_mean is converted to a plain float
# before being passed as the second argument of unstable_seconds.
raw_unstable_seconds = unstable_seconds(flatten_raw_data, float(raw_mean))
raw_unstable_seconds

The number of unstable seconds is 93.29 over this 706 second window.

### Averaging the data

Now we will average the data by averaging values which are 180 degrees apart on the 50Hz wave. As this data was collected while sampling 100 elements at 100Hz (new reading every second), consecutive elements are 10ms apart, which is half a period of the 50Hz wave, so we need to average each pair of adjacent elements. We set the timestamp of the average to the later element's timestamp. Assuming the DAQ keeps sampling at 100Hz while sending the data to the IOC, this won't create artifacts at the ends of the packets of data.

In [None]:
# Apply average_data to the flattened raw data to obtain the averaged signal.
filtered_data = average_data(flatten_raw_data)

In [None]:
# Mean of the averaged data.
filtered_data_mean = np.mean(filtered_data)
filtered_data_mean

In [None]:
# Standard deviation of the averaged data.
np.std(filtered_data)

In [None]:
# Fractional reduction in standard deviation: 1 - (averaged std / raw std).
1- np.std(filtered_data)/np.std(flatten_raw_data)

In [None]:
# Stability rules recomputed from the averaged data; base_line is the line
# chart layer created in the raw-data plotting cell and is reused here.
averaged_stability_bounds = generate_stability_rules(filtered_data)

# Title previously read "averaging over over"; the duplicated word is removed.
alt.layer(base_line, averaged_stability_bounds, data=filtered_data, 
          title="Voltage after averaging over {} seconds".format(time_period(filtered_data)),
          width=850,
          config={"background": "white"})

The graph above shows how often the signal is outside of the stability parameters set at $\pm1kV$ from the mean. We also see that the signal has a standard deviation of $0.151717kV$ from a mean of $93.177117kV$. This is a $93.6345\%$ reduction of the standard deviation.

In [None]:
# Unstable seconds for the averaged data.
# NOTE(review): unlike the raw-data call, no mean is passed here, so whatever
# default unstable_seconds uses for its second argument applies. Confirm this
# is intended (filtered_data_mean is computed above but not used here).
averaged_unstable_second = unstable_seconds(filtered_data)
averaged_unstable_second

In [None]:
# Fractional reduction in unstable time: 1 - (averaged / raw unstable seconds).
1 - averaged_unstable_second/raw_unstable_seconds

The number of unstable seconds is 2.07 over this 706 second window - a 97.78% decrease in the amount of time spent unstable (down from 93.29 seconds before averaging).