## Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tabulate import tabulate

import btrdb
from btrdb.utils import timez
from btrdb.point import StatPoint, RawPoint
from btrdb.stream import Stream, StreamSet

import re
from typing import List, Tuple
from datetime import datetime, timedelta

## Connect to the database

In [2]:
# Connect to the NI4AI brtdb database.
conn = btrdb.connect("api.ni4ai.org:4411", apikey='9897F6A9C400164A6E5DEF0F')

# Print a list of the available collections.
collections = conn.list_collections()
for collection in collections:
    print(collection)

brownout/centex
brownout/conroe
brownout/whitesands
c37
c37_iowa51_0866cb39
c37_iowa51_0c697f48
c37_iowa51_1408d2ac
c37_iowa51_1b6cffa2
c37_iowa51_1bf98be2
c37_iowa51_243a768b
c37_iowa51_25845c95
c37_iowa51_268447a4
c37_iowa51_28b62158
c37_iowa51_30b95ff1
c37_iowa51_365a8581
c37_iowa51_3c04951a
c37_iowa51_41f27cc6
c37_iowa51_56ec3f25
c37_iowa51_57e9d186
c37_iowa51_68255aaf
c37_iowa53_039f6f78
c37_iowa53_0a2b894c
c37_iowa53_144419db
c37_iowa53_1a02070f
c37_iowa53_1a714cf8
c37_iowa53_2584c47f
c37_iowa53_2606cd2b
c37_iowa53_2e3108da
c37_iowa53_430c8b35
c37_iowa53_490bd268
c37_iowa53_4dba7b0f
c37_iowa53_592e17f7
c37_iowa53_6054502f
c37_iowa53_6e661e92
c37_iowa53_6ec34c36
c37_iowa53_7cd4b7a5
c37_iowa54_079143f7
c37_iowa54_162d968d
c37_iowa54_207403de
c37_iowa54_23a62343
c37_iowa54_241b3ae4
c37_iowa54_2607c649
c37_iowa54_26984b92
c37_iowa54_2e4fa459
c37_iowa54_3629d9f1
c37_iowa54_43ea3b93
c37_iowa54_4542c292
c37_iowa54_45a6e3ca
c37_iowa54_4c22b029
c37_iowa54_589442fd
c37_iowa54_64f1017f
c37_

## Pretty print all collection collection stream

In [3]:
def describe_streams(streams: List[Stream]) -> str:
    """   
    Takes in a streamset from a collection and returns a tabulate
    formatted stringof that dataset's collection name, stream name 
    and stream units (i.e. degrees, amps, volts) and UUID.
    """
    headers = ["Index", "Collection", "Name", "Units"]
    table = []
    
    for index, stream in enumerate(streams):
        tags = stream.tags()
        table.append([
            index, stream.collection, stream.name, tags["unit"]
        ])
    
    return tabulate(table, headers=headers)

In [10]:
# ===================================
collection_name = 'c37'
collection_idx = 0
# ===================================


# Fetch collections from the database connection.
collections = conn.list_collections(collection_name)

# Fetch the specified collection and convert it to a StreamSet.
collection = collections[collection_idx]
streams = conn.streams_in_collection(collection)
streamset = StreamSet(streams)

# Print the StreamSet using tabulate.
stream_table = describe_streams(streamset)
print(stream_table)

stream = streamset[stream_idx]

print(stream.tags())

  Index  Collection           Name             Units
-------  -------------------  ---------------  -------
      0  c37_iowa53_6ec34c36  PH3MAG C1MagAng  Amp
      1  c37_iowa53_430c8b35  DFREQ            ROCOF
      2  c37_iowa51_28b62158  PH5MAG C3MagAng  Amp
      3  c37_iowa51_243a768b  DFREQ            ROCOF
      4  c37_iowa51_1bf98be2  PH1MAG L2MagAng  Volt
      5  c37_iowa51_1408d2ac  TIMEQUAL         TQ
      6  c37_iowa51_0866cb39  PH0MAG L1MagAng  Volt
      7  c37_iowa54_70b5a315  DFREQ            ROCOF
      8  c37_iowa54_589442fd  PH1ANG L2MagAng  degrees
      9  c37_iowa54_3629d9f1  TIMEQUAL         TQ
     10  c37                  PH2MAG VC1       Volt
     11  c37                  PH1ANG VB1       degrees
     12  c37                  PH3MAG C1MagAng  Amp
     13  c37                  DFREQ            ROCOF
     14  c37                  PH0MAG VA1       Volt
     15  c37                  TIMEQUAL         TQ
     16  c37                  PH0ANG VA1       degrees
    

## Choose a stream and print it's metadata

In [5]:
# ===================================
stream_idx = 3
# ===================================

stream = streamset[stream_idx]

# Print the annotations(metadata) connected to our StreamSet.
annotations = stream.annotations()
print("stream annotations: ", annotations)

print(stream.tags())

stream annotations:  ({'location': 'building', 'impedance': '{"source": "PMU5", "target": "PMU2", "pos_sequence": "0.489+j0.59", "neg_sequence": "0.971+j1.476"}'}, 2)
{'name': 'C1MAG', 'unit': 'amps', 'ingress': '', 'distiller': ''}


## Find the timerange of our chosen stream

In [6]:
def get_datetime_range(stream: Stream) -> Tuple[datetime, datetime]:
    """
    Get the earliest  and latest point in the passed stream,
    Fetching just the data point without the version and
    convert from nanoseconds to seconds
    NOTE: without knowing the timezone these might be naive calculations
    """
    first_point, _ = stream.earliest()
    first_epoch = first_point.time / 1e9 
    first_date = datetime.fromtimestamp(first_epoch)

    last_point, _ = stream.latest()
    last_epoch = last_point.time / 1e9
    last_date = datetime.fromtimestamp(last_epoch)

    return first_date, last_date

# Find the range of data (first and last timestampe) in our Stream.
first_timestamp, last_timestamp = get_datetime_range(stream)
print("First data-point in stream: ", first_timestamp)
print("Last data-point in stream: ", last_timestamp)

First data-point in stream:  2016-02-29 16:00:00.008333
Last data-point in stream:  2016-05-31 16:55:48.999999


## Check the entire range of the streams mean


In [7]:
def datetime_to_statpoints(stream: Stream, start: datetime, end: datetime, delta: timedelta) -> Tuple[StatPoint, int]:
    """
    Helper function to convert datetimes and a timedelta
    into the correct nanosecond format and return statpoints
    from a stream.windows call
    """
    start_nano = timez.to_nanoseconds(start)
    end_nano = timez.to_nanoseconds(end)
    width_ns = int(1e9 * delta.total_seconds())

    return stream.windows(start, end, width_ns)


def plot_statpoints(statpoints, aggregation: str) -> None:
    """
    Helper function to quickly plot a series of statpoints
    along the passes aggregation string   
    """
    df = statpoints_to_df(stats)
    plt.figure(figsize=(15, 5))
    plt.plot(df.index, df[aggregation], linewidth=1, color='red', linestyle='--')
    
    
def statpoints_to_df(statpoints) -> pd.DataFrame:
    """
    Converts a group of statpoint objects into a 
    dataframe with aggregation series as columns
    """
    attributes = ['min','mean','max','stddev','count','time']
    
    df = pd.DataFrame([[getattr(p, attr) for attr in attributes] for p, version in statpoints],
                          columns=attributes)
    df.index = pd.to_datetime(df['time'])
    return df

In [8]:
# Plot the entire range of data in our stream.
width = timedelta(minutes=10)
stats = datetime_to_statpoints(stream, first_timestamp, last_timestamp, width)
plot_statpoints(stats, 'mean')

KeyboardInterrupt: 

In [None]:
plot_statpoints(stats, 'time')

In [None]:
plot_statpoints(stats, 'count')

## Zoom in on a chosen point

In [None]:
start = datetime(2016, 4, 1)
end = start + timedelta(minutes=40)
width = timedelta(seconds=30)
stats = datetime_to_statpoints(stream, start, end, width)
plot_statpoints(stats, 'mean')

## Fetch and plot raw data

In [None]:
def rawpoints_to_series(rawpoints: List[RawPoint]) -> pd.Series:
    """
    Takes in a list of rawpoints from a stream.values() 
    call and converts it to a pandas series.
    """
    times, values = [], []
    for rawpoint, _ in rawpoints:
        times.append(timez.ns_to_datetime(rawpoint.time))
        values.append(rawpoint.value)
        
    return pd.Series(index=times, data=values)

In [None]:
# Fetch and plot the data.
rawpoints = stream.values(start, end)
data = rawpoints_to_series(rawpoints)
data.plot()

## Plot the voltage and current of our data

In [None]:
def streamset_to_dataframes(streamset: StreamSet, start: datetime, end: datetime) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Takes in a StreamSet object, using it's filter method to convert the StreamSet
    to voltage and current pandas dataframes containing RawPoints data.
    """
    voltage_stream = streamset.filter(name=re.compile("L[\d]"))
    current_stream = streamset.filter(name=re.compile("C[\d]"))

    voltage_df = voltage_stream.filter(start=start, end=end).to_dataframe()
    current_df = current_stream.filter(start=start, end=end).to_dataframe()
    
    return voltage_df, current_df

In [None]:
# Fetch the raw data into voltage/current dataframes.
voltage_df, current_df = streamset_to_dataframes(streamset, start, end)

# plot the data.
figsize = (15, 5)
voltage_df[voltage_df.filter(like='MAG').columns].plot(figsize=figsize, ylabel="Voltage Magnitude")
voltage_df[voltage_df.filter(like='ANG').columns].plot(figsize=figsize, ylabel="Voltage Angle")
current_df[current_df.filter(like='MAG').columns].plot(figsize=figsize, ylabel="Current Magnitude (amps)")
current_df[current_df.filter(like='ANG').columns].plot(figsize=figsize, ylabel="Current Angle")

In [None]:
def streamset_to_dataframes_2(streamset: StreamSet, start: datetime, end: datetime) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Takes in a StreamSet object, using it's filter method to convert the StreamSet
    to voltage and current pandas dataframes containing RawPoints data.
    """
    voltage_stream = streamset.filter(name=re.compile("L[\d]"))
    current_stream = streamset.filter(name=re.compile("C[\d]"))

    voltage_df = voltage_stream.filter(start=start, end=end).arrow_to_dataframe()
    current_df = current_stream.filter(start=start, end=end).arrow_to_dataframe()
    
#     voltage_df = voltage_df.to_pandas()
#     current_df = current_df.to_pandas()
    return voltage_df, current_df

In [None]:
start = datetime(2016, 4, 1)
end = start + timedelta(minutes=40)
width = timedelta(seconds=30)

voltage_df, current_df = streamset_to_dataframes_2(streamset, start, end)

voltage_df = df_to_radians(voltage_df, "L")
current_df = df_to_radians(current_df, "C")

figsize = (15, 5)
voltage_df[voltage_df.filter(like='ang_unwrap_').columns].plot(figsize=figsize, ylabel="Voltage Angle")
current_df[current_df.filter(like='ang_unwrap_').columns].plot(figsize=figsize, ylabel="Current Angle")

In [None]:
voltage_df.info()

## Unwrap the angles to radians and calculate power

In [None]:
def df_to_radians(df: pd.DataFrame, unit_signifier: str) -> pd.DataFrame:
    """
    Takes in a pandas DataFrame consisting of RawPoints data and 
    unwraps each column into radians, generating new complex data columns 
    (complex_1, complex_2, complex_3), returning the modified DataFrame.
    """
    # Assuming three-phase data.
    complex_col_count = 3
    
    for i in range(1, complex_col_count + 1):
        # Unwrape the phase angle data.
        ang_series = df[f"{collection}/{unit_signifier}{i}ANG"][:]
        ang_series = np.unwrap(np.deg2rad(ang_series))
        df[f"ang_unwrap_{i}"] = ang_series

        # calculate the complex series, with angle as the imaginary units.
        mag_series = df[f"{collection}/{unit_signifier}{i}MAG"][:]
        complex_column = f"{collection}/complex_{i}"
        df[complex_column] = mag_series * np.exp(1j * ang_series)
    
    return df


def calculate_power(voltage_df: pd.DataFrame, current_df: pd.DataFrame) -> List[pd.Series]:
    """
    Performs complex power calculation on the passed voltage and current
    DataFrames, returning the calculation in a list of datapoints.
    """
    col_list = [
        "sunshine/PMU2/complex_1",
        "sunshine/PMU2/complex_2",
        "sunshine/PMU2/complex_3",
    ]
    power_list = []
    
    # multiply voltage with complex current for each complex column.
    for col in col_list:
        complex_voltage = voltage_df[col][:]
        complex_current = current_df[col][:]
        power_list.append(complex_voltage * complex_current)

    return power_list

In [None]:
voltage_df = df_to_radians(voltage_df, "L")
current_df = df_to_radians(current_df, "C")
power_list = calculate_power(voltage_df, current_df)

## Plot the power

In [None]:
def plot_power(power_list: List[pd.Series]) -> None:
    """
    takes in a list of pandas Series representing the calculated 
    complex power of the voltage and current of PMU readings and 
    plots them on absolute, real and reactive power.
    """
    power_func_list = {
        np.abs: "Absolute Power - |S|",
        np.real: "Real Power - P",
        np.imag: "Reactive Power - Q",
    }
    
    # Loop over the chosen functions and plot the resulting Series.
    for func, y_label in power_func_list.items():
        plt.figure(figsize=figsize)
        
        plt.plot(func(power_list[0]), color='red', label='Phase A')
        plt.plot(func(power_list[1]), color='green', label='Phase B')
        plt.plot(func(power_list[2]), color='blue', label='Phase C')
        plt.legend()

        plt.xlabel("nanoseconds")
        plt.ylabel(y_label)

In [None]:
plot_power(power_list)

## Zoom in on a Peak

In [None]:
# Take another look at the full data range for our stream.
width = timedelta(minutes=10)
stats = datetime_to_statpoints(stream, first_timestamp, last_timestamp, width)
plot_statpoints(stats, 'stddev')

In [None]:
# Lets look at the first half of the data.
end = first_timestamp + timedelta(days = 31)
stats = datetime_to_statpoints(stream, first_timestamp, end, width)
plot_statpoints(stats, 'stddev')

In [None]:
# Check the min to avoid an outage.
plot_statpoints(stats, 'min')

In [None]:
# Zoom in on one of the peaks from the previous plot.
start = datetime(2016, 3, 7, 13, 35)
end = start + timedelta(minutes=40)
width = timedelta(seconds = 30)
stats = datetime_to_statpoints(stream, start, end, width)
plot_statpoints(stats, 'stddev')

In [None]:
# Fetch voltage and current DataFrames using our StreamSet.
voltage_df, current_df = streamset_to_dataframes(streamset, start, end)

voltage_df = df_to_radians(voltage_df, "L")
current_df = df_to_radians(current_df, "C")

figsize = (15, 5)
voltage_df[voltage_df.filter(like='ang_unwrap_').columns].plot(figsize=figsize, ylabel="Voltage Angle")
current_df[current_df.filter(like='ang_unwrap_').columns].plot(figsize=figsize, ylabel="Current Angle")

## Plot the voltage and current of our new data

In [None]:
# Fetch voltage and current DataFrames using our StreamSet.
voltage_df, current_df = streamset_to_dataframes(streamset, start, end)

# Plot the results.
voltage_df[voltage_df.filter(like='MAG').columns].plot(figsize=figsize)
voltage_df[voltage_df.filter(like='ANG').columns].plot(figsize=figsize)
current_df[current_df.filter(like='MAG').columns].plot(figsize=figsize)
current_df[current_df.filter(like='ANG').columns].plot(figsize=figsize)

## Calculate and plot power

In [None]:
# Once again calculate the power and plot the results.
voltage_df = df_to_radians(voltage_df, "L")
current_df = df_to_radians(current_df, "C")
power_list = calculate_power(voltage_df, current_df)
plot_power(power_list)