In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
# Configure Bokeh so that show() renders plots inline in this notebook
output_notebook()

In [2]:
# Default Matplotlib Plot of Dataframe Column 
def dfplot(df, col, cond_col="", cond_val=0):
    """Draw a Matplotlib line plot of the non-null values of one column.

    Args:
        df: DataFrame to read from.
        col: Label of the column to plot; also used as the plot title.
        cond_col: Optional label of a filter column. The empty string
            (default) disables filtering.
        cond_val: When filtering is enabled, only rows whose ``cond_col``
            equals this value are plotted.
    """
    # Narrow to the matching rows first, then discard missing samples.
    rows = df if cond_col == "" else df[df[cond_col] == cond_val]
    values = rows[col]
    values = values[values.notna()]
    values.plot(figsize=(15,5))
    plt.title(col)
    plt.show()

# Sum of Dataframe Column
def dfsum(df, col, cond_col="", cond_val=0):
    """Return the sum of a column, optionally restricted to matching rows.

    Args:
        df: DataFrame to read from.
        col: Label of the column to add up.
        cond_col: Optional label of a filter column. The empty string
            (default) sums over every row.
        cond_val: When filtering is enabled, only rows whose ``cond_col``
            equals this value contribute to the sum.

    Returns:
        The result of ``Series.sum()`` on the selected values.
    """
    # Unconditional case: no filter column was requested.
    if cond_col == "":
        return df[col].sum()
    matching = df[cond_col] == cond_val
    return df[matching][col].sum()

# Matplotlib Histogram of Value Counts in Dataframe Column
def dfplotvc(df, col, cond_col="", cond_val=0):
    """Draw a Matplotlib bar chart of the value counts of one column.

    Args:
        df: DataFrame to read from.
        col: Label of the column whose distinct values are counted; also
            used as the plot title.
        cond_col: Optional label of a filter column. The empty string
            (default) disables filtering.
        cond_val: When filtering is enabled, only rows whose ``cond_col``
            equals this value are counted.
    """
    if cond_col != "":
        vc = df[df[cond_col] == cond_val][col].value_counts()
    else:
        vc = df[col].value_counts()
    # Create exactly one figure and draw onto its axes explicitly.  The
    # previous clf() + figure() pair could leave an extra, empty figure open
    # (clf() creates a figure when none is current), which then got shown too.
    _, ax = plt.subplots(figsize=(15,8))
    vc.plot(kind='bar', ax=ax)
    ax.set_title(col)
    plt.show()

# Interactive Bokeh Plot of Dataframe Column
# ... note that x axis is a recreated list of item number
# ... and does not reflect original index in dataframe
def dfbokeh(df, col, cond_col="", cond_val=0):
    """Show an interactive Bokeh line plot of the non-null values of a column.

    The x axis is the running item number (0, 1, 2, ...) of the plotted
    values, not the original dataframe index.

    Args:
        df: DataFrame to read from.
        col: Label of the column to plot; also used as the plot title.
        cond_col: Optional label of a filter column. The empty string
            (default) disables filtering.
        cond_val: When filtering is enabled, only rows whose ``cond_col``
            equals this value are plotted.
    """
    # `height`/`width` replace the `plot_height`/`plot_width` keywords, which
    # Bokeh deprecated and later removed.
    p = figure(title=col, height=300, width=600)
    if cond_col != "":
        c = df[df[cond_col] == cond_val]
        s = c[col].dropna()
    else:
        s = df[col].dropna()
    y = s.values
    # Regenerated positions: the surviving rows are renumbered from zero.
    x = list(range(len(y)))
    p.line(x, y, color="#2222aa", line_width=3)
    show(p, notebook_handle=True)

# Creates a QtDialog for Opening a File (local only)
def openfile_dialog():
    """Open a Qt "select a file" dialog and return the user's choice.

    Returns:
        Whatever ``QFileDialog.getOpenFileName`` returns. Under PyQt5 that
        is a ``(path, selected_filter)`` tuple, so callers read the chosen
        path from index 0.
    """
    # Local import keeps PyQt5 optional for the rest of the notebook.
    from PyQt5 import QtWidgets
    # Qt allows only one QApplication per process: reuse a running one and
    # otherwise create it with an empty argv.  (The previous code passed the
    # builtin `dir` function as argv, which is not a string.)  The reference
    # is kept in a local so the application outlives the dialog call.
    app = QtWidgets.QApplication.instance() or QtWidgets.QApplication([])
    fname = QtWidgets.QFileDialog.getOpenFileName(None, "Select a file...", '.', filter="All files (*)")
    return fname

In [None]:
# Run `bin/sigview datasrv.lua <json file>` prior to executing any cell after this

In [None]:
# For environments that support a Qt dialog:
#   uncomment and execute the lines below to select a file that is read into the dataframe

# csvfile = openfile_dialog()
# df = pd.read_csv(csvfile[0])

In [4]:
# For environments that support local file access
#   copy the desired data file into the same directory as this ipynb notebook as "scihist.csv"
#   then execute the line below to read the file into the dataframe

df = pd.read_csv("scihist.csv")

In [5]:
# Preview the first rows to confirm the CSV parsed as expected
df.head()

Unnamed: 0,GPS,MFC,PCE,TYPE,RWS,RWW,DLBW1,DLBW2,DLBW3,DLBW4,...,MFCERR,HDRERR,FMTERR,DLBERR,TAGERR,PKTERR,DLBS1,DLBS2,DLBS3,DLBS4
1980:6:0:0:0:0,316563796,1,4,3240880.0,4700.0,41,41,0,0,3243678.0,...,0,0,0,0,0,250,248,0,0,
1980:6:0:0:0:0,316563796,1,5,3240900.0,4700.0,41,41,0,0,3243695.6,...,0,0,0,0,0,250,248,0,0,
1980:6:0:0:0:0,316563797,1,4,3240880.0,4680.0,41,41,0,0,3243672.3,...,0,0,0,0,0,250,248,0,0,
1980:6:0:0:0:0,316563797,1,5,3240900.0,4700.0,41,41,0,0,3243677.1,...,0,0,0,0,0,250,248,0,0,
1980:6:0:0:0:0,316563799,1,4,3240880.0,4680.0,41,41,0,0,3243665.9,...,0,0,0,0,0,250,246,0,0,


In [6]:
# Preview the last rows of the dataframe
df.tail()

Unnamed: 0,GPS,MFC,PCE,TYPE,RWS,RWW,DLBW1,DLBW2,DLBW3,DLBW4,...,MFCERR,HDRERR,FMTERR,DLBERR,TAGERR,PKTERR,DLBS1,DLBS2,DLBS3,DLBS4
2019:257:3:59:59:829,316923791,1,5,3360920.0,3740.0,33,0,0,0,0.0,...,0,0,0,0,0,190,0,0,0,
2019:257:3:59:59:849,316923792,1,4,3360900.0,3720.0,33,0,0,0,3362953.5,...,0,0,0,0,0,190,0,0,0,
2019:257:3:59:59:849,316923792,1,5,3360940.0,3740.0,33,0,0,0,0.0,...,0,0,0,0,0,190,0,0,0,
2019:257:3:59:59:869,316923793,1,4,3360900.0,3720.0,33,0,0,0,3362954.9,...,0,0,0,0,0,190,0,0,0,
2019:257:3:59:59:869,316923793,1,5,3360940.0,3740.0,33,0,0,0,0.0,...,0,0,0,0,0,190,0,0,0,


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column
df.describe()

Unnamed: 0,GPS,MFC,PCE,TYPE,RWS,RWW,DLBW1,DLBW2,DLBW3,DLBW4,...,MFCERR,HDRERR,FMTERR,DLBERR,TAGERR,PKTERR,DLBS1,DLBS2,DLBS3,DLBS4
count,711014.0,711014.0,711014.0,711014.0,711014.0,711014.0,711014.0,711014.0,711014.0,711014.0,...,711014.0,711014.0,711014.0,711014.0,711014.0,711014.0,711014.0,711014.0,711014.0,0.0
mean,316743800.0,1.0,4.5,3177969.0,8121.590911,138.747364,71.82658,17.404391,0.114431,2211794.0,...,0.0,0.0,0.0,3e-06,1.4e-05,455.322815,357.166672,22.629867,0.079875,
std,104530.8,0.0,0.5,544395.1,9076.153158,204.829963,143.710018,74.943245,6.568314,1538144.0,...,0.0,0.0,0.0,0.001677,0.005563,558.967321,721.61983,78.419325,6.760104,
min,316563800.0,1.0,4.0,299900.0,200.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
25%,316652700.0,1.0,4.0,3228140.0,3740.0,33.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,164.0,0.0,0.0,0.0,
50%,316743800.0,1.0,4.5,3260420.0,3740.0,41.0,17.0,0.0,0.0,3239930.0,...,0.0,0.0,0.0,0.0,0.0,206.0,2.0,0.0,0.0,
75%,316834900.0,1.0,5.0,3320760.0,7280.0,149.0,65.0,0.0,0.0,3287775.0,...,0.0,0.0,0.0,0.0,0.0,494.0,308.0,0.0,0.0,
max,316923800.0,1.0,5.0,3405840.0,35460.0,1021.0,1021.0,1021.0,703.0,3409349.0,...,0.0,0.0,0.0,1.0,3.0,3300.0,3332.0,2970.0,1024.0,


In [None]:
# Everything below this cell is for example use only; replace the column names with ones that exist in your dataframe (see df.keys())

In [None]:
# Useful to list longer slices of a dataframe
# (the old 'display.height' option no longer exists in pandas and raises
#  OptionError; 'display.max_rows' alone controls how many rows are shown)
pd.set_option('display.max_rows', 500)

In [None]:
# Display a slice of the dataframe: rows at positions 1 through 4 (the end of the slice is exclusive)
df[1:5]

In [None]:
# Display all rows whose value in a column is one of a given list of values
# ... in this case, all rows where "SciHist[1].TYPE" is 4.0
df[df["SciHist[1].TYPE"].isin([4.0])]

In [None]:
# Print the sum of one column over the entire dataframe, then the sum of
# another column restricted to rows where "SciHist[1].TYPE" equals 5.0
print(dfsum(df, "A_DFC2.HK.TxPulsesInMajorFrame"))
print(dfsum(df, "SciHist[1].SIGPES", "SciHist[1].TYPE", 5.0))

In [None]:
# Create a value count plot to see which values are present in a column, and how often each occurs
dfplotvc(df, "A_DFC2.HK.TxPulsesInMajorFrame")

In [None]:
# Plot a column (Matplotlib), using only the rows where "SciHist[1].TYPE" equals 4
dfplot(df, 'SciHist[1].SIGPES', 'SciHist[1].TYPE', 4)

In [None]:
# Create an interactive Bokeh plot of a column, using only the rows where "SciHist[1].TYPE" equals 5
dfbokeh(df, 'SciHist[1].SIGPES', 'SciHist[1].TYPE', 5)

In [None]:
# List the column labels available in the dataframe
df.keys()

In [None]:
# Interactive Bokeh plot of the RWS column
dfbokeh(df, 'RWS')

In [None]:
# Interactive Bokeh plot of the TXCNT column
dfbokeh(df, 'TXCNT')

In [None]:
# Interactive Bokeh plot of the SIGRNG column (original note: PCE1)
dfbokeh(df, 'SIGRNG')

In [None]:
# Interactive Bokeh plot of the A_DFC1.HK.Tracking_FIFOEmpty column (original note: PCE1)
dfbokeh(df, 'A_DFC1.HK.Tracking_FIFOEmpty')

In [None]:
# Matplotlib plot of the A_DFC2.HK.TxPulsesInMajorFrame column (original note: PCE2)
dfplot(df, 'A_DFC2.HK.TxPulsesInMajorFrame')