In [58]:
from __future__ import print_function, division
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [108]:
# Read the space-separated labels file; its first column becomes the index
# and the remaining column is named 'label'.
labels = pd.read_csv(
    'labels.dat',
    sep=' ',
    header=None,
    names=['label'],
    index_col=0,
)
# The index of the labels table is the list of channel numbers.
channels = labels.index
print(list(channels))

[1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 99]


In [160]:
# Load Current Cost data from disk

# Loaded readings, keyed by channel number (filled in by the loading loop).
all_data = {}

# Bounds (Unix timestamps, in seconds) outside of which readings are
# treated as spurious and dropped: START <= timestamp < END.
START = 1325376000  # 2012-01-01 00:00:00 UTC
END = 1388534400  # 2014-01-01 00:00:00 UTC


def to_datetime(unix_timestamps):
    """Convert Unix timestamps (in seconds) to 'Europe/London' datetimes.

    The timestamps are parsed as UTC instants and the result is then
    converted to the 'Europe/London' timezone.
    """
    as_utc = pd.to_datetime(unix_timestamps, unit='s', utc=True)
    return as_utc.tz_convert('Europe/London')


# Read every channel file, drop out-of-range timestamps and store the
# result in `all_data`, keyed by channel number.
for channel in channels:
    filename = 'channel_{:02d}.dat'.format(channel)
    print(filename)
    data = pd.read_csv(
        filename, sep=' ', index_col='index',
        usecols=['index', 'power'],
        names=['index', 'power', 'device', ''])
    # Keep the timestamps as 64-bit integers: narrowing to 32 bits *before*
    # the range check below could wrap a spurious, out-of-range value into
    # the valid window instead of letting the filter discard it.
    data.index = data.index.astype(np.int64)

    # Remove spurious time stamps (keep START <= timestamp < END)
    in_range = (data.index >= START) & (data.index < END)
    data = data[in_range]

    # Convert the integer Unix timestamps to timezone-aware datetimes
    data.index = to_datetime(data.index)

    # Save data
    all_data[channel] = data

channel_01.dat
channel_05.dat
channel_06.dat
channel_07.dat
channel_08.dat
channel_09.dat
channel_10.dat
channel_11.dat
channel_12.dat
channel_13.dat
channel_14.dat
channel_15.dat
channel_16.dat
channel_17.dat
channel_18.dat
channel_19.dat
channel_20.dat
channel_21.dat
channel_22.dat
channel_23.dat
channel_24.dat
channel_25.dat
channel_26.dat
channel_27.dat
channel_28.dat
channel_29.dat
channel_30.dat
channel_31.dat
channel_32.dat
channel_33.dat
channel_99.dat


In [165]:
# Plot periods where there are no samples for MAX_SAMPLE_PERIOD

# Longest allowed gap between consecutive samples, in seconds; any longer
# gap is reported as a dropout by get_dropouts().
MAX_SAMPLE_PERIOD = 300


def get_dropouts(index, max_sample_period=None):
    """Find the gaps in `index` that are longer than a threshold.

    Parameters
    ----------
    index : pandas DatetimeIndex
        Sample times (e.g. the index of one channel's data after it has
        been converted to datetimes). Assumed to be sorted in increasing
        time order.
    max_sample_period : number, optional
        Longest acceptable gap between consecutive samples, in seconds.
        Defaults to the module-level MAX_SAMPLE_PERIOD.

    Returns
    -------
    dropouts : pandas Series
        Each row is a period where data is missing in `index`.
        The index is the start of the period (the last sample before the
        gap), the value is the end of the period (the first sample after
        the gap). Timestamps are taken from `index` unchanged, so they
        keep its timezone.
    """
    if max_sample_period is None:
        max_sample_period = MAX_SAMPLE_PERIOD
    # Gap between consecutive samples, in seconds. Dividing the timedelta64
    # differences by one second does not depend on the platform's integer
    # width or on the time resolution of the index.
    gaps = np.diff(index.values) / np.timedelta64(1, 's')
    i_above_thresh = np.where(gaps > max_sample_period)[0]
    dropout_starts = index[i_above_thresh]
    dropout_ends = index[i_above_thresh + 1]
    return pd.Series(dropout_ends, index=dropout_starts)


# Draw one horizontal row per channel; every dropout is a thick black
# segment from the start to the end of the gap.
fig, ax = plt.subplots()
for i, data in enumerate(all_data.values()):
    dropouts = get_dropouts(data.index)
    # Series.items() yields (index, value) pairs, i.e. (start, end);
    # Series.iteritems() is no longer available in current pandas.
    for start, end in dropouts.items():
        ax.plot([start, end], [i, i], linewidth=5, color='k')

# Label each row with its channel number (same order as all_data.values()).
ax.set_yticks(range(len(all_data)))
ax.set_yticklabels(list(all_data.keys()))
ax.set_xlabel("Time")
ax.set_ylabel("Channel")
ax.set_title(
    "Gaps longer than {} s between samples".format(MAX_SAMPLE_PERIOD))
plt.show()

In [159]:
# Just try plotting the index of all the data

fig, ax = plt.subplots()
for i, data in enumerate(all_data.values()):
    # data.index already holds timezone-aware datetimes (it is converted
    # when the data is loaded), so it is plotted as-is rather than being
    # passed through to_datetime() a second time.
    ax.scatter(
        data.index, np.full(len(data), i),
        marker=',', alpha=0.25, s=1
    )
ax.set_xlabel("Time")
ax.set_ylabel("Channel (row position in all_data)")
ax.set_title("Time of every received sample")
plt.show()

In [211]:
# Dropouts rates per minute
# This is probably the most informative plot!
#
# NOTE: despite the name, the values computed here are the number of samples
# received in each minute divided by the expected number, i.e. the
# proportion of *successful* packets (higher means fewer dropouts).

# Divisor used to normalise the per-minute sample counts.
# Presumably one sample is expected every 6 seconds -- TODO confirm.
EXPECTED_SAMPLES_PER_MINUTE = 10

rates_by_channel = {}
# dict.items() works on both Python 2 and 3 (iteritems() is Python 2 only).
for channel, data in all_data.items():
    count = pd.Series(1, index=data.index)
    # Count the samples in every one-minute bin; minutes without samples
    # count as zero.
    samples_per_minute = count.resample('min').sum().fillna(0)
    rates_by_channel[channel] = (
        samples_per_minute / EXPECTED_SAMPLES_PER_MINUTE)
# One column per channel, one row per minute.
dropout_rates = pd.DataFrame(rates_by_channel)

In [262]:
# Restrict the per-minute rates to the 09:00-15:00 window on 2012-08-07,
# then transpose the values so that each row of the image is one column
# (channel) of `dropout_rates`.
window = slice("2012-08-07 09:00", "2012-08-07 15:00")
dropout_rates_windowed = dropout_rates.loc[window]
img = dropout_rates_windowed.values.T

In [263]:
from matplotlib.cm import Blues
import matplotlib.dates as mdates

In [282]:
# Heat map of the windowed rates: one row per channel, one column per minute.
index = dropout_rates_windowed.index
n_rows = img.shape[0]

fig, ax = plt.subplots()

# Each column of `img` is a one-minute bin labelled by its start time, so
# the image spans from the first label to one minute after the last label.
# date2num() is the public matplotlib API for converting datetimes to the
# float date numbers used on date axes.
left = mdates.date2num(index[0])
right = mdates.date2num(index[-1] + pd.Timedelta(minutes=1))
im = ax.imshow(
    img, aspect='auto', interpolation='none', cmap=Blues,
    extent=(left, right, 0, n_rows)  # left, right, bottom, top
)

# Row 0 of the image is drawn at the top (imshow's default origin), so the
# centre of row k sits at y = n_rows - k - 0.5. Label each row with the
# channel (column of dropout_rates_windowed) it shows.
ax.set_yticks(n_rows - 0.5 - np.arange(n_rows))
ax.set_yticklabels(dropout_rates_windowed.columns)

# Format
ax.xaxis.set_major_formatter(
    mdates.DateFormatter('%Y-%m-%d %H:%M', tz=index.tz))
ax.xaxis.set_major_locator(
    mdates.HourLocator())
ax.set_xlabel("Time")
ax.set_ylabel("Channel")
ax.set_title("Proportion of successful packets per minute")
fig.colorbar(im, ax=ax)

fig.autofmt_xdate()
plt.show()