In [195]:
# Data manipulation
import pandas as pd
import numpy as np
from collections import deque
import csv
import dask.dataframe

import os
import time
import datetime
from itertools import count

# Options for pandas: cap how many columns / rows are rendered when a frame
# is displayed, so wide or long frames do not flood the cell output.
pd.options.display.max_columns = 50
pd.options.display.max_rows = 30

# Visualizations
import plotly
import plotly.graph_objs as go
import plotly.offline as ply

# Initialise plotly's offline notebook mode for this kernel.
# NOTE(review): connected=True presumably makes plotly.js load from a CDN
# instead of being embedded in the notebook — confirm against the plotly docs.
plotly.offline.init_notebook_mode(connected=True)

import cufflinks as cf

# Put cufflinks in offline mode as well and select its "white" theme.
# NOTE(review): set_config_file presumably persists the theme to cufflinks'
# config file rather than only affecting this session — verify.
cf.go_offline(connected=True)
cf.set_config_file(theme="white")

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.animation as animation

# Autoreload extension
# Each extension is loaded only when its name is not yet in the kernel's
# extension manager, so re-running this cell does not load it a second time.
if "autoreload" not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
# nb_black extension
if "nb_black" not in get_ipython().extension_manager.loaded:
    %load_ext nb_black

# Select autoreload mode 2 so edits to imported modules are picked up
# without restarting the kernel.
%autoreload 2

<IPython.core.display.Javascript object>

In [124]:
# Location of the logged test data (CSV with a "Datetime" column).
filepath = "../WindowsTest/TestData.csv"

# Eager pandas copy of the file, with "Datetime" parsed into real timestamps.
data = pd.read_csv(filepath)
data = data.assign(Datetime=pd.to_datetime(data["Datetime"]))

# Lazy dask view of the same file, with the same column conversion applied.
dask_data = dask.dataframe.read_csv(filepath)
dask_data = dask_data.assign(
    Datetime=dask.dataframe.to_datetime(dask_data["Datetime"])
)

<IPython.core.display.Javascript object>

In [151]:
def binSearchDatetime(
    Datetime: "dask.dataframe.core.Series", target_datetime: datetime.datetime
) -> int:
    """Binary-search a sorted datetime series for the row at or just before a time.

    Inputs:
    - Datetime: datetime values sorted in ascending order, in dask series
      format. Rows are looked up with ``Datetime.loc[i]`` for integer ``i`` in
      ``0 .. len(Datetime) - 1``, and every lookup calls ``.compute()``, so
      each probe of the search triggers a dask computation.
      NOTE(review): assumes each integer label selects exactly one row
      (``.item()`` fails otherwise) — confirm for multi-partition data.
    - target_datetime: the time to locate. It must lie strictly between the
      first and the last value of ``Datetime``.

    Outputs:
    - (int) Index of the greatest datetime that is less than or equal to
      ``target_datetime``. When a value equals ``target_datetime`` exactly,
      the index of that value is returned.

    Raises:
    - AssertionError if ``target_datetime`` is not strictly inside the range
      covered by ``Datetime``.
    - ValueError if a probed value cannot be ordered against
      ``target_datetime`` (for example a missing timestamp).
    """
    # The annotation for ``Datetime`` is quoted so that defining this function
    # does not require the dask-internal attribute path to resolve.

    def value_at(idx):
        # Materialise the single value stored under integer label ``idx``.
        return Datetime.loc[idx].compute().item()

    #  Assume that Datetime is already sorted
    L_idx = 0
    R_idx = len(Datetime) - 1

    assert value_at(L_idx) < target_datetime < value_at(R_idx)

    # Invariant: value_at(L_idx) < target_datetime < value_at(R_idx).
    while R_idx - L_idx > 1:
        # Upper midpoint, computed with integer arithmetic so the index (and
        # therefore the return value) stays a plain int rather than a float.
        M_idx = (L_idx + R_idx + 1) // 2
        M = value_at(M_idx)

        if M > target_datetime:
            R_idx = M_idx
        elif M < target_datetime:
            L_idx = M_idx
        elif M == target_datetime:
            return M_idx
        else:
            # None of <, >, == held, so the search cannot make progress.
            raise ValueError(
                f"value at index {M_idx} cannot be compared with target_datetime"
            )

    return L_idx



5256.0


<IPython.core.display.Javascript object>

In [None]:
# The window ends "now", truncated to whole seconds.
window_end_time = datetime.datetime.now().replace(microsecond=0)

# Length of history to keep, and the resulting start of the window.
history_window = datetime.timedelta(minutes=20)
window_start_time = window_end_time - history_window

# Row labels bounding the window: from the row located for the window start
# up to the last row of the data.
window_start_idx = binSearchDatetime(dask_data["Datetime"], window_start_time)
window_end_idx = len(dask_data) - 1

assert window_start_idx < window_end_idx

# Materialise only the rows inside the window, one column at a time.
D, H = (
    dask_data[column].loc[window_start_idx:window_end_idx].compute()
    for column in ("Datetime", "Humidity")
)



In [227]:
# Print the last row of the CSV: the file's lines are fed to csv.reader from
# last to first, and only the first parsed row is taken.
# NOTE: `reversed_lines` is defined in a later cell of this notebook, so that
# cell must have been executed before this one can run.
with open(filepath, "r") as textfile:
    p = csv.reader(reversed_lines(textfile))
    print(next(p))

['20211-09-26 16:22:05', '-5.75', '-7.75']


<IPython.core.display.Javascript object>

In [223]:
def reversed_lines(f):
    """Yield the lines of file ``f`` from the last line to the first.

    The content is consumed through ``reversed_blocks(f)`` and walked
    character by character from the end. Each yielded line keeps its trailing
    newline; only a final line that has no newline in the file is yielded
    without one.
    """
    pending = []  # characters of the line being assembled, last character first
    for chunk in reversed_blocks(f):
        for ch in chunk[::-1]:
            # A newline seen while characters are pending terminates the line
            # *before* the pending one, so the pending line is now complete.
            if ch == "\n" and pending:
                yield "".join(reversed(pending))
                pending = []
            pending.append(ch)
    # Whatever is left over is the first line of the file.
    if pending:
        yield "".join(reversed(pending))


def reversed_blocks(f, blocksize=100):
    """Generate blocks of file's contents in reverse order.

    Parameters:
    - f: an open, seekable file object. Text-mode files (objects exposing a
      ``buffer`` attribute) are read through that underlying byte stream and
      decoded with the file's own ``encoding`` / ``errors`` settings. Any other
      object (binary files, ``io.StringIO``) is read directly.
    - blocksize: approximate size of each block. For text-mode files it is
      counted in bytes, and a block may grow by up to three bytes so that it
      starts on a character boundary.

    Yields the content from the end of the file towards the start. Joining the
    blocks in reverse order reproduces the content exactly once. For text-mode
    files the line endings are yielded as stored in the file (``"\\r\\n"`` is
    not translated to ``"\\n"``).

    Raises ValueError if ``blocksize`` is smaller than 1.

    NOTE(review): the boundary alignment relies on a misaligned block start
    producing a decode error at its first byte, which holds for UTF-8 and is
    never needed for single-byte encodings — confirm before using other
    multi-byte encodings.
    """
    if blocksize < 1:
        # A non-positive block size would never make progress.
        raise ValueError("blocksize must be at least 1")

    raw = getattr(f, "buffer", None)
    if raw is None:
        # No separate byte layer: tell(), seek() and read() all count in the
        # same unit here, so plain offset arithmetic is safe.
        f.seek(0, os.SEEK_END)
        here = f.tell()
        while 0 < here:
            delta = min(blocksize, here)
            here -= delta
            f.seek(here, os.SEEK_SET)
            yield f.read(delta)
        return

    # Text-mode file. Its read(n) counts characters while positions are byte
    # offsets, so offset arithmetic on the text layer makes blocks overlap as
    # soon as the file contains "\r\n" or multi-byte characters. Work on the
    # byte stream underneath instead and decode each block ourselves.
    encoding = getattr(f, "encoding", None) or "utf-8"
    errors = getattr(f, "errors", None) or "strict"

    # Seeking through the text layer first discards its buffered state and
    # leaves the byte stream positioned at the end of the file.
    f.seek(0, os.SEEK_END)
    end = raw.tell()
    while 0 < end:
        start = max(0, end - blocksize)
        raw.seek(start, os.SEEK_SET)
        chunk = raw.read(end - start)

        widened = 0
        while True:
            try:
                text = chunk.decode(encoding)
                break
            except UnicodeDecodeError as exc:
                if exc.start != 0 or start == 0 or widened == 3:
                    # Not a character split by the block boundary: apply the
                    # file's own error policy (re-raises when it is "strict").
                    text = chunk.decode(encoding, errors)
                    break
                # The block starts in the middle of a character; pull the
                # start back one byte and try again.
                start -= 1
                widened += 1
                raw.seek(start, os.SEEK_SET)
                chunk = raw.read(end - start)

        yield text
        end = start

<IPython.core.display.Javascript object>