In [None]:
# Sources
# Implementation adapted from: 
# [1] https://deparkes.co.uk/2021/09/05/python-timeline-plot/
# [2] https://matplotlib.org/stable/gallery/lines_bars_and_markers/broken_barh.html

# Imports 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Layout constants shared by the lane-assignment and drawing cells.
WIDTH = 1 / 3     # height of each bar on the y axis
offset = WIDTH    # vertical step applied when a bar is bumped to a new lane

In [None]:
# Load the reading log and normalise it in one pass:
#   * `start` / `end` are parsed into datetimes,
#   * `diff` holds the elapsed time between them,
#   * rows without a series are grouped under the placeholder "-".
source = pd.read_csv("./books.csv").assign(
    start=lambda frame: pd.to_datetime(frame['start']),
    end=lambda frame: pd.to_datetime(frame['end']),
    diff=lambda frame: frame['end'] - frame['start'],
    series=lambda frame: frame['series'].fillna("-"),
)
source

In [None]:
# Convert `start` from datetimes to decimal calendar years (e.g. 2021.5 is the
# middle of 2021) so it can be used directly as an x coordinate.
#
# The column is overwritten in place, so the conversion is guarded by a dtype
# check: re-running this cell after `start` already holds floats is a no-op
# instead of an error.
if pd.api.types.is_datetime64_any_dtype(source['start']):
    start_dt = source['start']

    # Days elapsed since 1 January of the same year, including time of day.
    elapsed_days = (
        (start_dt.dt.dayofyear - 1)
        + (start_dt - start_dt.dt.normalize()).dt.total_seconds() / 86400
    )

    # Use the real length of each year. Dividing Unix seconds by a fixed
    # 365-day year ignores leap days and lands roughly 13 days late by 2021.
    days_in_year = 365 + start_dt.dt.is_leap_year.astype(int)

    source['start'] = start_dt.dt.year + elapsed_days / days_in_year

In [None]:
def checkCollision(a,b):
    """Report whether two timeline entries would overlap if drawn on the same lane.

    Each entry is a mapping with 'start', 'duration' and 'name'. An entry
    occupies the x range from its start to whichever reaches further: the end
    of its bar (start + duration) or the end of its label, estimated at one
    tenth of an x unit per character of the name.

    Returns False only when one entry begins strictly after both the bar end
    and the label end of the other; every other arrangement, including entries
    that merely touch, is reported as a collision (True).
    """
    def footprint(item):
        # (left edge, right edge of the bar, estimated right edge of the label)
        left = item['start']
        bar_right = left + item['duration']
        label_right = left + len(item['name']) * 1/10
        return left, bar_right, label_right

    a_left, a_bar_right, a_label_right = footprint(a)
    b_left, b_bar_right, b_label_right = footprint(b)

    # The entries are disjoint when either one starts past everything the
    # other one draws (bar and label alike).
    disjoint = (
        (a_left > b_bar_right and a_left > b_label_right)
        or (b_left > a_bar_right and b_left > a_label_right)
    )
    return not disjoint

In [None]:
# Assign every book a vertical lane, one band of lanes per series.
#
# Outputs of this cell:
#   series_groups[s] - the rows of `source` that belong to series s
#   series_data[s]   - one dict per book: start, duration, name, position (y)
#   series_idx[s]    - running index of series s in order of first appearance
#   y_ticks          - base y position of each series, in the same order
series_groups = {}
series_data = {}
series_idx = {}
idx = -1

y_ticks = []
position = 0

for idx, series in enumerate(source.series.unique()):
    members = source[source.series == series]
    placed = []

    series_groups[series] = members
    series_data[series] = placed
    series_idx[series] = idx

    # The tick for this series sits on its base lane.
    y_ticks.append(position)
    max_offset = 0

    # Place the books in chronological order, starting each one on the base lane.
    for _, row in members.sort_values(by='start').reset_index().iterrows():
        entry = {
            "start": row.start,
            "duration": row['diff'].days/365,
            "name": row['name'].strip(),
            "position": position
        }

        # Bump the book up one lane at a time until nothing already placed on
        # its current lane overlaps it.
        while True:
            clashes = [
                other for other in placed
                if other["position"] == entry["position"] and checkCollision(other, entry)
            ]
            if len(clashes) == 0:
                break
            blocker = clashes[0]
            print(f"input point '{entry['name']}' ({round(entry['start'],3)}, {round(entry['start'] + entry['duration'],3)})", "\tcollided with:", f"'{blocker['name']}' ({round(blocker['start'],3)}, {round(blocker['start'] + blocker['duration'],3)})")
            entry['position'] += offset

        # Remember how far above the base lane this series has grown.
        max_offset = max(entry['position'] - position, max_offset)

        placed.append(entry)

    # The next series starts above the tallest lane used here, plus a gap.
    position += 1.5 * offset + max_offset

In [None]:
# Figure and axis set-up taken from source [1]; only the tick labels differ.
# One y tick per series, placed on that series' base lane and labelled with
# the series name (both sequences follow the order of source.series.unique()).
fig, gnt = plt.subplots(figsize=(16, 9))
y_tick_labels = source.series.unique()
ticks = y_ticks
gnt.set_yticks(ticks)
gnt.set_yticklabels(y_tick_labels)

def get_color():
    """Return one fill colour drawn at random from a fixed five-colour palette.

    The draw uses NumPy's global random state, so colours differ between runs
    unless that state is seeded beforehand.
    """
    palette = ['orange', 'lightgreen', 'lightblue', 'tan', 'yellow']
    picked = np.random.choice(palette)
    return picked

# Draw every book as a horizontal bar on its assigned lane, with the title
# written from the bar's left edge.
for series in source.series.unique():
    for row in series_data[series]:
        x_span = (row['start'], row['duration'])            # (left edge, width)
        y_band = (row['position'] - WIDTH/2, WIDTH)         # bar centred on the lane
        gnt.broken_barh([x_span],
                        y_band,
                        facecolors=get_color(),
                        label=row['name'])
        # Nudge the text slightly below the lane centre.
        gnt.text(row['start'], row['position'] - WIDTH/20, row['name'])