In [1]:
import sys
sys.path.insert(0,'..')

import dask_md_objs as dmdo
import pickle
import numpy as np
from typing import List

In [2]:
dataloc = "../../data/compressed_experimental_out.pickle"
data: dmdo.TaskHandler = None
with open(dataloc, 'rb') as f:
    data = pickle.load(f)

In [3]:
incoming: List[dmdo.WXferEvent] = data.return_all_wxfer_events(dmdo.TransferTypeEnum.INCOMING)
outgoing: List[dmdo.WXferEvent] = data.return_all_wxfer_events(dmdo.TransferTypeEnum.OUTGOING)

In [4]:
print("Returned lengths:\nINCOMING: {a}\t\tOUTGOING: {b}".format(a=len(incoming), b=len(outgoing)))

Returned lengths:
INCOMING: 3731		OUTGOING: 3247


Need to ensure that every transfer is accounted for by both an incoming and an outgoing event. How is this represented?

First, checking for an easy way out -- is every outgoing/incoming only ever representing 1 task?

In [5]:
o_n_1_task = [o.is_only_1_task() for o in outgoing]
print(o_n_1_task.count(True))
print(o_n_1_task.count(False))
i_n_1_task = [i.is_only_1_task() for i in incoming]
print(i_n_1_task.count(True))
print(i_n_1_task.count(False))

2895
352
3365
366


Sadly, not -- both have instances where multiple tasks are implicated.

In [6]:
print(outgoing[0])

Worker Transfer Event (Type: TransferTypeEnum.OUTGOING)
	Event time: 2024-04-18 11:56:08.261950
	Requestor (Them): tcp://10.201.0.229:37485	Fulfiller (Me): tcp://10.201.0.212:38553
	Start: 2024-04-18 11:56:08.497757	Middle: 2024-04-18 11:56:08.220428	End: 2024-04-18 11:56:08.509850	(Duration: 0.012092113494873)
	Total Transfer: 56
	Affiliated Keys:
		finalize-6b762101-9963-45d6-9013-208550e52467
		finalize-0201b5ac-c867-450e-a02d-084c45cb9afb
		finalize-b6180e12-5d43-4e5d-86ed-136681ea0f5c



In [7]:
print(incoming[0])

Worker Transfer Event (Type: TransferTypeEnum.INCOMING)
	Event time: 2024-04-18 11:56:08.228087
	Requestor (Them): tcp://10.201.0.212:38553	Fulfiller (Me): tcp://10.201.0.229:37485
	Start: 2024-04-18 11:56:08.482332	Middle: 2024-04-18 11:56:08.488770	End: 2024-04-18 11:56:08.495209	(Duration: 0.0128772258758544)
	Total Transfer: 56
	Affiliated Keys:
		finalize-6b762101-9963-45d6-9013-208550e52467
		finalize-0201b5ac-c867-450e-a02d-084c45cb9afb
		finalize-b6180e12-5d43-4e5d-86ed-136681ea0f5c



Follow up question: there are more incoming than outgoing. Why? Are there duplicated events, with the only difference being the requestor or the fulfiller?

In [19]:
mat_req: List[List[bool]] = []
mat_ful: List[List[bool]] = []
mat_all: List[List[bool]] = []
counts_req_t: List[int] = []
counts_ful_t: List[int] = []
counts_all_t: List[int] = []
for index, event in enumerate(incoming) :
    mat_req.append([])
    mat_ful.append([])
    mat_all.append([])
    for index2, event2 in enumerate(incoming[index+1:]) :
        mat_req[index].append(event.identical_except_requestor(event2))
        mat_ful[index].append(event.identical_except_fulfiller(event2))
        mat_all[index].append(event == event2)
    counts_req_t.append(mat_req[-1].count(True))
    counts_ful_t.append(mat_ful[-1].count(True))
    counts_all_t.append(mat_all[-1].count(True))

In [20]:
print("of {n} requestor rows, {m} had 0 Trues.".format(n=len(counts_req_t), m=counts_req_t.count(0)))
print("of {n} fulfiller rows, {m} had 0 Trues.".format(n=len(counts_ful_t), m=counts_ful_t.count(0)))
print("of {n} all rows, {m} had 0 Trues.".format(n=len(counts_all_t), m=counts_all_t.count(0)))

of 3731 requestor rows, 3731 had 0 Trues.
of 3731 fulfiller rows, 3731 had 0 Trues.
of 3731 all rows, 3731 had 0 Trues.


Follow up question: Do there exist cases where an incoming has an outgoing with reversed requestor/fulfiller?

In [21]:
# this is going to be some horrifying code: for every incoming, check if there exists an outgoing with reversed fulfiller/requestor
reversed_in_out = []
for i in range(0, len(incoming)) :
    for j in range(0, len(outgoing)) :
        if incoming[i].fulfiller == outgoing[j].requestor and \
            incoming[i].requestor == outgoing[j].fulfiller :
            reversed_in_out.append((incoming[i], outgoing[j]))

In [25]:
print("Of {n} possible matches, there were {m} ({o:0.2f}%) found pairs of matching incoming\outgoing messages.".format(n=len(incoming)*len(outgoing), m=len(reversed_in_out), o=len(reversed_in_out)/(len(incoming) * len(outgoing))))

Of 12114557 possible matches, there were 226757 (0.02%) found pairs of matching incoming\outgoing messages.


### Debugging Equivalency Tests

Resolved now, but previously:

`incoming[0].__eq__(incoming[0])` was returning `False`, but `outgoing[0].__eq__(outgoing[0])` was returning `True`. Did the exploration below; turns out that when `transfertype` was `TransferTypeEnum.INCOMING`, `compressed` is actually not provided, which results in `incoming[0].compressed` being `nan`. Importantly, `nan != nan`, so when `__eq__` checked if `self.compressed == other.compressed`, it was returning `False`.

This has been resolved by changing all equivalency checks in `WXferEvent` to only check if `compressed` is equals when `self.transfertype == TransferTypeEnum.OUTGOING`.

In [11]:
print(incoming[0].start == incoming[0].start)
print(incoming[0].stop == incoming[0].stop)
print(incoming[0].middle == incoming[0].middle)
print(incoming[0].duration == incoming[0].duration)
print(incoming[0].keys == incoming[0].keys)
print(incoming[0].total == incoming[0].total)
print(incoming[0].bandwidth == incoming[0].bandwidth)
print(incoming[0].compressed == incoming[0].compressed)
print(incoming[0].requestor == incoming[0].requestor)
print(incoming[0].fulfiller == incoming[0].fulfiller)
print(incoming[0].transfer_type == incoming[0].transfer_type)
print(incoming[0].time == incoming[0].time)

True
True
True
True
True
True
True
False
True
True
True
True


In [12]:
print(incoming[0].compressed)
print(incoming[0].compressed)
print(incoming[0].compressed == incoming[0].compressed)

nan
nan
False


Issue is that compressed is not always defined, and `nan != nan`. Is there a pattern for when compressed is not defined?

In [None]:
n_nan_inc = [np.isnan(n) for n in [i.compressed for i in incoming]].count(True)
n_nan_out = [np.isnan(n) for n in [o.compressed for o in outgoing]].count(True)

print("Number of incoming messages: {n}\t Nans: {m}".format(n=len(incoming), m=n_nan_inc))
print("Number of outgoing messages: {n}\t Nans: {m}".format(n=len(outgoing), m=n_nan_out))

Number of incoming messages: 3731	 Nans: 3731
Number of outgoing messages: 3247	 Nans: 0


In [14]:
outgoing[0].__eq__(outgoing[0])

True

In [15]:
incoming[0].__eq__(incoming[0])

True