In [66]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [67]:
df1 = pd.read_csv("../data1.csv")
name_to_idx = {'outside': 0} | {'r' + str(i): i for i in range(1, 35)} | {'c1': 35, 'c2': 36}
areas = ['r' + str(i) for i in range(1, 35)] + ['c1', 'c2', 'outside']
truth1 = df1[areas].rename(name_to_idx, axis=1)

In [69]:
# display(truth1.head())
# display(truth1.shift().head())

### deltas1 stores the change in people in each room per time period.
deltas1 = (truth1 - truth1.shift()).dropna().reset_index(drop=True)
display(deltas1.head(1))

# Index 200 is interesting - indicates a potential move from c2 to r27, or r13 to r27. 
moves = {K:V for K, V in dict(deltas1.iloc[200]).items() if V != 0}
print(moves)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,28,29,30,31,32,33,34,35,36,0
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


{12: 1.0, 13: -1.0, 27: 1.0, 35: 1.0, 36: -2.0}


In [6]:
candidateNeighbours = {area: set() for area in range(37)}
nTransitions = np.zeros((37, 37))
# We're going to populate the candidateNeighbours dictionary, and manually remove 'bad' looking transitions afterwards

# Transitions made in day 1
for idx, move in deltas1.iloc[:,].iterrows():
    moveFrom = [K for K, V in dict(move).items() if V < 0]
    moveTo = [K for K, V in dict(move).items() if V > 0]
    for start in moveFrom:
        for end in moveTo:
            candidateNeighbours[start].add(end)
            nTransitions[start, end] += 1


In [7]:
for area, neighbours in candidateNeighbours.items():
    print(f"'{area}': {sorted(list(neighbours))}")
    ...

'0': [2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36]
'1': [0, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 20, 22, 24, 25, 26, 27, 28, 29, 30, 32, 35, 36]
'2': [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36]
'3': [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36]
'4': [1, 2, 6, 7, 9, 10, 12, 14, 16, 17, 20, 23, 25, 26, 27, 28, 29, 30, 32, 35, 36]
'5': [1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 25, 26, 27, 28, 29, 30, 35, 36]
'6': [0, 1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 16, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36]
'7': [1, 2, 3, 6, 8, 11, 12, 13, 14, 16, 17, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 35, 36]
'8': [1, 2, 3, 6, 12, 14, 15, 16, 17, 19, 20, 22, 23, 26, 27, 28, 29, 35, 36]
'9': [1, 3, 6, 10, 12, 13, 14, 16, 20

In [59]:
nTransitions

array([[  0.,   0.,   3., ...,   2.,  14.,   7.],
       [  3.,   0.,  74., ...,   0.,  13.,  17.],
       [  6.,  71.,   0., ...,   2.,  36.,  27.],
       ...,
       [  0.,   3.,   4., ...,   0.,   5.,   6.],
       [ 13.,  14.,  33., ...,  12.,   0., 169.],
       [ 12.,  27.,  35., ...,   4., 155.,   0.]])

In [75]:
# Pruning time
removed = 0
for area, neighbours in candidateNeighbours.items():
    toRemove = set()
    for n in neighbours:
        if nTransitions[area, n] < sum(nTransitions[area,]) * 0.01:
            removed += 1
            toRemove.add(n)
    for n in toRemove:
        candidateNeighbours[area].remove(n)
print(f"Removed {removed} insignificant connections")

removed = 0
for area, neighbours in candidateNeighbours.items():
    toRemove = set()
    for n in neighbours:
        if area not in candidateNeighbours[n]:
            # print(f"Removed connection {area}->{n} as there was no connection {n}->{area}")
            removed += 1
            toRemove.add(n)
    for n in toRemove:
        candidateNeighbours[area].remove(n)
print(f"Removed {removed} un-reciprocal connections")

Removed 271 insignificant connections
Removed 282 un-reciprocal connections


### Attempt 2
We still use the deltas1 from above, which indicates the change in room occupancy from step to step.
The idea is to try and decompose this into a guess of what transitions truly occurred.
In a case for e.g. where there the only thing that happens is area 9 increases by 1, and area 36 decreases by 1,
we're going to assume that there was one transition -> 36->9.

Oh. Maybe not.
Consider from above:

```
# Index 200 is interesting - indicates a potential move from c2 to r27, or r13 to r27. 
moves = {K:V for K, V in dict(deltas1.iloc[200]).items() if V != 0}
print(moves)
```

{12: 1.0, 13: -1.0, 27: 1.0, 35: 1.0, 36: -2.0}

Could just be 2 people entering c1, 1 person going from c1 to r27, which is probably more likely...

In [18]:
for idx, step in deltas1.iterrows():
    enters = []
    exits = []
    for K, V in dict(step).items():
        if V == 0:
            continue
        if V > 0:
            enters += [K] * int(V)
        exits += [K] * (-int(V))

    if idx % 10 == 0:
        print(f"Iteration {idx}:")
        print(f"Entrances: {enters}")
        print(f"Exits: {exits}")

    if idx >= 100:
        break


Iteration 10:
Entrances: [14, 14, 0, 0]
Exits: [22, 22, 22, 22]
Iteration 20:
Entrances: [9]
Exits: [36]
Iteration 30:
Entrances: []
Exits: []
Iteration 40:
Entrances: [26]
Exits: [20]
Iteration 50:
Entrances: [22]
Exits: [0]
Iteration 60:
Entrances: [3, 26, 26, 36, 36]
Exits: [12, 14, 20, 35, 35]
Iteration 70:
Entrances: [3]
Exits: [12]
Iteration 80:
Entrances: [12, 26, 27, 27, 27, 34, 36, 36, 36, 36, 36, 36, 36]
Exits: [14, 14, 14, 14, 14, 14, 14, 25, 28, 35, 35, 35, 35]
Iteration 90:
Entrances: [25]
Exits: [29]
Iteration 100:
Entrances: [2, 9, 20, 25]
Exits: [1, 19, 26, 36]


In [149]:
m = np.zeros((37, 37, len(deltas1)))

# 6-27
# 14-27
# 23-27
# 36-27
# 32-30
# 19-27
# 3-14
# 3-16
# 1-12

neighboursDict = {
    0: [14, 22, 24, 35],
    1: [2, 3, 12],
    2: [1, 3, 12, 36],
    3: [1, 2, 12, 14, 16, 35, 36],
    4: [5, 6, 14],
    5: [4, 6],
    6: [4, 5, 14, 22, 27, 35],
    7: [8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 35, 36],
    8: [7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 36],
    9: [7, 8, 10, 11, 12, 13, 15, 16, 17, 18, 32, 36],
    10: [7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 27, 35, 36],
    11: [3, 7, 8, 9, 10, 12, 13, 15, 16, 17, 18, 26, 35, 36],
    12: [1, 2, 3, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 35, 36],
    13: [3, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 34, 35, 36],
    14: [0, 3, 4, 6, 12, 22, 24, 27, 28, 32, 35, 36, 13],
    15: [7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 36],
    16: [3, 7, 8, 9, 10, 11, 12, 13, 15, 17, 18, 35, 36],
    17: [3, 7, 8, 9, 10, 11, 12, 13, 15, 16, 18, 35, 36],
    18: [3, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 35, 36],
    19: [20, 23, 26, 27],
    20: [19, 23, 25, 26, 27, 29, 30, 35],
    21: [26, 27, 32, 35, 36],
    22: [0, 6, 14, 24, 28, 35, 36],
    23: [19, 20, 26, 27, 30],
    24: [14, 22, 27, 28, 34, 35, 0],
    25: [20, 26, 27, 29, 30, 32],
    26: [11, 19, 20, 21, 23, 25, 27, 28, 29, 30, 32, 35, 36],
    27: [6, 10, 14, 20, 21, 23, 24, 25, 26, 28, 30, 31, 32, 35, 36],
    28: [14, 22, 24, 26, 27, 33, 34, 35, 36],
    29: [20, 25, 26, 30],
    30: [20, 23, 25, 26, 27, 29, 32],
    31: [27, 32],
    32: [9, 14, 21, 25, 26, 27, 30, 31, 35],
    33: [28, 34],
    34: [13, 24, 28, 33, 35, 36],
    35: [0, 3, 6, 7, 11, 12, 13, 14, 16, 17, 18, 20, 21, 22, 24, 26, 27, 28, 32, 34, 36],
    36: [2, 3, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 22, 26, 27, 28, 34, 35],
}

for idx, delta in deltas1.iterrows():
    # print(idx)
    truth = truth1.iloc[idx]

    # moves = {K:int(V) for K, V in dict(deltas1.iloc[idx]).items() if V != 0}
    # start = {K:V for K, V in dict(truth1.iloc[idx]).items() if V != 0}
    # end = {K:V for K, V in dict(truth1.iloc[idx+1]).items() if V != 0}
    # print(moves)
    # print(f"Start: {start}")
    # print(f"End: {end}")

    # if not all(deltas1.iloc[idx] == delta):
    #     print(idx)
    #     print(dict(deltas1.iloc[idx]))
    #     print(dict(deltas1.iloc[idx+1]))
    #     print(dict(delta))
    #     assert(False)

    for start in range(37):
        if truth[start] == 0:
            continue

        n_left_start = -min(delta[start], 0)
        # Count total gain of neighbours (only neighbours who gain)
        total_gain_n = 0
        for n in neighboursDict[start]:
            total_gain_n += max(delta[n], 0)

        for end in range(37):
            if start == end:
                m[start, end, idx] = (truth[start] - n_left_start) / truth[start]
            elif end in neighboursDict[start] and total_gain_n != 0:
                m[start, end, idx] = (max(delta[end], 0) / total_gain_n) * (n_left_start / truth[start])
            
        if abs(np.sum(m[start, :, idx]) - 1) < 0.001:
            continue

        print(f"idx = {idx}, Start = {start}, truth[start] = {truth[start]}, total-gain-n = {total_gain_n}, n_left_start = {n_left_start}")
        print({K:int(V) for K, V in dict(delta).items() if V != 0})
        print({K:int(V) for K, V in dict(truth).items() if V != 0})
        print(m[start, :, idx])
        assert(False)

# t_m = np.zeros((37, 37))
# for start, end in zip(range(37), range(37)):
#     t_m[start][end] = np.mean(m[start, end], axis=2)

idx = 1575, Start = 34, truth[start] = 1, total-gain-n = 0.0, n_left_start = 1.0
{9: -1, 10: 1, 12: 1, 13: 2, 34: -1, 36: -2}
{1: 1, 2: 2, 7: 2, 9: 2, 10: 13, 12: 1, 13: 2, 15: 1, 16: 1, 19: 1, 21: 1, 23: 3, 24: 1, 25: 1, 30: 1, 34: 1, 35: 3, 36: 2, 0: 2}
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


AssertionError: 

In [133]:

len(deltas1)

2399