In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the first data file; the per-area columns are selected from it below.
df1 = pd.read_csv("../data1.csv")

# Integer id for every area name:
#   'outside' -> 0, 'r1'..'r34' -> 1..34, 'c1' -> 35, 'c2' -> 36.
name_to_idx = {'outside': 0}
name_to_idx.update((f"r{room}", room) for room in range(1, 35))
name_to_idx.update(c1=35, c2=36)

# Column order used to slice the frame: rooms first, then c1/c2, then 'outside' last.
areas = [f"r{room}" for room in range(1, 35)]
areas.extend(['c1', 'c2', 'outside'])

# Keep only the per-area columns (presumably head-counts per time step - TODO confirm).
truth1 = df1[areas]

In [19]:
### deltas1 stores the change in people in each room per time period.
# Each row is (current row - previous row) of truth1. The first row has no
# predecessor, so it becomes NaN and is dropped. Columns are relabelled from
# area names to the integer ids in name_to_idx.
# (To eyeball the alignment, compare truth1.head() with truth1.shift().head().)
previous_step = truth1.shift()
deltas1 = truth1.sub(previous_step).dropna().rename(columns=name_to_idx)
display(deltas1.head(1))

# Positional row 200 is interesting: several areas change in the same step,
# so the gain in r27 could be explained by a move from c2 or from r13.
row_200 = dict(deltas1.iloc[200])
moves = {area: change for area, change in row_200.items() if change != 0}
print(moves)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,28,29,30,31,32,33,34,35,36,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


{12: 1.0, 13: -1.0, 27: 1.0, 35: 1.0, 36: -2.0}


In [74]:
# One (initially empty) neighbour set per area id 0..36, plus a 37x37 matrix
# where nTransitions[start, end] counts how often `start` lost people in the
# same step in which `end` gained people.
candidateNeighbours = {area: set() for area in range(37)}
nTransitions = np.zeros((37, 37))
# The sets are deliberately over-populated here; 'bad' looking transitions
# are removed manually afterwards.

# Transitions made in day 1
for idx, move in deltas1.iterrows():
    changes = dict(move)
    moveFrom = [area for area, change in changes.items() if change < 0]
    moveTo = [area for area, change in changes.items() if change > 0]
    # Every area that shrank is paired with every area that grew in this step.
    for start in moveFrom:
        candidateNeighbours[start].update(moveTo)
        for end in moveTo:
            nTransitions[start, end] += 1


In [76]:
# Print each area id with its candidate neighbours in ascending order.
# NOTE(review): this cell's execution counter (76) is later than that of the
# pruning cell further down (75), so the saved output presumably shows the
# pruned sets - TODO confirm. On a top-to-bottom run it prints the unpruned ones.
for area in candidateNeighbours:
    ordered = sorted(candidateNeighbours[area])
    print(f"'{area}': {ordered}")

'0': [2, 14, 16, 22, 28, 35, 36]
'1': [2, 3, 12, 13, 20, 22, 25, 26, 27, 28, 29, 30, 35, 36]
'2': [0, 1, 3, 6, 7, 10, 12, 14, 16, 20, 23, 24, 25, 26, 27, 28, 29, 30, 32, 35, 36]
'3': [1, 2, 6, 12, 14, 16, 20, 22, 23, 25, 26, 27, 28, 29, 30, 32, 35, 36]
'4': [6]
'5': [6, 22, 28, 29]
'6': [2, 3, 4, 5, 7, 12, 14, 16, 20, 22, 24, 25, 26, 27, 28, 30, 32, 35, 36]
'7': [2, 6, 11, 12, 14, 16, 18, 20, 25, 27, 28, 29, 30, 31, 32, 35, 36]
'8': [15, 36]
'9': [10, 13, 16, 36]
'10': [2, 9, 11, 12, 14, 16, 22, 25, 26, 30, 32, 35, 36]
'11': [7, 10, 19]
'12': [1, 2, 3, 6, 7, 10, 14, 15, 16, 20, 22, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36]
'13': [1, 9, 32, 34, 36]
'14': [0, 2, 3, 6, 7, 10, 12, 16, 20, 22, 24, 25, 26, 27, 28, 29, 31, 32, 35, 36]
'15': [8, 12, 22, 36]
'16': [0, 2, 3, 6, 7, 9, 10, 12, 14, 20, 22, 25, 26, 27, 28, 29, 30, 32, 35, 36]
'17': [32]
'18': [7, 30, 32, 36]
'19': [11, 20, 22, 29]
'20': [1, 2, 3, 6, 7, 12, 14, 16, 19, 21, 22, 23, 24, 25, 26, 27, 28, 30, 32, 34, 35, 36]
'21': [20, 

In [59]:
# Raw co-occurrence counts: entry [start, end] is the number of steps in which
# area `start` lost people while area `end` gained people.
nTransitions

array([[  0.,   0.,   3., ...,   2.,  14.,   7.],
       [  3.,   0.,  74., ...,   0.,  13.,  17.],
       [  6.,  71.,   0., ...,   2.,  36.,  27.],
       ...,
       [  0.,   3.,   4., ...,   0.,   5.,   6.],
       [ 13.,  14.,  33., ...,  12.,   0., 169.],
       [ 12.,  27.,  35., ...,   4., 155.,   0.]])

In [75]:
# Pruning time
# Both passes edit the sets inside candidateNeighbours in place.

# A candidate edge area->n is kept only if it accounts for at least this
# fraction of all transitions counted out of `area`.
MIN_TRANSITION_SHARE = 0.01

# Pass 1: drop statistically insignificant edges.
removed = 0
for area, neighbours in candidateNeighbours.items():
    # The row total is the same for every neighbour of `area`, so compute the
    # cutoff once per area instead of once per neighbour.
    cutoff = nTransitions[area].sum() * MIN_TRANSITION_SHARE
    toRemove = {n for n in neighbours if nTransitions[area, n] < cutoff}
    removed += len(toRemove)
    # Removal happens after the scan so the set is never mutated mid-iteration.
    neighbours.difference_update(toRemove)
print(f"Removed {removed} insignificant connections")

# Pass 2: drop one-directional edges, i.e. area->n where n->area is absent.
removed = 0
for area, neighbours in candidateNeighbours.items():
    toRemove = {n for n in neighbours if area not in candidateNeighbours[n]}
    removed += len(toRemove)
    neighbours.difference_update(toRemove)
print(f"Removed {removed} un-reciprocal connections")

Removed 271 insignificant connections
Removed 282 un-reciprocal connections


### Attempt 2
We still use the deltas1 from above, which indicates the change in room occupancy from step to step.
The idea is to try and decompose this into a guess of what transitions truly occurred.
For example, in a case where the only thing that happens is that area 9 increases by 1 and area 36 decreases by 1,
we're going to assume that there was exactly one transition: 36->9.

Oh. Maybe not.
Consider from above:

```
# Index 200 is interesting - indicates a potential move from c2 to r27, or r13 to r27. 
moves = {K:V for K, V in dict(deltas1.iloc[200]).items() if V != 0}
print(moves)
```

{12: 1.0, 13: -1.0, 27: 1.0, 35: 1.0, 36: -2.0}

It could just be 2 people entering c1 (from c2) and 1 person then going from c1 to r27, which is probably more likely...

In [18]:
# Expand each step's net changes into one list entry per person:
# `enters` repeats an area id once per person gained, `exits` once per person lost.
# Every 10th index label is printed, and the loop stops once label 100 is reached.
for idx, step in deltas1.iterrows():
    enters, exits = [], []
    for area, change in dict(step).items():
        headcount = int(change)
        if change > 0:
            enters.extend([area] * headcount)
        elif change < 0:
            # headcount is negative here; flip the sign to get the repeat count.
            exits.extend([area] * -headcount)

    if idx % 10 == 0:
        print(f"Iteration {idx}:")
        print(f"Entrances: {enters}")
        print(f"Exits: {exits}")

    if idx >= 100:
        break


Iteration 10:
Entrances: [14, 14, 0, 0]
Exits: [22, 22, 22, 22]
Iteration 20:
Entrances: [9]
Exits: [36]
Iteration 30:
Entrances: []
Exits: []
Iteration 40:
Entrances: [26]
Exits: [20]
Iteration 50:
Entrances: [22]
Exits: [0]
Iteration 60:
Entrances: [3, 26, 26, 36, 36]
Exits: [12, 14, 20, 35, 35]
Iteration 70:
Entrances: [3]
Exits: [12]
Iteration 80:
Entrances: [12, 26, 27, 27, 27, 34, 36, 36, 36, 36, 36, 36, 36]
Exits: [14, 14, 14, 14, 14, 14, 14, 25, 28, 35, 35, 35, 35]
Iteration 90:
Entrances: [25]
Exits: [29]
Iteration 100:
Entrances: [2, 9, 20, 25]
Exits: [1, 19, 26, 36]


In [None]:
# Blank adjacency scaffold keyed by area *name*: 'r1'..'r34', then 'c1', 'c2',
# 'outside', each mapped to its own empty list.
# Note: this rebinds candidateNeighbours, which earlier in the notebook is a
# dict of integer ids -> sets.
candidateNeighbours = {f"r{room}": [] for room in range(1, 35)}
for extra_area in ('c1', 'c2', 'outside'):
    candidateNeighbours[extra_area] = []
