In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
df1 = pd.read_csv("../data1.csv")
name_to_idx = {'outside': 0} | {'r' + str(i): i for i in range(1, 35)} | {'c1': 35, 'c2': 36}
areas = ['r' + str(i) for i in range(1, 35)] + ['c1', 'c2', 'outside']
truth1 = df1[areas].rename(name_to_idx, axis=1)

In [3]:
# display(truth1.head())
# display(truth1.shift().head())

### deltas1 stores the change in people in each room per time period.
deltas1 = (truth1 - truth1.shift()).dropna().reset_index(drop=True)
display(deltas1.head(1))

# Index 200 is interesting - indicates a potential move from c2 to r27, or r13 to r27. 
moves = {K:V for K, V in dict(deltas1.iloc[200]).items() if V != 0}
print(moves)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,28,29,30,31,32,33,34,35,36,0
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


{12: np.float64(1.0), 13: np.float64(-1.0), 27: np.float64(1.0), 35: np.float64(1.0), 36: np.float64(-2.0)}


In [4]:
candidateNeighbours = {area: set() for area in range(37)}
nTransitions = np.zeros((37, 37))
# We're going to populate the candidateNeighbours dictionary, and manually remove 'bad' looking transitions afterwards

# Transitions made in day 1
for idx, move in deltas1.iloc[:,].iterrows():
    moveFrom = [K for K, V in dict(move).items() if V < 0]
    moveTo = [K for K, V in dict(move).items() if V > 0]
    for start in moveFrom:
        for end in moveTo:
            candidateNeighbours[start].add(end)
            nTransitions[start, end] += 1


In [5]:
for area, neighbours in candidateNeighbours.items():
    print(f"'{area}': {sorted(list(neighbours))}")
    ...

'0': [2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36]
'1': [0, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 20, 22, 24, 25, 26, 27, 28, 29, 30, 32, 35, 36]
'2': [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36]
'3': [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36]
'4': [1, 2, 6, 7, 9, 10, 12, 14, 16, 17, 20, 23, 25, 26, 27, 28, 29, 30, 32, 35, 36]
'5': [1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 25, 26, 27, 28, 29, 30, 35, 36]
'6': [0, 1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 16, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36]
'7': [1, 2, 3, 6, 8, 11, 12, 13, 14, 16, 17, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 35, 36]
'8': [1, 2, 3, 6, 12, 14, 15, 16, 17, 19, 20, 22, 23, 26, 27, 28, 29, 35, 36]
'9': [1, 3, 6, 10, 12, 13, 14, 16, 20

In [6]:
nTransitions

array([[  0.,   0.,   3., ...,   2.,  14.,   7.],
       [  3.,   0.,  74., ...,   0.,  13.,  17.],
       [  6.,  71.,   0., ...,   2.,  36.,  27.],
       ...,
       [  0.,   3.,   4., ...,   0.,   5.,   6.],
       [ 13.,  14.,  33., ...,  12.,   0., 169.],
       [ 12.,  27.,  35., ...,   4., 155.,   0.]])

In [7]:
# Pruning time
removed = 0
for area, neighbours in candidateNeighbours.items():
    toRemove = set()
    for n in neighbours:
        if nTransitions[area, n] < sum(nTransitions[area,]) * 0.01:
            removed += 1
            toRemove.add(n)
    for n in toRemove:
        candidateNeighbours[area].remove(n)
print(f"Removed {removed} insignificant connections")

removed = 0
for area, neighbours in candidateNeighbours.items():
    toRemove = set()
    for n in neighbours:
        if area not in candidateNeighbours[n]:
            # print(f"Removed connection {area}->{n} as there was no connection {n}->{area}")
            removed += 1
            toRemove.add(n)
    for n in toRemove:
        candidateNeighbours[area].remove(n)
print(f"Removed {removed} un-reciprocal connections")

Removed 271 insignificant connections
Removed 282 un-reciprocal connections


### Attempt 2
We still use the deltas1 from above, which indicates the change in room occupancy from step to step.
The idea is to try and decompose this into a guess of what transitions truly occurred.
In a case for e.g. where there the only thing that happens is area 9 increases by 1, and area 36 decreases by 1,
we're going to assume that there was one transition -> 36->9.

Oh. Maybe not.
Consider from above:

```
# Index 200 is interesting - indicates a potential move from c2 to r27, or r13 to r27. 
moves = {K:V for K, V in dict(deltas1.iloc[200]).items() if V != 0}
print(moves)
```

{12: 1.0, 13: -1.0, 27: 1.0, 35: 1.0, 36: -2.0}

Could just be 2 people entering c1, 1 person going from c1 to r27, which is probably more likely...

In [8]:
for idx, step in deltas1.iterrows():
    enters = []
    exits = []
    for K, V in dict(step).items():
        if V == 0:
            continue
        if V > 0:
            enters += [K] * int(V)
        exits += [K] * (-int(V))

    if idx % 10 == 0:
        print(f"Iteration {idx}:")
        print(f"Entrances: {enters}")
        print(f"Exits: {exits}")

    if idx >= 100:
        break


Iteration 0:
Entrances: [14]
Exits: [22]
Iteration 10:
Entrances: [35, 36]
Exits: [14, 14]
Iteration 20:
Entrances: [36]
Exits: [9]
Iteration 30:
Entrances: []
Exits: []
Iteration 40:
Entrances: []
Exits: []
Iteration 50:
Entrances: [14, 32]
Exits: [22, 31]
Iteration 60:
Entrances: [2, 12, 12, 22, 30]
Exits: [3, 35, 36, 36, 0]
Iteration 70:
Entrances: [6]
Exits: [4]
Iteration 80:
Entrances: [2, 7, 7, 8, 13, 15, 16, 18, 26, 26, 26, 31, 36]
Exits: [12, 32, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35]
Iteration 90:
Entrances: [26]
Exits: [25]
Iteration 100:
Entrances: [3, 26, 36]
Exits: [2, 20, 35]


In [9]:
deltas1

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,28,29,30,31,32,33,34,35,36,0
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,-2.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2394,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2395,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2397,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
m = np.zeros((37, 37, len(deltas1)))

neighboursDict = {
    0: [14, 22, 24, 35],
    1: [2, 3, 12],
    2: [1, 3, 12, 36],
    3: [1, 2, 7, 12, 14, 16, 35, 36],
    4: [5, 6, 14],
    5: [4, 6],
    6: [4, 5, 14, 22, 27, 35],
    7: [3, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 28, 35, 36],
    8: [7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 36],
    9: [7, 8, 10, 11, 12, 13, 15, 16, 17, 18, 32, 35, 36],
    10: [7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 27, 35, 36],
    11: [3, 7, 8, 9, 10, 12, 13, 15, 16, 17, 18, 26, 35, 36],
    12: [1, 2, 3, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 35, 36],
    13: [3, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 34, 35, 36],
    14: [0, 3, 4, 6, 12, 16, 22, 24, 27, 28, 32, 35, 36, 13],
    15: [7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 36],
    16: [3, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 27, 35, 36],
    17: [3, 7, 8, 9, 10, 11, 12, 13, 15, 16, 18, 35, 36],
    18: [3, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 35, 36],
    19: [20, 23, 26, 27],
    20: [19, 23, 25, 26, 27, 29, 30, 34, 35],
    21: [26, 27, 32, 35, 36],
    22: [0, 6, 14, 24, 28, 35, 36],
    23: [19, 20, 26, 27, 30],
    24: [14, 22, 27, 28, 34, 35, 0],
    25: [20, 26, 27, 29, 30, 32],
    26: [11, 19, 20, 21, 23, 25, 27, 28, 29, 30, 32, 35, 36],
    27: [6, 10, 14, 16, 20, 21, 23, 24, 25, 26, 28, 30, 31, 32, 35, 36],
    28: [7, 14, 22, 24, 26, 27, 33, 34, 35, 36],
    29: [20, 25, 26, 30],
    30: [20, 23, 25, 26, 27, 29, 32],
    31: [27, 32],
    32: [9, 14, 21, 25, 26, 27, 30, 31, 35],
    33: [28, 34],
    34: [13, 20, 24, 28, 33, 35, 36],
    35: [0, 3, 6, 7, 9, 11, 12, 13, 14, 16, 17, 18, 20, 21, 22, 24, 26, 27, 28, 32, 34, 36],
    36: [2, 3, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 22, 26, 27, 28, 34, 35],
}

for idx, delta in deltas1.iterrows():
    truth = truth1.iloc[idx]

    # moves = {K:int(V) for K, V in dict(deltas1.iloc[idx]).items() if V != 0}
    # start = {K:V for K, V in dict(truth1.iloc[idx]).items() if V != 0}
    # end = {K:V for K, V in dict(truth1.iloc[idx+1]).items() if V != 0}
    # print(moves)
    # print(f"Start: {start}")
    # print(f"End: {end}")

    # if not all(deltas1.iloc[idx] == delta):
    #     print(idx)
    #     print(dict(deltas1.iloc[idx]))
    #     print(dict(deltas1.iloc[idx+1]))
    #     print(dict(delta))
    #     assert(False)

    for start in range(37):
        if truth[start] == 0:
            continue

        n_left_start = -min(delta[start], 0)
        # Count total gain of neighbours (only neighbours who gain)
        total_gain_n = 0
        for n in neighboursDict[start]:
            total_gain_n += max(delta[n], 0)

        for end in range(37):
            if start == end:
                m[start, end, idx] = (truth[start] - n_left_start) / truth[start]
            elif end in neighboursDict[start] and total_gain_n != 0:
                m[start, end, idx] = (max(delta[end], 0) / total_gain_n) * (n_left_start / truth[start])
            
        if abs(np.sum(m[start, :, idx]) - 1) < 0.001:
            continue

        print(f"idx = {idx}, Start = {start}, truth[start] = {truth[start]}, total-gain-n = {total_gain_n}, n_left_start = {n_left_start}")
        print({K:int(V) for K, V in dict(delta).items() if V != 0})
        print({K:int(V) for K, V in dict(truth).items() if V != 0})
        print(m[start, :, idx])
        assert(False)

In [11]:
for idx, row in enumerate(m[:,:,0]):
    print(idx, row)

0 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -0.  0.  0.  0.
  0.  0.  0.  0. -0.  0. -0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -0.
  0.]
1 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
2 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
3 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
4 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
5 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
6 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
7 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
8 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0

In [22]:
avgTransition = np.zeros((10, 37, 37))
for i in range(10):
    if i != 9:
        mi = m[:,:,i*240:(i+1)*240]
    else:
        mi = m[:,:,i*240:]
        
    nCounted = [0]*37
    for timestep in range(mi.shape[2]):
        transition = mi[:,:,timestep]
        for idx, row in enumerate(transition):
            if np.any(row):
                nCounted[idx] += 1

    nCounted = np.repeat(np.array(nCounted).reshape((37, 1)), 37, axis=1)

    totalTransition = np.sum(mi, axis=2)

    avgTransition[i] = totalTransition / nCounted

  avgTransition[i] = totalTransition / nCounted


In [19]:
np.save("../avgTransitionRaw.npy", avgTransition)
    

In [41]:
print(avgTransition[0,22,:])
print(avgTransition[9,22,:])

[0.2125     0.         0.         0.         0.         0.
 0.025      0.         0.         0.         0.         0.
 0.         0.         0.22570883 0.         0.         0.
 0.         0.         0.         0.         0.42163462 0.
 0.02573964 0.         0.         0.         0.00227273 0.
 0.         0.         0.         0.         0.         0.07151919
 0.015625  ]
[0.43586601 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.09722222 0.         0.         0.
 0.         0.         0.         0.         0.44444444 0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.02246732]


In [21]:
redistributeWealth = {
    0: [14, 22, 24, 35],
    1: [2, 3, 12],
    2: [1, 3, 12, 36],
    3: [1, 2, 12, 36],
    4: [5, 6, 14],
    5: [4, 6],
    6: [4, 5, 14, 22, 35],
    7: [8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 35, 36],
    8: [7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 36],
    9: [7, 8, 10, 11, 12, 13, 15, 16, 17, 18, 35, 36],
    10: [7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 36],
    11: [7, 8, 9, 10, 12, 13, 15, 16, 17, 18, 35, 36],
    12: [1, 2, 3, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 35, 36],
    13: [7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 35, 36],
    14: [0, 4, 6, 12, 13, 22, 24, 35, 36],
    15: [7, 8, 9, 10, 11, 12, 13, 16, 17, 18, 36],
    16: [7, 8, 9, 10, 11, 12, 13, 15, 17, 18, 35, 36],
    17: [7, 8, 9, 10, 11, 12, 13, 15, 16, 18, 35, 36],
    18: [7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 35, 36],
    19: [20, 23],
    20: [19, 23, 25, 26, 27, 29, 30],
    21: [26, 27, 32, 35],
    22: [0, 6, 14, 24, 28, 35, 36],
    23: [19, 20, 26],
    24: [0, 14, 22, 28, 34, 35],
    25: [20, 26, 27, 29, 30],
    26: [20, 21, 23, 25, 27, 28, 29, 30, 32, 35],
    27: [20, 21, 25, 26, 28, 30, 31, 32, 35],
    28: [22, 24, 26, 27, 33, 34, 35],
    29: [20, 25, 26, 30],
    30: [20, 25, 26, 27, 29],
    31: [27, 32],
    32: [21, 26, 27, 31, 35],
    33: [28, 34],
    34: [24, 28, 33, 35],
    35: [0, 6, 7, 9, 11, 12, 13, 14, 16, 17, 18, 21, 22, 24, 26, 27, 28, 32, 34, 36],
    36: [2, 3, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 22, 27, 34, 35],
}

In [13]:
for start_idx, start in enumerate(avgTransition):
    startConnections = []
    for end_idx, end in enumerate(start):
        if end != 0:
            startConnections.append(end_idx)
    # print(f"{start_idx}: {startConnections}")

In [92]:
for start_idx, start in enumerate(redistributed):
    startConnections = []
    for end_idx, end in enumerate(start):
        if end != 0:
            startConnections.append(end_idx)
    # print(f"{start_idx}: {startConnections}")

In [48]:
# Redistributed culls 'bad' transitions, distributing its probability amongst all the other
# transitions (without increasing the 'stay in the same room' transition prob)
redistributed = np.zeros((10, 37, 37))
for i in range(10):
    for start, row in enumerate(avgTransition[i]):
        redistributed[i, start] = row
        for end in range(37):
            if end in redistributeWealth[start] or start == end:
                continue
            redistributed[i, start, end] = 0
        # Ensure [start, start] transition remains the same, but other transitions are
        # scaled up so that the distribution sums to 1
        normConst = (1 - avgTransition[i, start, start]) / (np.sum(redistributed[i, start]) - redistributed[i, start, start])
        redistributed[i, start] *= normConst
        redistributed[i, start, start] = avgTransition[i, start, start]
redistributed[np.isnan(redistributed)] = 0

np.save("../avgTransitionRedistributed.npy", redistributed)

nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan


  normConst = (1 - avgTransition[i, start, start]) / (np.sum(redistributed[i, start]) - redistributed[i, start, start])


In [40]:
print(redistributed[0,0,:])
print(redistributed[9,0,:])

[9.74490695e-01 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 4.64143300e-03 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 1.78121504e-02 0.00000000e+00
 4.49842555e-04 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 2.60587913e-03
 0.00000000e+00]
[ 1. nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan]
