In [1]:
import gzip
import csv
import json
import datetime

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [5]:
# Palette bookkeeping: the two containers are inverse lookups of each other
# and start out knowing only white (id 0).
id_to_hex = ['#FFFFFF']
hex_to_id = {'#FFFFFF': np.int8(0)}

# 2000 x 2000 grid of int8 cells, zero-filled.
place_grid = np.zeros(shape=(2000, 2000), dtype=np.int8)

# Rows whose coordinate field holds more than two values are collected here.
mod_rows = []


def add_color(c):
    """Register hex color string `c` in the palette if it is not known yet.

    A new color receives the next free id (the current palette length, stored
    as np.int8) and is appended to `id_to_hex`. Already-known colors are left
    untouched. Returns None.
    """
    if c in hex_to_id:
        return
    next_id = np.int8(len(id_to_hex))
    hex_to_id[c] = next_id
    id_to_hex.append(c)

In [6]:
# First pass
# Scan the whole CSV once to collect every color, the earliest timestamp,
# the index of the last row, and the rows with more than two coordinates.
#
# NOTE(review): strptime with %Z returns a naive datetime, so .timestamp()
# interprets it in this machine's local timezone and start_date is shifted by
# the local UTC offset. The second pass parses timestamps the same way, so the
# relative offsets it stores stay consistent; change both passes together if
# true UTC epoch seconds are needed.

# Start from an empty list so that re-running this cell does not append the
# same rows a second time.
mod_rows = []

with gzip.open('data/2022_place_canvas_history.csv.gzip', mode='rt', newline='') as f:
    f.readline() # skip header
    reader = csv.reader(f)
    # Sentinel later than any timestamp expected in the data; it is lowered to
    # the minimum timestamp as rows are read.
    start_date = datetime.datetime(2030, 1, 1).timestamp()
    # -1 so that `max_i + 1` (used as the row count) is 0 for a file with no
    # data rows.
    max_i = -1
    for i, row in enumerate(reader):
        try:
            timestamp = datetime.datetime.strptime(row[0], r'%Y-%m-%d %H:%M:%S.%f %Z').timestamp()
        except ValueError:
            # Fallback for timestamps written without fractional seconds.
            timestamp = datetime.datetime.strptime(row[0], r'%Y-%m-%d %H:%M:%S %Z').timestamp()
        start_date = min(start_date, timestamp)

        add_color(row[2])

        # The coordinate field is a comma-separated list of integers. Rows with
        # more than two values are kept aside (presumably moderator rectangle
        # edits -- confirm against the dataset description).
        location = [int(x) for x in row[3].split(',')]
        if len(location) > 2:
            mod_rows.append(row)

        max_i = i

        # Progress marker every million rows (integer arithmetic, no float modulus).
        if i % 1_000_000 == 0:
            print(i)
    print(max_i)

0
1000000
2000000
3000000
4000000
5000000
6000000
7000000
8000000
9000000
10000000
11000000
12000000
13000000
14000000
15000000
16000000
17000000
18000000
19000000
20000000
21000000
22000000
23000000
24000000
25000000
26000000
27000000
28000000
29000000
30000000
31000000
32000000
33000000
34000000
35000000
36000000
37000000
38000000
39000000
40000000
41000000
42000000
43000000
44000000
45000000
46000000
47000000
48000000
49000000
50000000
51000000
52000000
53000000
54000000
55000000
56000000
57000000
58000000
59000000
60000000
61000000
62000000
63000000
64000000
65000000
66000000
67000000
68000000
69000000
70000000
71000000
72000000
73000000
74000000
75000000
76000000
77000000
78000000
79000000
80000000
81000000
82000000
83000000
84000000
85000000
86000000
87000000
88000000
89000000
90000000
91000000
92000000
93000000
94000000
95000000
96000000
97000000
98000000
99000000
100000000
101000000
102000000
103000000
104000000
105000000
106000000
107000000
108000000
109000000
110000000
111000

In [18]:
# Summarise the first pass on screen and persist the same figures to
# stats.json so later cells can run without repeating the scan.
total_rows = max_i + 1
# Hex strings in descending lexicographic order; the position in this list is
# what gets written out under 'colors'.
palette_desc = sorted(id_to_hex, reverse=True)

print('start timestamp:', start_date)
print('total rows:', total_rows)
print('mod actions:', len(mod_rows))
print('colors:', palette_desc)

summary = {
    'start_time': start_date,
    'rows': total_rows,
    'censors': mod_rows,
    'colors': palette_desc,
}
with open('stats.json', 'w') as f:
    json.dump(summary, f)

start timestamp: 1648831450.315
total rows: 160353104
mod actions: 19
colors: ['#FFFFFF', '#FFF8B8', '#FFD635', '#FFB470', '#FFA800', '#FF99AA', '#FF4500', '#FF3881', '#E4ABFF', '#DE107F', '#D4D7D9', '#BE0039', '#B44AC0', '#9C6926', '#94B3FF', '#898D90', '#811E9F', '#7EED56', '#6D482F', '#6D001A', '#6A5CFF', '#51E9F4', '#515252', '#493AC1', '#3690EA', '#2450A4', '#00CCC0', '#00CC78', '#00A368', '#009EAA', '#00756F', '#000000']


In [3]:
# Reload the summary written to stats.json.
with open('stats.json', 'r') as f:
    stats = json.load(f)

n_rows, colors, start_date = stats['rows'], stats['colors'], stats['start_time']

# A color's id is its position in the stored list, kept as np.uint32.
color_ids = dict(zip(colors, map(np.uint32, range(len(colors)))))

# One row per CSV record, six uint32 columns, zero-filled.
all_rows = np.zeros(shape=(n_rows, 6), dtype=np.uint32)

In [4]:
# Second pass
# store all the rows in a uint32 np array
#
# Column layout of all_rows:
#   0    milliseconds elapsed since start_date
#   1    color id (looked up in color_ids)
#   2-3  first two values of the coordinate field
#   4-5  third and fourth values of the coordinate field, written only for
#        rows that carry more than two values
#
# NOTE(review): strptime with %Z returns a naive datetime, so .timestamp()
# interprets it in this machine's local timezone. That is harmless here only
# as long as start_date was produced with the same parsing on the same
# machine -- confirm before changing either side.

with gzip.open('data/2022_place_canvas_history.csv.gzip', mode='rt', newline='') as f:
    f.readline()  # skip header
    reader = csv.reader(f)
    for i, row in enumerate(reader):
        try:
            timestamp = datetime.datetime.strptime(row[0], r'%Y-%m-%d %H:%M:%S.%f %Z').timestamp()
        except ValueError:
            # Fallback for timestamps written without fractional seconds.
            timestamp = datetime.datetime.strptime(row[0], r'%Y-%m-%d %H:%M:%S %Z').timestamp()

        # Round instead of truncating: the float difference can land just
        # below the intended whole millisecond (e.g. 0.9998 for 1 ms), and
        # truncation would then store a value that is one too small.
        all_rows[i, 0] = round((timestamp - start_date) * 1000)
        all_rows[i, 1] = color_ids[row[2]]

        location = [int(x) for x in row[3].split(',')]
        all_rows[i, 2] = location[0]
        all_rows[i, 3] = location[1]

        if len(location) > 2:
            all_rows[i, 4] = location[2]
            all_rows[i, 5] = location[3]

        # Progress marker in millions of rows (integer arithmetic, no floats).
        if i % 1_000_000 == 0:
            print(i // 1_000_000)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160


In [5]:
# Checkpoint the array in its current row order so later cells can reload it
# instead of re-reading the CSV.
np.save(file='data/unsorted_pixels.npy', arr=all_rows)

In [4]:
# Reload the checkpointed array from disk.
all_rows = np.load(file='data/unsorted_pixels.npy')

In [5]:
# Sort the rows in place by column 0. Viewing each six-column row as a single
# structured record lets ndarray.sort move whole rows, keyed on the first
# field (f0). Per the NumPy docs, fields not named in `order` are still used
# to break ties.
# The record fields are uint32 to match the dtype the array was created with;
# a signed int32 view would treat any value >= 2**31 as negative and sort it
# ahead of the small values.
all_rows.view('uint32,uint32,uint32,uint32,uint32,uint32').sort(order=['f0'], axis=0)

In [6]:
# Persist the array after the in-place sort.
np.save(file='data/sorted_pixels.npy', arr=all_rows)