In [2]:
%load_ext autoreload
import pandas as pd
import numpy as np
from matplotlib.colors import LogNorm
from matplotlib.cm import ScalarMappable
from scipy.ndimage import gaussian_filter
from functools import partial
import operator
from IPython.display import clear_output
from tqdm import tqdm
import networkx as nx
import timeit
import random

In [3]:
%autoreload 2
from utils import summary, visualise, string_to_color, visualise_at_interval, convertImage, initialize_data_loader



# Canvas reproduction

In [4]:
# Canvas replay: the reducer keeps the colour carried by the current row, so each
# cell of the grid holds the last colour written to it.
# NOTE(review): the output recorded for this cell is a ZeroDivisionError whose cause
# is not visible in this notebook (it comes from code outside this file) - TODO investigate.
# NOTE(review): this reducer takes two parameters while other cells pass reducers
# with three or four - TODO confirm which signature `visualise_at_interval` expects.
visualise_at_interval(
    lambda rez, row : row.pixel_color, 
    # Snapshot transform: convert every grid with convertImage/string_to_color,
    # wrapped in tqdm to show progress.
    lambda l : list(tqdm((convertImage(x,string_to_color) for x in l), total=len(l))), 
    # presumably the time step between two snapshots - confirm against utils
    pd.Timedelta(minutes = 15), 
    # initial grid: 1000 rows of 2000 white ("#FFFFFF") cells
    [["#FFFFFF" for i in range(2000)] for i in range(1000)],
    # presumably the base name of the generated file - confirm against utils
    "image",
) 



ZeroDivisionError: division by zero

# Activity

In [19]:
def get_transform(images, blur = 0):
    """Blur every grid and colour-map all of them on one shared logarithmic scale.

    Parameters
    ----------
    images : iterable of 2-D array-likes
        One numeric grid per frame.
    blur : scalar, optional
        Sigma handed to ``scipy.ndimage.gaussian_filter`` (default ``0``).

    Returns
    -------
    list
        One RGBA array per input grid, produced by
        ``ScalarMappable.to_rgba(..., bytes=True)`` with the ``'RdBu_r'`` colormap.
        An empty input yields an empty list.
    """
    blurred = []
    for img in images:
        grid = np.asarray(img)
        # gaussian_filter returns an array of the input dtype, so an integer grid
        # would have its blurred values truncated; promote non-float grids first.
        if not np.issubdtype(grid.dtype, np.floating):
            grid = grid.astype(float)
        blurred.append(gaussian_filter(grid, blur))

    # Nothing to scale: np.max would raise on an empty sequence.
    if not blurred:
        return []

    # Per-frame maxima avoid stacking every frame into one temporary array.
    maximum = np.max([frame.max() for frame in blurred])
    # The scale starts at 1; the +1 keeps vmax above vmin even when every value is 1.
    color_map = ScalarMappable(cmap='RdBu_r', norm=LogNorm(vmin=1, vmax=maximum + 1))
    return [color_map.to_rgba(frame, bytes=True) for frame in blurred]

In [24]:

# Activity map: the reducer adds one to the stored value for every row, starting
# from a grid of ones; get_transform then blurs and colour-maps the grids.
visualise_at_interval(
    lambda rez, row: rez + 1, 
    get_transform,
    # presumably the time step between two snapshots - confirm against utils
    pd.Timedelta(minutes = 15), 
    # initial grid of ones, shape (1000, 2000)
    np.ones((1000,2000)),
    # the recorded output reports "results saved on activite.gif"
    "activite",
    # NOTE(review): meaning of `duration` is defined in utils - TODO document it here
    duration = pd.Timedelta(minutes = 30),
) 

loaded file : 2023_place_canvas_history-000000000019.csv.gzip

start making visualizations
results saved on activite.gif


In [26]:

# Same activity counter as the previous cell, but with -1 instead of a Timedelta.
# NOTE(review): -1 presumably asks for a single picture over the whole data set -
# confirm against utils. The recorded output reports "activite_total.gif".
visualise_at_interval(
    lambda rez, row : rez + 1, 
    get_transform, 
    -1, 
    # initial grid of ones, shape (1000, 2000)
    np.ones((1000,2000)),
    "activite_total",
) 

loaded file : 2023_place_canvas_history-000000000019.csv.gzip

start making visualizations
results saved on activite_total.gif


In [None]:
# Sigma values tried for the Gaussian blur of the activity maps.
blurs =[1,2,5,10,50,100]
# One transform per blur value, keyed by str(blur); the fifth argument is a callable
# that builds a name from that key.
# NOTE(review): the grid is (1000, 1000) here but (1000, 2000) in the cells above -
# TODO confirm which canvas size is intended.
visualise_at_interval(
    lambda rez, row  : rez + 1, 
    { str(blur) : partial(get_transform, blur = blur) for blur in blurs}, 
    pd.Timedelta(minutes = 15), 
    np.ones((1000,1000)),
    lambda blur : f"activite_blur_{blur}",
    duration = pd.Timedelta(minutes = 30),
) 


# Same maps, but a row only counts when row.is_mod is falsy
# (int(not row.is_mod) is 1 for such rows and 0 otherwise).
visualise_at_interval(
    lambda rez, row : rez + int(not row.is_mod), 
    { str(blur) : partial(get_transform, blur = blur) for blur in blurs}, 
    pd.Timedelta(minutes = 15), 
    np.ones((1000,1000)),
    lambda blur : f"activite_blur_no_moderator_{blur}",
    duration = pd.Timedelta(minutes = 30),
) 


# Variété d'utilisateurs

In [35]:
def compose(f, g):
    """Build the function that applies ``g`` first and then ``f`` to its result.

    The returned callable forwards all positional and keyword arguments to ``g``.
    """
    def composed(*args, **kwargs):
        intermediate = g(*args, **kwargs)
        return f(intermediate)

    return composed

def add_user(user, users):
    """Add ``user`` to ``users`` in place and return that same collection.

    Returning the collection lets the call be used as an expression (e.g. in a lambda).
    """
    users.add(user)
    return users

def from_users_sets_by_points_to_len_by_points(images):
    """Replace every per-cell collection by its size plus one, image by image.

    ``images`` is a sequence of grids (rows of sized collections); the result is a
    list of nested lists with the same layout holding ``len(cell) + 1``.
    """
    counts_per_image = []
    for image in images:
        counted_rows = []
        for row in image:
            counted_rows.append([len(user_set) + 1 for user_set in row])
        counts_per_image.append(counted_rows)
    return counts_per_image

In [37]:
# Blur values for the user-variety maps (rebinds `blurs` from the activity cell).
blurs =[1,5,10]
# Every cell starts as an empty set; the reducer adds the row's user to it unless
# is_mod is truthy. Each transform first turns the sets into sizes (+1) and then
# applies get_transform with the given blur.
# NOTE(review): this reducer takes four parameters (users, _, user, is_mod) while
# other cells use two or three - TODO confirm how utils calls the reducer.
visualise_at_interval(
    lambda users, _, user, is_mod  : users if is_mod else add_user(user, users), 
    { str(blur) : compose(partial(get_transform, blur = blur),
                          from_users_sets_by_points_to_len_by_points) for blur in blurs}, 
    pd.Timedelta(minutes = 5), 
    # 1000 x 1000 independent empty sets
    [[set() for i in range(1000)] for j in range(1000)],
    # the recorded output reports files such as "users_variety_blur1.gif"
    lambda blur : f"users_variety_blur{blur}",
    duration = pd.Timedelta(minutes = 15),
)

just made summary from 2023-07-20 13:55:26.088000+00:00 to 2023-07-20 14:10:26.088000+00:00

start loading file: 2023_place_canvas_history-000000000000.csv
start formating data
loaded file : 2023_place_canvas_history-000000000000.csv
results saved on users_variety_blur1.gif
results saved on users_variety_blur5.gif
results saved on users_variety_blur10.gif


# Détection de communauté

In [14]:
# ennemies[user][previous] counts how often `user` was recorded right after `previous`.
ennemies = {}
# Users seen on at least one row flagged as a moderator action.
moderators = set()

def get_ennemy_and_return_new_user(last_user, row, is_mod):
    """Record who replaces whom and return the user of the current row.

    ``last_user`` is the previously stored value (falsy when there is none),
    ``row`` must expose a ``user`` attribute and ``is_mod`` flags moderator rows.
    Updates the module-level ``ennemies`` and ``moderators`` as a side effect.
    """
    current_user = row.user

    if is_mod:
        moderators.add(current_user)

    if last_user:
        overwritten = ennemies.setdefault(current_user, {})
        overwritten[last_user] = overwritten.get(last_user, 0) + 1

    return current_user


# Run the pass only for its side effects: get_ennemy_and_return_new_user fills the
# module-level `ennemies` and `moderators`. The transform dict is empty and the
# name is "", and the recorded output shows no "results saved" line.
visualise_at_interval(
    get_ennemy_and_return_new_user, 
    {}, 
    -1, 
    # initial grid of empty strings, so the first row on a cell has no previous user
    [["" for i in range(2000)] for j in range(1000)],
    "",
) 

loaded file : 2023_place_canvas_history-000000000019.csv.gzip

start making visualizations


In [17]:
class all_in_one:
    """Container that ignores the index: every read and write hits one shared value.

    Nesting two instances gives a "grid" whose cells all alias the same object.
    """

    def __init__(self, val) -> None:
        # The single value returned for every index.
        self.val = val

    def __getitem__(self, index):
        """Return the shared value, whatever ``index`` is."""
        return self.val

    def __setitem__(self, index, val):
        """Replace the shared value, whatever ``index`` is."""
        self.val = val


def get_color(colors, row, is_mod):
    """Count the row's ``pixel_color`` in ``colors`` unless ``is_mod`` is truthy.

    ``colors`` is updated in place and returned so it can be carried to the next call.
    """
    if is_mod:
        return colors

    color = row.pixel_color
    colors[color] = colors.get(color, 0) + 1
    return colors



# Global colour tally: both levels of the "grid" are all_in_one wrappers, so every
# coordinate resolves to the same dict, which get_color updates.
# NOTE(review): the recorded run fails with "'NoneType' object has no attribute 'get'",
# i.e. get_color received None as `colors`; the cause is not visible in this
# notebook - TODO check how utils reads and writes the grid.
visualise_at_interval(
    get_color, 
    {},
    -1,
    all_in_one(all_in_one({})),
    "",
)

  0%|          | 1/2359617 [00:00<05:09, 7626.01it/s]

{}
None
202 1000
0,-298





AttributeError: 'NoneType' object has no attribute 'get'

In [4]:
# Number of directed (user, overwritten user) pairs stored in `ennemies`.
total_link = sum(map(len, ennemies.values()))
total_link

47163075

In [26]:
from utils import convert_to_milliseconds

# Scratch check of the helper: the recorded output for a 3000 ms Timedelta is 3000.
print(convert_to_milliseconds(pd.Timedelta(milliseconds=3000)))

3000


In [None]:
def make_symmetric(ennemies):
    """Make the weighted enemy mapping symmetric, in place.

    After the call, for two distinct users ``a`` and ``b``,
    ``ennemies[a][b] == ennemies[b][a] ==`` (old ``a -> b`` count) + (old
    ``b -> a`` count). Self links (``a -> a``) keep their value. Users that only
    appeared as a neighbour get their own entry.

    The totals are computed from the untouched input before anything is written
    back, so each direction is counted exactly once and no dict is resized while
    it is being iterated. Calling the function again adds the directions once
    more (the weights double), so call it once per mapping.

    Parameters
    ----------
    ennemies : dict[hashable, dict[hashable, number]]
        ``ennemies[a][b]`` is the weight of the link from ``a`` to ``b``.

    Returns
    -------
    None
    """
    totals = {}
    for node, neighbors in ennemies.items():
        for enemy, count in neighbors.items():
            forward = totals.setdefault(node, {})
            forward[enemy] = forward.get(enemy, 0) + count
            if enemy != node:
                backward = totals.setdefault(enemy, {})
                backward[node] = backward.get(node, 0) + count

    # Write back afterwards; existing inner dicts are updated rather than replaced.
    for node, neighbors in totals.items():
        ennemies.setdefault(node, {}).update(neighbors)


def ennemies_of_enemies_are_friends(enemies):
    """Derive a signed affinity score between users from the enemy mapping.

    ``enemies`` is first made symmetric in place with ``make_symmetric``. A direct
    enemy link of weight ``w`` contributes ``-w``. Every two-step path
    ``node -> enemy -> other`` (with ``other`` different from both) contributes a
    positive amount to ``node -> other``.

    NOTE(review): the original statement computing that positive amount was left
    unfinished. ``min`` of the two link weights is used here as an assumption -
    confirm the intended weighting.

    Parameters
    ----------
    enemies : dict[hashable, dict[hashable, number]]
        Same layout as for ``make_symmetric``; modified in place.

    Returns
    -------
    dict[hashable, dict[hashable, number]]
        ``result[a][b]`` is negative for opponents and positive for likely allies.
    """
    make_symmetric(enemies)
    enemies_of_enemies = {
        node: {enemy: -value for enemy, value in neighbors.items()}
        for node, neighbors in enemies.items()
    }

    for node, neighbors in enemies.items():
        scores = enemies_of_enemies[node]
        for enemy, weight in neighbors.items():
            if enemy == node:
                continue
            # After make_symmetric every neighbour has its own entry in `enemies`.
            for enemy_of_enemy, second_weight in enemies[enemy].items():
                if enemy_of_enemy == node or enemy_of_enemy == enemy:
                    continue
                scores[enemy_of_enemy] = scores.get(enemy_of_enemy, 0) + min(weight, second_weight)

    return enemies_of_enemies

In [9]:
# Scratch cell: printing a range shows its repr ("range(0, 5)"), not its elements.
print(range(5))

range(0, 5)


In [None]:
# Build the undirected enemy graph: one edge per (user, overwritten user) pair.
# The edges go through add_edges_from, which accepts any iterable of edges, so the
# tqdm wrapper can report progress. Handing the wrapper straight to nx.Graph(...)
# depends on networkx recognising it as edge-list data (list, tuple, iterator, ...),
# which a tqdm object is not.
# The per-pair counts stored in `ennemies` are not copied onto the edges.
G_ennemies = nx.Graph()
G_ennemies.add_edges_from(
    tqdm(
        ((node, neighbor) for node, neighbors in ennemies.items() for neighbor in neighbors),
        total=total_link,
    )
)

In [6]:
import community

In [8]:
# Small hand-made graph (5 nodes, 8 edges) used to try the community module.
G_basic = nx.Graph()
G_basic.add_edges_from([(1,2),(2,3),(1,3),(1,4),(3,4),(1,5),(2,5),(4,5)])

In [9]:
# The recorded result is a one-level list mapping each node to a community id.
# NOTE(review): `inverted` is presumably specific to the `community` module
# installed here - TODO confirm which package/version provides it.
community.generate_dendrogram(G_basic, inverted= True)

[{1: 0, 2: 0, 3: 0, 4: 1, 5: 1}]

# Todo

In [4]:
# Running totals filled by aggregate(): rows seen, split by the is_mod flag.
stats = {'pixels': 0, 'mods': 0, 'non_mods': 0}
# Every distinct user passed to aggregate().
users = set()
def aggregate(stats, users, user, is_mod):
    """Count one row in ``stats`` and remember its ``user`` in ``users``.

    Both containers are updated in place; nothing is returned.
    """
    users.add(user)
    stats['pixels'] += 1
    bucket = 'mods' if is_mod else 'non_mods'
    stats[bucket] += 1

# Run the pass only to feed aggregate(), which updates the module-level `stats`
# and `users`; the transform dict is empty.
# NOTE(review): aggregate() returns None, so the reducer's result is None -
# confirm that utils tolerates this.
visualise_at_interval(
    lambda x, y, user, is_mod : aggregate(stats, users, user, is_mod),
    {}, 
    pd.Timedelta(minutes= 30), 
    [["#FFFFFF" for i in range(2000)] for i in range(1000)],
    "image",
) 
# Displays both objects as the cell result; `users` holds every distinct user and
# may be very large.
stats, users

loaded file : 2023_place_canvas_history-000000000019.csv.gzip

start making visualizations


{'pixels': 49971807, 'mods': 296716, 'non_mods': 49675091}


matActivité, de forme [x][y] = fréquence de mise à jour --> image de l'activité, du bleu au rouge

### map:
- map de zone d'activité
- map de zone d'activité par couleur?
- map de certains utilisateurs actifs
- map de la couleur la plus utilisée
- map de la variété d'utilisateurs
- 

### statistique descriptive:
- activité par utilisateur
- activité par couleur
- activité par heure

### identifications:
- identifier les bots
- identifier les communautés / projets de communauté
- identifier la modération

### technique:
- download : fait
- augmentation de la taille de la grille : fait
- détection des antagonistes pour trouver les communautés : fait
- prétraitement des temps et de diverses valeurs pour réduire le temps d'ouverture : fait