In [1]:
%load_ext autoreload
import pandas as pd
import numpy as np
from matplotlib.colors import LogNorm
from matplotlib.cm import ScalarMappable
from scipy.ndimage import gaussian_filter
from functools import partial
import operator
from IPython.display import clear_output
from tqdm import tqdm

In [2]:
%autoreload 2
from utils import summary, visualise, string_to_color, visualise_at_interval, convertImage, initialize_data_loader



# Canvas reproduction

In [4]:
# Canvas replay: the update function stores the second argument it receives
# (presumably the colour of the incoming event -- confirm in utils).
visualise_at_interval(
    lambda previous, color, user, is_mod: color,
    # Convert every snapshot with convertImage, showing a tqdm progress bar.
    lambda snapshots: list(
        tqdm(
            (convertImage(snapshot, string_to_color) for snapshot in snapshots),
            total=len(snapshots),
        )
    ),
    pd.Timedelta(minutes=15),
    # 1000 rows of 2000 cells, every cell starting as "#FFFFFF".
    [["#FFFFFF"] * 2000 for _ in range(1000)],
    "image",
)



ZeroDivisionError: division by zero

# Activity

In [19]:
def get_transform(images, blur = 0):
    """Blur a sequence of 2-D numeric grids and colour them on one shared log scale.

    Parameters
    ----------
    images : sequence of array-like
        Numeric 2-D grids. The colour scale is logarithmic with ``vmin=1``,
        so values are expected to be >= 1.
    blur : float, optional
        Standard deviation handed to ``scipy.ndimage.gaussian_filter``.

    Returns
    -------
    list of numpy.ndarray
        One RGBA array per input grid (``bytes=True`` colour mapping), all
        produced with the same ``RdBu_r`` scale so frames are comparable.
        An empty list when ``images`` is empty.
    """
    # Cast to float first: gaussian_filter returns an array of the same dtype
    # as its input, so integer grids would have their blurred values truncated.
    blurred = [gaussian_filter(np.asarray(img, dtype=float), blur) for img in images]
    if not blurred:
        # Taking a maximum over nothing raises; there is nothing to colour anyway.
        return []
    # Per-frame maxima avoid stacking every frame into one large temporary array.
    maximum = max(float(frame.max()) for frame in blurred)
    custom_cmap = ScalarMappable(cmap='RdBu_r', norm=LogNorm(vmin=1, vmax=maximum + 1))
    return [custom_cmap.to_rgba(frame, bytes=True) for frame in blurred]

In [24]:

# Activity map: the update function adds one to the value it is handed
# (presumably the running count stored for the touched cell -- confirm in utils).
visualise_at_interval(
    lambda count, color, user, is_mod: count + 1,
    get_transform,
    pd.Timedelta(minutes=15),
    np.ones(shape=(1000, 2000)),  # grid starts at 1 everywhere
    "activite",
    duration=pd.Timedelta(minutes=30),
)

loaded file : 2023_place_canvas_history-000000000019.csv.gzip

start making visualizations
results saved on activite.gif


In [26]:

# Same counter as the windowed activity map, but -1 is passed where the other
# cells pass a Timedelta interval.
# NOTE(review): presumably -1 means "one frame for the whole period" -- confirm in utils.
visualise_at_interval(
    lambda count, color, user, is_mod: count + 1,
    get_transform,
    -1,
    np.ones(shape=(1000, 2000)),
    "activite_total",
)

loaded file : 2023_place_canvas_history-000000000019.csv.gzip

start making visualizations
results saved on activite_total.gif


In [None]:
# Blur strengths to render; each one gets its own transform, keyed by str(value).
blurs = [1, 2, 5, 10, 50, 100]

# NOTE(review): the grid here is 1000x1000 while the un-blurred activity cells
# use 1000x2000 -- confirm which size is intended.

# Every event adds one to the value handed to the update function.
visualise_at_interval(
    lambda count, color, user, is_mod: count + 1,
    dict((str(sigma), partial(get_transform, blur=sigma)) for sigma in blurs),
    pd.Timedelta(minutes=15),
    np.ones(shape=(1000, 1000)),
    lambda blur: f"activite_blur_{blur}",
    duration=pd.Timedelta(minutes=30),
)

# Same sweep, except events flagged is_mod add 0 instead of 1.
visualise_at_interval(
    lambda count, color, user, is_mod: count + (0 if is_mod else 1),
    dict((str(sigma), partial(get_transform, blur=sigma)) for sigma in blurs),
    pd.Timedelta(minutes=15),
    np.ones(shape=(1000, 1000)),
    lambda blur: f"activite_blur_no_moderator_{blur}",
    duration=pd.Timedelta(minutes=30),
)


# Variété d'utilisateurs

In [35]:
def compose(f, g):
    """Return a callable that applies ``g`` to its arguments, then ``f`` to the result.

    Positional and keyword arguments are forwarded to ``g`` unchanged;
    ``f`` receives the single value ``g`` returns.
    """
    def composed(*args, **kwargs):
        inner_result = g(*args, **kwargs)
        return f(inner_result)

    return composed

def add_user(user, users: set) -> set:
    """Add ``user`` to ``users`` in place and return that same collection.

    Returning the mutated collection lets the call be used as an expression,
    for example as the result of a lambda.
    """
    users.add(user)
    return users

def from_users_sets_by_points_to_len_by_points(images):
    """Replace every per-cell collection with its size plus one.

    ``images`` is a sequence of grids (rows of cells), each cell holding a
    sized collection. The result is a nested list of ints with the same
    layout; the ``+ 1`` keeps every value at 1 or more.
    """
    sizes_per_image = []
    for image in images:
        sizes_per_row = []
        for row in image:
            sizes_per_row.append([len(cell) + 1 for cell in row])
        sizes_per_image.append(sizes_per_row)
    return sizes_per_image

In [37]:
# Blur strengths rendered for the user-variety maps.
blurs = [1, 5, 10]

# Each cell accumulates the set of users passed with non-moderator events;
# the sets are turned into sizes and then coloured by get_transform.
visualise_at_interval(
    lambda seen, color, user, is_mod: add_user(user, seen) if not is_mod else seen,
    {
        str(sigma): compose(
            partial(get_transform, blur=sigma),
            from_users_sets_by_points_to_len_by_points,
        )
        for sigma in blurs
    },
    pd.Timedelta(minutes=5),
    # A distinct empty set per cell: 1000 rows of 1000 cells.
    [[set() for _ in range(1000)] for _ in range(1000)],
    lambda blur: f"users_variety_blur{blur}",
    duration=pd.Timedelta(minutes=15),
    EndingTimeStamp=pd.Timestamp("2023-07-20 14:00:26.088 UTC"),
)

just made summary from 2023-07-20 13:55:26.088000+00:00 to 2023-07-20 14:10:26.088000+00:00

start loading file: 2023_place_canvas_history-000000000000.csv
start formating data
loaded file : 2023_place_canvas_history-000000000000.csv
results saved on users_variety_blur1.gif
results saved on users_variety_blur5.gif
results saved on users_variety_blur10.gif


# Détection de communauté

In [3]:
# Maps a user to the set of other users whose stored value that user replaced.
ennemies = {}
# Users that were seen with a truthy is_mod flag.
moderators = set()

def get_ennemy_and_return_new_user(last_user, color, user, is_mod):
    """Record who replaced whom and return the user to store next.

    Intended as the update function given to ``visualise_at_interval``:
    ``last_user`` is the value currently stored (the initial grid holds ``""``)
    and the returned ``user`` becomes the new stored value.

    Side effects on module-level state:
    - ``moderators`` gains ``user`` when ``is_mod`` is truthy.
    - ``ennemies[user]`` gains ``last_user`` when there was a previous,
      different user.

    ``color`` is accepted to match the update-function signature; it is unused.
    """
    if is_mod:
        moderators.add(user)
    # An empty last_user means nothing was stored yet. A user replacing their
    # own entry is not an antagonist and would only add a self-loop to the graph.
    if last_user and last_user != user:
        ennemies.setdefault(user, set()).add(last_user)
    return user
    
# Run the pass only for its side effects on `ennemies` and `moderators`:
# an empty transform mapping and an empty output name are supplied.
# NOTE(review): -1 is passed where other cells pass a Timedelta interval -- confirm its meaning in utils.
visualise_at_interval(
    get_ennemy_and_return_new_user,
    dict(),
    -1,
    # 1000 rows of 2000 cells, each starting as the empty string (no previous user).
    [[""] * 2000 for _ in range(1000)],
    "",
)

loaded file : 2023_place_canvas_history-000000000019.csv.gzip

start making visualizations


In [None]:
def graph_from_neighbors_dict(neighbors_dict):
    """Build an undirected NetworkX graph from a ``node -> neighbours`` mapping.

    One edge ``(node, neighbor)`` is added for every neighbour listed under
    every key. Keys whose neighbour collection is empty contribute no edge,
    so they do not appear in the graph.

    Returns
    -------
    networkx.Graph
        The constructed graph.
    """
    # Imported here because the notebook's import cells do not bring in networkx.
    import networkx as nx

    G = nx.Graph()
    G.add_edges_from(
        (node, neighbor)
        for node, neighbors in neighbors_dict.items()
        for neighbor in neighbors
    )
    # The graph was previously built and then dropped; hand it back to the caller.
    return G

# Todo


matActivité, forme : `[x][y]` = fréquence de mise à jour → image de l'activité, du bleu au rouge

### map:
- map de zone d'activité
- map de zone d'activité par couleur?
- map de certains utilisateurs actifs
- map de la couleur la plus utilisée
- map de la variété d'utilisateurs
- 

### statistique descriptive:
- activité par utilisateur
- activité par couleur
- activité par heure

### identifications:
- identifier les bots
- identifier les communautés / projets de communauté
- identification de la modération

### technique:
- download
- augmentation taille grille
- détection des antagonistes pour trouver les communautés
- prétraitement des temps et de diverses valeurs pour réduire le temps d'ouverture