# Импорти, глобални параметри, функции

In [None]:
from dask import dataframe as dd
from datetime import datetime
from matplotlib.ticker import FuncFormatter
from matplotlib.font_manager import FontProperties
import matplotlib.pyplot as plt
import pandas_alive
import moviepy.editor as mp

In [None]:
# Apply the Matplotlib style sheet stored in the file 'style.mplstyle'.
plt.style.use('style.mplstyle')

# Create an initial figure and axes; later cells rebind `fig` and `ax`.
fig = plt.figure()
ax = plt.axes()

In [None]:
# Switch checked by the `if SAVE_FILES:` cells; while False those cells do nothing.
SAVE_FILES = False

In [None]:
def parse_timestamp(timestamp, date_format):
    """Convert a millisecond offset into a datetime truncated to ``date_format``.

    Args:
        timestamp: Offset in milliseconds; it is divided by 1000 and truncated
            to whole seconds.
        date_format: ``strftime``/``strptime`` pattern. Fields missing from the
            pattern are dropped by the format/parse round trip (for example
            ``'%Y-%m-%d %H'`` yields a datetime on the full hour).

    Returns:
        A naive ``datetime`` rebuilt from the formatted string.
    """
    # Fixed number of seconds added to every offset.
    # NOTE(review): presumably the Unix time at which the recording starts - confirm.
    epoch_offset_seconds = 1648806250
    epoch_seconds = int(timestamp / 1000) + epoch_offset_seconds

    # NOTE(review): fromtimestamp() without tz uses the machine's local
    # timezone, so the result depends on where this runs - confirm this is
    # intended, since several charts label this axis as UTC.
    rendered = datetime.fromtimestamp(epoch_seconds).strftime(date_format)

    return datetime.strptime(rendered, date_format)

In [None]:
# Hex colour for every numeric colour code, keyed by the code (0..31).
# Built once at definition time instead of on every call, because
# parse_color() is mapped over every row of the main dataframe.
_HEX_BY_COLOR_CODE = dict(enumerate((
    '#000000',
    '#00756F',
    '#009EAA',
    '#00A368',
    '#00CC78',
    '#00CCC0',
    '#2450A4',
    '#3690EA',
    '#493AC1',
    '#515252',
    '#51E9F4',
    '#6A5CFF',
    '#6D001A',
    '#6D482F',
    '#7EED56',
    '#811E9F',
    '#898D90',
    '#94B3FF',
    '#9C6926',
    '#B44AC0',
    '#BE0039',
    '#D4D7D9',
    '#DE107F',
    '#E4ABFF',
    '#FF3881',
    '#FF4500',
    '#FF99AA',
    '#FFA800',
    '#FFB470',
    '#FFD635',
    '#FFF8B8',
    '#FFFFFF',
)))


def parse_color(pixel_color):
    """Return the hex colour string for a numeric colour code.

    Args:
        pixel_color: Colour code between 0 and 31. Values that compare equal
            to one of those integers (for example ``1.0``) are accepted too.

    Returns:
        The matching hex string such as ``'#FF4500'``.

    Raises:
        IndexError: If ``pixel_color`` is not a known colour code. The
            exception type matches the previous list-indexing implementation.
    """
    try:
        return _HEX_BY_COLOR_CODE[pixel_color]
    except (KeyError, TypeError):
        # TypeError covers unhashable input, which previously also ended in
        # an IndexError because nothing compared equal to it.
        raise IndexError(f'unknown pixel colour code: {pixel_color!r}') from None

In [None]:
def millions(x, pos):
    """Format a tick value as a number of millions with one decimal place.

    Args:
        x: Tick value in plain units.
        pos: Tick position; accepted for the ``FuncFormatter`` callback
            signature and not used.

    Returns:
        ``x`` scaled by 1e-6 and rendered with one digit after the point.
    """
    scaled = x * 1e-6
    return f'{scaled:1.1f}'

# Tick formatter that renders axis values through millions().
formatter = FuncFormatter(millions)

# Импортиране на файлове

In [None]:
# Open the 'data_users' parquet dataset as a Dask dataframe.
# NOTE(review): the backslash-separated path is Windows-style - confirm before running on another OS.
ddf_users = dd.read_parquet('data\\data_users')

# Preview the first rows.
ddf_users.head()

In [None]:
# Open the 'data_core' parquet dataset as a Dask dataframe.
ddf_main = dd.read_parquet('data\\data_core')

# Replace every numeric colour code with its hex string via parse_color().
ddf_main['pixel_color'] = ddf_main['pixel_color'].map(parse_color)

# Preview the first rows.
ddf_main.head()

In [None]:
# Open the 'data_hourly-colors-count' parquet dataset as a Dask dataframe.
ddf_pivoted = dd.read_parquet('data\\data_hourly-colors-count')

# Preview the first rows.
ddf_pivoted.head()

In [None]:
# Open the 'data_hourly-pixels' parquet dataset as a Dask dataframe.
ddf_datecount = dd.read_parquet('data\\data_hourly-pixels')

# Preview the first rows.
ddf_datecount.head()

# Диаграми

### Стълбовидна диаграма, представяща 20-те най-активни потребители

In [None]:
# Count the rows per user_id and turn the result into a two-column frame:
# 'index' (the user id) and 'num_placed_pixels' (its row count).
# rename_axis('index') pins the name of the id column: pandas >= 2.0 names the
# value_counts() index after the source column, so reset_index() would
# otherwise produce 'user_id' and the lookups of 'index' would fail.
all_users = (
    ddf_users.user_id.value_counts()
    .compute()
    .rename_axis('index')
    .reset_index(name='num_placed_pixels')
)

# First 20 rows of the counts, indexed by the user id for plotting.
most_active_users = all_users.head(20).set_index('index')

most_active_users.head()

In [None]:
# Horizontal bar chart of the rows in most_active_users.
plot = most_active_users.plot(kind='barh', edgecolor='#3193E4')
plot.set(
    title='20-те най-активни потребители',
    xlabel='Брой поставени пиксели',
    ylabel='Уникален идентификатор',
)

# Show only the first 10 characters of every tick label (the user ids).
y_tick_labels = [tick.get_text()[0:10] for tick in plot.get_yticklabels()]
plot.set_yticklabels(y_tick_labels)

# Write each bar's value next to the bar.
bar_labels = plot.containers[0]
plot.axes.bar_label(bar_labels, padding=5, fontsize=16)

# Flip the y axis so the first row is drawn at the top.
plot.invert_yaxis()

# The single-series legend is not needed.
plot.get_legend().remove()

In [None]:
# Write the chart to disk only when file saving is switched on.
if SAVE_FILES:
    # Day, hour and minute stamp appended to the file name.
    now = datetime.now().strftime('%d%H%M')

    fig = plot.get_figure()
    fig.savefig(f'output\\plot_users-{now}.png')

### Разпръсната диаграма, представяща броя потребители според това колко пиксели са поставили

In [None]:
# For every distinct value of num_placed_pixels, count how many users have it;
# the count column is named 'users'.
num_active_users = all_users.num_placed_pixels.value_counts().reset_index(name='users')

num_active_users.head()

In [None]:
# One full-size axes (used only for the shared title) plus two side-by-side
# axes for the actual scatter plots.
fig = plt.figure(figsize=(25,10))
ax = fig.add_subplot(111)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

# Hide every visual element of the full-size axes except its title.
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_color('none')
ax.tick_params(labelcolor='#E6E6E6', top=False, bottom=False, left=False, right=False)
ax.grid(False)

ax.set_title('Брой потребители, в зависимост от това колко пиксела са поставили')

# Same x ticks on both plots.
pixel_count_ticks = [1,100,200,300,400,500,600,700,800]

# Left plot: first column against second column.
num_active_users.plot(x=0, y=1, kind='scatter', zorder=3, s=50, ax=ax1)
ax1.set_xlabel('Брой поставени пиксели')
ax1.set_ylabel('Брой потребители (милиони)')
ax1.set_xticks(pixel_count_ticks)

# Right plot: the same data with smaller markers and a logarithmic y axis.
num_active_users.plot(x=0, y=1, kind='scatter', zorder=3, s=30, ax=ax2)
ax2.set_xlabel('Брой поставени пиксели')
ax2.set_ylabel('Брой потребители')
ax2.set_yscale('log')
ax2.set_xticks(pixel_count_ticks)

In [None]:
# Write the current figure to disk only when file saving is switched on.
if SAVE_FILES:
    # Day, hour and minute stamp appended to the file name.
    now = datetime.now().strftime('%d%H%M')

    fig.savefig(f'output\\plot_activity-{now}.png')

### Статистика за броя потребители, участвали в експеримента

In [None]:
# Number of non-null entries in the 'index' column of all_users
# (one row per user id, as produced by value_counts()).
unique_users = all_users['index'].count()

print(f'Брой потребители, участвали в експеримента: {unique_users}')

### Диаграма, представяща броя поставени пиксели от всеки цвят

In [None]:
# Work on a copy of the main dataframe without the 'timestamp' and 'coordinate' columns.
ddf_colors = ddf_main.copy()
ddf_colors = ddf_colors.drop(['timestamp', 'coordinate'], axis=1)

ddf_colors.head()

In [None]:
# Count the rows per pixel_color and materialise the result with compute().
most_used_colors = ddf_colors.pixel_color.value_counts().compute()

most_used_colors.head()

In [None]:
# The index of most_used_colors holds the hex colour of every bar, so it is
# used both as the bar colours and as the tick labels.
# (Series.iteritems() was removed in pandas 2.0; the index gives the same values.)
color_bars = most_used_colors.index.tolist()

plot = most_used_colors.plot(kind='barh', color=color_bars, edgecolor='black', zorder=3, width=0.8)

plot.set_title('Общ брой пиксели от всеки цвят')
plot.set_xlabel('Брой поставени пиксели (милиони)')
plot.set_ylabel('Цвят (hex)')

plot.set_yticklabels(color_bars)
# Show the x ticks in millions, matching the axis label.
plot.xaxis.set_major_formatter(formatter)

# Flip the y axis so the first row is drawn at the top.
plot.invert_yaxis()

In [None]:
# Write the chart to disk only when file saving is switched on.
if SAVE_FILES:
    # Day, hour and minute stamp appended to the file name.
    now = datetime.now().strftime('%d%H%M')

    fig = plot.get_figure()
    fig.savefig(f'output\\plot_colors-{now}.png')

In [None]:
# Remove five colours from the counts so the remaining ones can be charted separately.
# NOTE(review): presumably these are the five most used colours - confirm against most_used_colors.
least_used_colors = most_used_colors.drop(['#000000', '#FFFFFF', '#FF4500', '#2450A4', '#FFD635'])

least_used_colors.head()

In [None]:
# The index of least_used_colors holds the hex colour of every bar, so it is
# used both as the bar colours and as the tick labels.
# (Series.iteritems() was removed in pandas 2.0; the index gives the same values.)
color_bars = least_used_colors.index.tolist()

plot = least_used_colors.plot(kind='barh', color=color_bars, edgecolor='black', zorder=3, width=0.8)

plot.set_title('Общ брой пиксели от всеки цвят')
plot.set_xlabel('Брой поставени пиксели (милиони)')
plot.set_ylabel('Цвят (hex)')

plot.set_yticklabels(color_bars)

# Flip the y axis so the first row is drawn at the top.
plot.invert_yaxis()

In [None]:
# Write the chart to disk only when file saving is switched on.
if SAVE_FILES:
    # Day, hour and minute stamp appended to the file name.
    now = datetime.now().strftime('%d%H%M')

    fig = plot.get_figure()
    fig.savefig(f'output\\plot_colors_mod-{now}.png')

### Диаграма, представяща 20-те най-често променяни точки

In [None]:
# Work on a copy of the main dataframe that keeps only the 'coordinate' column
# out of 'timestamp', 'pixel_color' and 'coordinate'.
ddf_coordinate = ddf_main.copy()
ddf_coordinate = ddf_coordinate.drop(['timestamp', 'pixel_color'], axis=1)
# Count the rows per coordinate, materialise the counts and keep the first 20.
active_spots = ddf_coordinate.coordinate.value_counts().compute().head(20)

active_spots.head()

In [None]:
# Horizontal bar chart of the counts in active_spots.
plot = active_spots.plot(kind='barh', zorder=3,  edgecolor='#3193E4')

plot.set_title('20-те най-променяни точки')
plot.set_xlabel('Брой промени')
plot.set_ylabel('Точка (x,y)')

# Flip the y axis so the first row is drawn at the top.
plot.invert_yaxis()

In [None]:
# Write the chart to disk only when file saving is switched on.
if SAVE_FILES:
    # Day, hour and minute stamp appended to the file name.
    now = datetime.now().strftime('%d%H%M')

    fig = plot.get_figure()
    fig.savefig(f'output\\plot_coordinates-{now}.png')

### Кръгови диаграми и таблица, показващи броя пъти, през които е било нужно цензуриране

In [None]:
# Copy the main dataframe and record how many commas each coordinate string contains.
# NOTE(review): presumably a plain point is 'x,y' (one comma) and anything with
# more commas is a moderation rectangle - confirm against the dataset description.
ddf_moderated = ddf_main.copy()
ddf_moderated['comma_count'] = ddf_moderated.coordinate.str.count(',')

ddf_moderated.head()

In [None]:
# Keep only the rows whose coordinate string has more than one comma,
# then drop the helper column.
ddf_moderated = ddf_moderated[ddf_moderated['comma_count'] > 1]
ddf_moderated = ddf_moderated.drop('comma_count', axis=1)
# Convert every raw timestamp to a datetime with one-second resolution via parse_timestamp().
ddf_moderated['timestamp'] = ddf_moderated.apply(lambda x: parse_timestamp(x['timestamp'], '%Y-%m-%d %H%M%S'), axis=1)

# Materialise the result; from here on ddf_moderated is no longer lazy.
ddf_moderated = ddf_moderated.compute()
# set_index followed by reset_index moves 'timestamp' to the first column
# and replaces the row index with a fresh 0..n-1 range.
ddf_moderated = ddf_moderated.set_index('timestamp').reset_index()
 
ddf_moderated.head()

In [None]:
# Split every coordinate string into its comma-separated parts.
ddf_moderated['coordinate'] = ddf_moderated['coordinate'].apply(lambda x: x.split(','))

# The first four parts become the corner columns x1, y1, x2, y2.
ddf_moderated['x1'] = ddf_moderated['coordinate'].apply(lambda x: x[0]).astype('uint16')
ddf_moderated['y1'] = ddf_moderated['coordinate'].apply(lambda x: x[1]).astype('uint16')
ddf_moderated['x2'] = ddf_moderated['coordinate'].apply(lambda x: x[2]).astype('uint16')
ddf_moderated['y2'] = ddf_moderated['coordinate'].apply(lambda x: x[3]).astype('uint16')

ddf_moderated = ddf_moderated.drop('coordinate', axis=1)

# Area of each rectangle. The corners are widened to int64 before the
# arithmetic: in uint16 both the subtraction and the product wrap around
# silently, so any area above 65535 used to come out wrong.
# NOTE(review): this treats the second corner as exclusive; if both corners are
# inclusive the count should use (x2 - x1 + 1) * (y2 - y1 + 1) - confirm.
ddf_moderated['num_pixels'] = (
    (ddf_moderated['x2'].astype('int64') - ddf_moderated['x1'].astype('int64'))
    * (ddf_moderated['y2'].astype('int64') - ddf_moderated['y1'].astype('int64'))
)

ddf_moderated.head()

In [None]:
# Number of rows in ddf_moderated per pixel_color.
moderated_count = ddf_moderated['pixel_color'].value_counts()

moderated_count.head()

In [None]:
# Sum of num_pixels per pixel_color.
num_moderated_pixels = ddf_moderated.groupby('pixel_color').num_pixels.sum()

num_moderated_pixels.head(10)

In [None]:
# One full-size axes (used only for the shared title) plus two side-by-side
# axes for the pie charts.
fig = plt.figure()
ax = fig.add_subplot(111)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

# Hide every visual element of the full-size axes except its title.
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_color('none')
ax.tick_params(labelcolor='#E6E6E6', top=False, bottom=False, left=False, right=False)
ax.grid(False)

ax.set_title('Цветове, използвани за редактиране')

# Left pie: one wedge per colour, sized by moderated_count and painted in
# that colour (the index holds the hex strings). The wedge labels are the counts.
# Series.iteritems() was removed in pandas 2.0, so index and values are read directly.
color_pie = moderated_count.index.tolist()
labels_pie = moderated_count.tolist()
# One small offset per wedge; sized from the data because pie() requires
# `explode` to have exactly as many entries as there are wedges.
explode_pie = [0.01] * len(moderated_count)

moderated_count.plot(kind='pie', colors=color_pie, labels=labels_pie, explode=explode_pie, ax=ax1, shadow=True)

ax1.set_xlabel('Брой редакции')
ax1.set_ylabel('')

# Right pie: the same layout for num_moderated_pixels.
color_pie = num_moderated_pixels.index.tolist()
labels_pie = num_moderated_pixels.tolist()
explode_pie = [0.01] * len(num_moderated_pixels)

num_moderated_pixels.plot(kind='pie', colors=color_pie, labels=labels_pie, explode=explode_pie, ax=ax2, startangle=30, shadow=True)

ax2.set_xlabel('Брой редактирани пиксели')
ax2.set_ylabel('')

# Colour legend: every entry is labelled with a hex string and its handle is
# repainted in that same colour.
leg = ax1.legend(labels=color_pie, loc=(0.95,0.55), prop={'size' : 18}, facecolor='#E6E6E6', edgecolor='black')
# Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and 3.9 removed
# the old name; prefer the new attribute and fall back for older versions.
color_handles = getattr(leg, 'legend_handles', None)
if color_handles is None:
    color_handles = leg.legendHandles
for handle, hex_color in zip(color_handles, color_pie):
    handle.set_color(hex_color)
    handle.set_edgecolor('black')

# Text-only box with the two totals, built as a legend whose handles are hidden.
custom_text = [f'Общ брой редакции: {moderated_count.sum()}', f'Общ брой редактирани пиксели: {num_moderated_pixels.sum()}']
custom_label = ax2.legend(labels=custom_text, loc=(-0.5,0), handlelength=0, handletextpad=0, fancybox=True, prop=dict(size=18), facecolor='#E6E6E6', edgecolor='black')
text_handles = getattr(custom_label, 'legend_handles', None)
if text_handles is None:
    text_handles = custom_label.legendHandles
for item in text_handles:
    item.set_visible(False)

In [None]:
# Write the current figure to disk only when file saving is switched on.
if SAVE_FILES:
    # Day, hour and minute stamp appended to the file name.
    now = datetime.now().strftime('%d%H%M')

    fig.savefig(f'output\\plot_moderated-{now}.png')

In [None]:
# Duplicate the colour column; the table cell below uses the copy as a colour swatch.
ddf_moderated['Цвят'] = ddf_moderated['pixel_color']
# Give the columns their display names and put them in display order.
ddf_moderated = ddf_moderated.rename(columns={'timestamp': 'Дата и час (UTC)', 'pixel_color': 'Цвят (hex)', 'num_pixels': 'Брой пиксели'})
ddf_moderated = ddf_moderated[['Дата и час (UTC)', 'Цвят (hex)', 'Цвят', 'x1', 'y1', 'x2', 'y2', 'Брой пиксели']]

ddf_moderated.head()

In [None]:
# Draw ddf_moderated as a table on an otherwise empty figure.
fig, ax = plt.subplots()

fig.patch.set_visible(False)
ax.axis('off')
ax.axis('tight')

# Relative width per column position; positions not listed get 0.07.
special_widths = {0: 0.15, 1: 0.12, 2: 0.05, 7: 0.12}
col_widths = [special_widths.get(position, 0.07) for position in range(len(ddf_moderated.columns))]

table = ax.table(cellText=ddf_moderated.values, colLabels=ddf_moderated.columns, loc='center', colWidths=col_widths, cellLoc='center')
table.scale(1,3)
table.auto_set_font_size(False)
table.set_fontsize(16)

for (row, col), cell in table.get_celld().items():
    if row == 0:
        # Header row: green background, larger font, taller cell.
        cell.set_facecolor('#89B55C')
        cell.set_text_props(fontproperties=FontProperties(size=22))
        cell.set_height(0.1)
        continue

    # Body rows alternate between two greys.
    cell.set_facecolor('#E6E6E6' if row % 2 == 0 else '#DCDCDC')

    if col == 2:
        # Swatch column: paint the cell in the hex colour it contains and
        # blank the text. Uses the public Cell.get_text() accessor rather
        # than the private `_text` attribute.
        swatch_text = cell.get_text()
        cell.set_facecolor(swatch_text.get_text())
        swatch_text.set_text('')

fig.tight_layout()

In [None]:
# Write the current figure to disk only when file saving is switched on.
if SAVE_FILES:
    # Day, hour and minute stamp appended to the file name.
    now = datetime.now().strftime('%d%H%M')

    fig.savefig(f'output\\table_moderated-{now}.png')

# Анимирани диаграми

In [None]:
# Date pattern passed as period_fmt to the animated plots and used to parse
# the label_events dates.
period_format = '%H:%M %d-%m-%Y'

# Hex colours passed as cmap to the cumulative per-colour animations.
# NOTE(review): presumably ordered like the columns of ddf_pivoted - confirm.
bars_cmap = [
        '#000000', 
        '#00A368', 
        '#2450A4', 
        '#3690EA', 
        '#51E9F4', 
        '#7EED56', 
        '#811E9F', 
        '#898D90', 
        '#9C6926', 
        '#B44AC0', 
        '#D4D7D9', 
        '#FF4500', 
        '#FF99AA', 
        '#FFA800', 
        '#FFD635', 
        '#FFFFFF', 
        '#00756F', 
        '#009EAA', 
        '#00CC78', 
        '#493AC1', 
        '#6A5CFF', 
        '#6D482F', 
        '#BE0039', 
        '#FF3881', 
        '#00CCC0', 
        '#515252', 
        '#6D001A', 
        '#94B3FF', 
        '#DE107F', 
        '#E4ABFF', 
        '#FFB470', 
        '#FFF8B8'
]

# Hex colours passed as both fixed_order and cmap to the hourly per-colour animation.
fixed_bars_cmap = [
        '#000000', 
        '#FFFFFF', 
        '#FF4500', 
        '#2450A4', 
        '#FFD635', 
        '#BE0039', 
        '#51E9F4', 
        '#811E9F', 
        '#FFA800', 
        '#FF99AA', 
        '#3690EA', 
        '#00A368', 
        '#898D90', 
        '#7EED56', 
        '#D4D7D9', 
        '#9C6926', 
        '#FFB470', 
        '#FF3881', 
        '#B44AC0', 
        '#6D482F', 
        '#00CC78', 
        '#493AC1', 
        '#FFF8B8', 
        '#515252', 
        '#6D001A', 
        '#DE107F', 
        '#00756F', 
        '#6A5CFF', 
        '#94B3FF', 
        '#009EAA', 
        '#E4ABFF', 
        '#00CCC0'
]

### Анимирана диаграма, показваща броя поставени пиксели за всеки час

In [None]:
# Use the 'datetime' column as the index, then materialise the frame with compute().
ddf_datecount = ddf_datecount.set_index('datetime')
ddf_datecount = ddf_datecount.compute()

ddf_datecount.head()

In [None]:
# Render the animated line chart only when file saving is switched on.
if SAVE_FILES:
    fig = plt.figure()
    ax = plt.axes()

    ax.set_title('Брой поставени пиксели за всеки час')
    ax.set_xlabel('Дата и час (UTC)')
    ax.set_ylabel('Брой поставени пиксели (милиони)')

    # Turn off scientific notation on the y axis.
    # NOTE(review): the y label says millions but no millions formatter is set here - confirm the tick values match the label.
    ax.get_yaxis().get_major_formatter().set_scientific(False)

    # Day, hour and minute stamp appended to the file names.
    now = datetime.now().strftime('%d%H%M')

    # Write the animation as a GIF; label_events marks midnight of four consecutive days.
    ddf_datecount.plot_animated(filename=f'output\\animated_activity-{now}.gif', 
        fig=fig, 
        add_legend=False, 
        kind='line', 
        linewidth=5,
        figsize=(25,10),
        period_fmt=period_format,
        period_label=False,
        tick_label_size=16,
        label_events={    
            '  Първи ден': datetime.strptime('00:00 01-04-2022', period_format),
            '  Втори ден': datetime.strptime('00:00 02-04-2022', period_format),
            '  Трети ден': datetime.strptime('00:00 03-04-2022', period_format),
            '  Четвърти ден': datetime.strptime('00:00 04-04-2022', period_format)
        },
        antialiased=True,
        )

    # Convert the GIF that was just written into an MP4 at 60 fps.
    clip = mp.VideoFileClip(f'output\\animated_activity-{now}.gif')
    clip.write_videofile(f'output\\animated_activity-{now}.mp4', fps=60)

### Анимирана диаграма, показваща кумулативния брой поставени пиксели

In [None]:
# Render the animated cumulative line chart only when file saving is switched on.
if SAVE_FILES:
    fig = plt.figure()
    ax = plt.axes()

    ax.set_title('Кумулативен брой поставени пиксели')
    ax.set_xlabel('Дата и час (UTC)')
    ax.set_ylabel('Брой поставени пиксели (милиони)')

    # Show the y ticks in millions, matching the axis label.
    ax.yaxis.set_major_formatter(formatter)    

    # Day, hour and minute stamp appended to the file names.
    now = datetime.now().strftime('%d%H%M')

    # Animate the running total (cumsum) and write it as a GIF;
    # label_events marks midnight of four consecutive days.
    ddf_datecount.cumsum().plot_animated(filename=f'output\\animated_activity_cumsum-{now}.gif', 
        fig=fig, 
        add_legend=False, 
        kind='line', 
        figsize=(25,10),
        period_fmt=period_format,
        period_label=False,
        tick_label_size=16,
        label_events={    
            '  Първи ден': datetime.strptime('00:00 01-04-2022', period_format),
            '  Втори ден': datetime.strptime('00:00 02-04-2022', period_format),
            '  Трети ден': datetime.strptime('00:00 03-04-2022', period_format),
            '  Четвърти ден': datetime.strptime('00:00 04-04-2022', period_format)
        },
        fill_under_line_color='#89A3D5',
        antialiased=True
        )

    # Convert the GIF that was just written into an MP4 at 60 fps.
    clip = mp.VideoFileClip(f'output\\animated_activity_cumsum-{now}.gif')
    clip.write_videofile(f'output\\animated_activity_cumsum-{now}.mp4', fps=60)

### Анимирана графика, показваща броя и вида поставени цветове за всеки час

In [None]:
# Use the 'datetime' column as the index, then materialise the frame with compute().
ddf_pivoted = ddf_pivoted.set_index('datetime')
ddf_pivoted = ddf_pivoted.compute()

In [None]:
# Render the animated per-colour chart only when file saving is switched on.
if SAVE_FILES:
    fig = plt.figure()
    ax = plt.axes()

    ax.set_title('Брой поставени пиксели според цвета за всеки час')
    ax.set_ylabel('Цвят (hex)')
    ax.set_xlabel('Брой пиксели')

    # Day, hour and minute stamp appended to the file names.
    now = datetime.now().strftime('%d%H%M')

    # Write the animation as a GIF; fixed_bars_cmap supplies both the bar
    # order (fixed_order) and the bar colours (cmap).
    ddf_pivoted.plot_animated(filename=f'output\\animated_colors-{now}.gif',    
        period_length=700,
        bar_label_size=14,
        add_legend=False, 
        figsize=(25,10),
        fig=fig,
        period_fmt=period_format,
        fixed_order=fixed_bars_cmap,
        period_label=True,
        tick_label_size=14,
        cmap=fixed_bars_cmap,
        antialiased=True,
        fixed_max=True
        )

    # Convert the GIF that was just written into an MP4 at 60 fps.
    clip = mp.VideoFileClip(f'output\\animated_colors-{now}.gif')
    clip.write_videofile(f'output\\animated_colors-{now}.mp4', fps=60)

### Анимирана графика, показваща кумулативния брой поставени пиксели от всеки цвят

In [None]:
# Render the combined bar + pie animation only when file saving is switched on.
if SAVE_FILES:
    # Set the x tick label size to 0 for the charts created below.
    # This changes the global rcParams, not just this figure.
    plt.rcParams['xtick.labelsize'] = 0

    # Two axes side by side with a 7:3 width ratio.
    fig = plt.Figure()
    gs = plt.GridSpec(1,2, width_ratios=[7,3])

    ax1 = fig.add_subplot(gs[0,0])
    ax2 = fig.add_subplot(gs[0,1])

    # Title spelling corrected ('Кумумулативен' -> 'Кумулативен').
    fig.suptitle('Кумулативен брой поставени пиксели от всеки цвят')
    ax1.set_ylabel('Цвят (hex)')
    ax1.set_xlabel('Брой пиксели (милиони)')

    # Show the x ticks of the bar chart in millions, matching the axis label.
    formatter = FuncFormatter(millions)
    ax1.xaxis.set_major_formatter(formatter)

    # Day, hour and minute stamp appended to the file names.
    now = datetime.now().strftime('%d%H%M')

    # Animated chart of the running totals; no filename is given here because
    # the chart is written later together with the pie chart.
    bar_chart = ddf_pivoted.cumsum().plot_animated(   
        period_length=700,
        bar_label_size=14,
        add_legend=False, 
        figsize=(25,10),
        period_fmt=period_format,
        period_label=True,
        tick_label_size=14,
        cmap=bars_cmap,
        antialiased=True,
        )

    # Draw the chart on the left axes of the shared figure.
    bar_chart.ax = ax1

    # Animated pie chart of the same running totals.
    pie_chart = ddf_pivoted.cumsum().plot_animated(  
        kind='pie',
        figsize=(10,10),
        cmap=bars_cmap,
        )

    # Draw the pie on the right axes of the shared figure.
    pie_chart.ax = ax2

    # Write both animations into a single GIF on the shared figure.
    pandas_alive.animate_multiple_plots(f'output\\animated_colors_cumsum-{now}.gif', [bar_chart,pie_chart], fig)

    # Convert the GIF that was just written into an MP4 at 60 fps.
    clip = mp.VideoFileClip(f'output\\animated_colors_cumsum-{now}.gif')
    clip.write_videofile(f'output\\animated_colors_cumsum-{now}.mp4', fps=60)

In [None]:
# Re-apply the style sheet after the previous cell changed plt.rcParams.
# NOTE(review): this only restores 'xtick.labelsize' if the style file sets it - confirm.
plt.style.use('style.mplstyle')

### Анимирана графика, показваща броя поставени пиксели в 5-те най-популярни точки за всеки час

In [None]:
# Coordinates whose rows are kept for the per-hour animation.
top_coordinates = ['0,0', '359,564', '349,564', '859,766', '860,766']

# Keep only the rows of the main dataframe at those coordinates and drop
# the colour column.
ddf_top_coords = ddf_main.copy()
is_top_coordinate = ddf_top_coords['coordinate'].isin(top_coordinates)
ddf_top_coords = ddf_top_coords[is_top_coordinate]
ddf_top_coords = ddf_top_coords.drop('pixel_color', axis=1)

ddf_top_coords.head()

In [None]:
# Add an 'index' column taken from the index of a reset copy of the frame.
ddf_top_coords['index'] = ddf_top_coords.reset_index().index
# Convert every raw timestamp to a datetime truncated to the hour
# (the format contains no minutes or seconds) via parse_timestamp().
ddf_top_coords['timestamp'] = ddf_top_coords.apply(lambda x: parse_timestamp(x['timestamp'], '%Y-%m-%d %H'), axis=1)
ddf_top_coords = ddf_top_coords.rename(columns={'timestamp': 'datetime'})

ddf_top_coords.head()

In [None]:
# Turn 'coordinate' into a categorical column and resolve its categories with as_known().
# NOTE(review): presumably needed because the pivot_table call that follows expects known categories - confirm.
ddf_top_coords['coordinate'] = ddf_top_coords['coordinate'].astype('category')
ddf_top_coords['coordinate'] = ddf_top_coords.coordinate.cat.as_known()

ddf_top_coords.dtypes

In [None]:
# One row per datetime, one column per coordinate, each cell holding the row count;
# the result is then materialised with compute().
ddf_top_coords = ddf_top_coords.pivot_table(index='datetime', columns='coordinate', values='coordinate', aggfunc='count')
ddf_top_coords = ddf_top_coords.compute()

ddf_top_coords.head()

In [None]:
# Render the animated per-coordinate line chart only when file saving is switched on.
if SAVE_FILES:
    fig = plt.figure()
    ax = plt.axes()

    ax.set_title('Брой поставени пиксели за всеки час в 5-те най-популярни точки')
    ax.set_xlabel('Дата и час (UTC)')
    ax.set_ylabel('Брой пиксели')

    # Day, hour and minute stamp appended to the file names.
    now = datetime.now().strftime('%d%H%M')

    # Write the animation as a GIF with a legend;
    # label_events marks midnight of four consecutive days.
    ddf_top_coords.plot_animated(filename=f'output\\animated_top5_coords-{now}.gif', 
        fig=fig, 
        add_legend=True, 
        linewidth=5,
        kind='line', 
        figsize=(25,10),
        period_fmt=period_format,
        period_label=False,
        tick_label_size=16,
        label_events={    
            '  Първи ден': datetime.strptime('00:00 01-04-2022', period_format),
            '  Втори ден': datetime.strptime('00:00 02-04-2022', period_format),
            '  Трети ден': datetime.strptime('00:00 03-04-2022', period_format),
            '  Четвърти ден': datetime.strptime('00:00 04-04-2022', period_format)
        },
        antialiased=True
        )

    # Convert the GIF that was just written into an MP4 at 60 fps.
    clip = mp.VideoFileClip(f'output\\animated_top5_coords-{now}.gif')
    clip.write_videofile(f'output\\animated_top5_coords-{now}.mp4', fps=60)