In [None]:
# Root folder of the data export analysed by this notebook (presumably an unpacked
# Facebook archive, judging by the folder name -- confirm before reuse).
# Relative paths such as 'comments/comments.json' are appended to it by plain string
# concatenation, so the trailing slash must be kept.
# TODO read from parameters
BASE_PATH = '../facebook-denisgrafov/'

In [None]:
import json
from datetime import datetime
from pytz import timezone
import pytz


def is_group_comment(comment_json):
    """Return 1 if the exported comment was posted in a group, otherwise 0.

    The 'group' marker is looked up in ``comment_json['data'][0]['comment']``.
    Entries without a 'data' list, with an empty one, or whose first element
    has no 'comment' are counted as non-group comments instead of raising.

    :param comment_json: one element of the exported 'comments' list (a dict).
    :return: 1 for a group comment, 0 otherwise (an int so it can be summed).
    """
    data = comment_json.get('data') or []
    if not data:
        return 0
    comment = data[0].get('comment', {})
    return 1 if 'group' in comment else 0


def add_to_history(history, year, month):
    """Count one item in the ``history[year][month]`` table.

    A year seen for the first time is initialised with a zero counter for
    each of the months 1..12 before the requested month is incremented.
    """
    months_of_year = history.setdefault(year, dict.fromkeys(range(1, 13), 0))
    months_of_year[month] += 1

    
def add_empty_months(history):
    """Make sure every year in ``history`` has an entry for months 1..12.

    Missing months are added with a count of 0; existing counts are untouched.
    The dictionaries are modified in place.
    """
    for months_of_year in history.values():
        for month_number in range(1, 13):
            months_of_year.setdefault(month_number, 0)
                
                
def process_json_item_dates(item, history, unique_days, times_of_day_week, times_of_day_weekend,
                            tz_name='Europe/Berlin'):
    """Tally one exported item into the date-based counters.

    The item's POSIX timestamp is interpreted as UTC and converted to the
    ``tz_name`` time zone; all counters are updated in place using that local
    date.

    :param item: dict with a numeric 'timestamp' entry.
    :param history: ``{year: {month: count}}`` table, updated via add_to_history.
    :param unique_days: list of 7 counters indexed by ``date.weekday()`` (0 = Monday).
    :param times_of_day_week: list of 24 hourly counters for Monday..Friday.
    :param times_of_day_weekend: list of 24 hourly counters for Saturday/Sunday.
    :param tz_name: pytz time zone name used for the local date; defaults to
        the previously hard-coded 'Europe/Berlin'.
    """
    # Build the aware UTC datetime directly: datetime.utcfromtimestamp() is
    # deprecated since Python 3.12 and yields a naive value that has to be patched.
    date_utc = datetime.fromtimestamp(item['timestamp'], tz=pytz.utc)
    date = date_utc.astimezone(timezone(tz_name))
    weekday = date.weekday()

    # increment days counter
    unique_days[weekday] += 1

    # add the item to the history per year and month (2 dimensional)
    add_to_history(history, date.year, date.month)

    # increment times of day counter (weekday() < 5 means Monday..Friday)
    hourly_counters = times_of_day_week if weekday < 5 else times_of_day_weekend
    hourly_counters[date.hour] += 1


# Load the exported comments. The encoding is pinned because open() otherwise
# falls back to the platform locale, which is not UTF-8 everywhere.
with open(BASE_PATH + 'comments/comments.json', encoding='utf-8') as json_data:
    comments_json = json.load(json_data)

# Counters filled while iterating over the comments (the file is already closed here).
group = 0                                # number of comments posted in groups
comments_per_unique_days = [0] * 7       # indexed by weekday, 0 = Monday
comments_history = {}                    # {year: {month: count}}

comments_times_of_day_week = [0] * 24    # hourly counters, Monday..Friday
comments_times_of_day_weekend = [0] * 24 # hourly counters, Saturday/Sunday

for comment in comments_json['comments']:
    group += is_group_comment(comment)

    process_json_item_dates(comment, comments_history, comments_per_unique_days,
                            comments_times_of_day_week, comments_times_of_day_weekend)

# add empty months
add_empty_months(comments_history)

In [None]:
import matplotlib.pyplot as plt

# Shared font sizes and default figure dimensions (inches) for every plot below.
SMALL_SIZE = 14
MEDIUM_SIZE = 18
BIGGER_SIZE = 20
FIGURE_WIDTH = 10.0
FIGURE_HEIGHT = 8.0

# default text size
plt.rc('font', size=SMALL_SIZE)
# axes title and x/y label sizes
plt.rc('axes', titlesize=SMALL_SIZE, labelsize=MEDIUM_SIZE)
# tick label size on both axes
for tick_group in ('xtick', 'ytick'):
    plt.rc(tick_group, labelsize=SMALL_SIZE)
# legend font size
plt.rc('legend', fontsize=SMALL_SIZE)
# figure title size and default figure size
plt.rc('figure', titlesize=BIGGER_SIZE, figsize=(FIGURE_WIDTH, FIGURE_HEIGHT))

In [None]:
# Pie chart: share of comments written in groups versus everywhere else.
total_comments = len(comments_json['comments'])
personal_comments = total_comments - group

labels = ('Comments to groups', 'Personal comments')
sizes = [group, personal_comments]
colors = ['lightcoral', 'lightskyblue']
explode = (0.1, 0)  # pull the first slice slightly out of the pie

plt.pie(
    sizes,
    explode=explode,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',
    shadow=True,
    startangle=140,
)

# Absolute numbers next to the chart.
summary = ('Total comments: ' + str(total_comments)
           + '\nComments to groups: ' + str(group)
           + '\nPersonal comments: ' + str(personal_comments))
plt.text(-2, 1, summary)

plt.axis('equal')
plt.show()

In [None]:
def bar_horizontal_grid(ax, bars, heights, title = None):
    """Draw a bar chart with horizontal grid lines on ``ax``.

    One x tick is placed per entry of ``bars``; a truthy ``title`` is set as
    the axes title. The same ``ax`` is returned so calls can be chained.
    """
    ax.bar(bars, heights)
    ax.yaxis.grid()
    x_axis = ax.get_xaxis()
    x_axis.set_ticks(bars)
    if not title:
        return ax
    ax.set_title(title)
    return ax


def plot_items_per_year(ax, history, title=None):
    """Plot the yearly totals of a ``{year: {month: count}}`` history as bars.

    The monthly counts of each year are summed and the years are drawn in
    ascending order. Returns the axes handed back by bar_horizontal_grid.
    """
    yearly_totals = {year: sum(months.values()) for year, months in history.items()}
    years, totals = zip(*sorted(yearly_totals.items()))
    return bar_horizontal_grid(ax, years, totals, title)


def show_single_plot(func, data, title, size=None):
    """Create a one-axes figure, let ``func(ax, data)`` draw on it and show it.

    ``title`` becomes the figure title; a truthy ``size`` (width, height in
    inches) overrides the default figure size.
    """
    fig, axes = plt.subplots()
    func(axes, data)
    if size:
        fig.set_size_inches(size)
    fig.suptitle(title)
    plt.show()


# Yearly comment totals as a single bar chart.
show_single_plot(
    func=plot_items_per_year,
    data=comments_history,
    title='Comments per year',
)

In [None]:
import calendar


def plot_items_per_month(ax, history, title=None):
    """Plot how many items fall into each calendar month, summed over all years.

    ``history`` is a ``{year: {month: count}}`` table with months numbered
    from 1. Bars are labelled with calendar.month_name and the x tick labels
    are rotated by 90 degrees.
    """
    month_totals = [0] * 12
    for months_of_year in history.values():
        for month in months_of_year:
            month_totals[month - 1] += months_of_year[month]

    month_names = calendar.month_name[1:]
    drawn_axes = bar_horizontal_grid(ax, month_names, month_totals, title)
    drawn_axes.tick_params(axis='x', rotation=90)


# Comment totals per calendar month, all years combined.
show_single_plot(
    func=plot_items_per_month,
    data=comments_history,
    title='Comments per month',
)

In [None]:
def plot_items_per_weekday(ax, weekdays, title=None):
    """Plot one bar per day of the week, labelled with calendar.day_name.

    ``weekdays`` holds the bar heights in calendar.day_name order.
    """
    bar_horizontal_grid(ax, calendar.day_name, weekdays, title)
    
    
# Comment totals per day of the week.
show_single_plot(
    func=plot_items_per_weekday,
    data=comments_per_unique_days,
    title='Comments per weekday',
)

In [None]:
def flatten_history(history):
    """Flatten ``{year: {month: count}}`` into ``{'YYYY.MM': count}``.

    The month is zero-padded to two digits, so for years of equal width the
    keys sort chronologically as plain strings.
    """
    return {
        str(year) + '.{:0=2d}'.format(month): count
        for year, months_in_year in history.items()
        for month, count in months_in_year.items()
    }


def plot_items_history(ax, flat_history):
    """Draw a flattened history as a line, ordered by its sorted keys.

    ``flat_history`` maps month ids (see flatten_history) to counts. Every
    month gets an x tick, rotated by 90 degrees, and horizontal grid lines
    are enabled. Returns the plotted line so it can be used in a legend.
    """
    ordered = sorted(flat_history.items())
    month_ids, counts = zip(*ordered)

    (line,) = ax.plot(month_ids, counts)
    ax.yaxis.grid()
    x_axis = ax.get_xaxis()
    x_axis.set_ticks(month_ids)
    ax.tick_params(axis='x', rotation=90)
    return line


# Month-by-month comment history on a double-width figure.
show_single_plot(
    func=plot_items_history,
    data=flatten_history(comments_history),
    title='Comments history (months)',
    size=(FIGURE_WIDTH * 2, FIGURE_HEIGHT),
)

In [None]:
# Hourly comment distribution: working days on the left, weekend on the right.
hours = range(len(comments_times_of_day_weekend))

fig, (week_plot, weekend_plot) = plt.subplots(nrows=1, ncols=2, sharey=True)
fig.suptitle('Comments per time of a day')
fig.set_size_inches(FIGURE_WIDTH * 2, FIGURE_HEIGHT)

bar_horizontal_grid(week_plot, hours, comments_times_of_day_week, 'Week days')

weekend_axes = bar_horizontal_grid(weekend_plot, hours, comments_times_of_day_weekend, 'Weekend days')
# Point at the 11 o'clock bar; the coordinates are hand-picked for this data set.
weekend_axes.annotate(
    '"Thank you!" comments\nafter my birthday',
    xy=(11, 25),
    xytext=(14, 32),
    arrowprops={'facecolor': 'black'},
)

plt.show()

In [None]:
# Print the local (Europe/Berlin) date and time of every weekend comment written
# between 11:00 and 11:59.
berlin = timezone('Europe/Berlin')  # built once instead of on every iteration
for comment in comments_json['comments']:
    # get the date of the comment; fromtimestamp(..., tz=...) replaces the
    # deprecated datetime.utcfromtimestamp() and returns an aware UTC value directly
    date = datetime.fromtimestamp(comment['timestamp'], tz=pytz.utc).astimezone(berlin)
    if date.weekday() > 4 and date.hour == 11:
        print(date.strftime('%b %d %Y %H:%M:%S'))

In [None]:
import re

# Load the exported reactions. The encoding is pinned because open() otherwise
# falls back to the platform locale, which is not UTF-8 everywhere.
with open(BASE_PATH + 'likes_and_reactions/posts_and_comments.json', encoding='utf-8') as json_data:
    likes_json = json.load(json_data)

# Counters filled while iterating over the reactions (the file is already closed here).
likes_per_unique_days = [0] * 7       # indexed by weekday, 0 = Monday
likes_history = {}                    # {year: {month: count}}

likes_times_of_day_week = [0] * 24    # hourly counters, Monday..Friday
likes_times_of_day_weekend = [0] * 24 # hourly counters, Saturday/Sunday

likes_targets = {}                    # {kind of liked object: count}

for like in likes_json['reactions']:
    title = like['title']
    # The liked object is the first word after the possessive marker,
    # e.g. "...'s comment." -> "comment".
    marker = title.find('\'s')
    if marker == -1:
        # No possessive in the title: str.find() returned -1, and slicing from
        # -1 + 3 would yield a meaningless fragment of the title as the target.
        target = 'unknown'
    else:
        # Raw pattern avoids the invalid '\W' escape; maxsplit is passed by
        # keyword because the positional form is deprecated.
        target = re.split(r'\W+', title[marker + 3:], maxsplit=1)[0]

    likes_targets[target] = likes_targets.get(target, 0) + 1

    process_json_item_dates(like, likes_history, likes_per_unique_days,
                            likes_times_of_day_week, likes_times_of_day_weekend)

# add empty months
add_empty_months(likes_history)


In [None]:
# Pie chart of what was liked: posts, comments, photos and everything else.
# Every count is read with .get(..., 0) so an export that contains no reaction
# of some kind (e.g. no liked notes) does not abort the cell with a KeyError.
labels = ('Posts', 'Others', 'Comments', 'Photos')
main_targets = ('post', 'photo', 'comment')
others = sum(value for key, value in likes_targets.items() if key not in main_targets)
sizes = [
    likes_targets.get('post', 0),
    others,
    likes_targets.get('comment', 0),
    likes_targets.get('photo', 0),
]

explode = (0.1, 0, 0, 0)  # pull the first slice slightly out of the pie

plt.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', shadow=True, startangle=0)

# Absolute numbers next to the chart: (caption, key in likes_targets).
summary_rows = (
    ('Likes to posts', 'post'),
    ('Likes to photos', 'photo'),
    ('Likes to comments', 'comment'),
    ('Likes to links', 'link'),
    ('Likes to videos', 'video'),
    ('Likes to activities', 'activity'),
    ('Likes to album', 'album'),
    ('Likes to biographies', 'bio'),
    ('Likes to notes', 'note'),
    ('Likes to life', 'life'),
)
summary = 'Total likes: ' + str(len(likes_json['reactions']))
for caption, target_key in summary_rows:
    summary += '\n' + caption + ': ' + str(likes_targets.get(target_key, 0))
plt.text(-2, 1, summary)

plt.axis('equal')
plt.show()

In [None]:
# Yearly totals, likes and comments side by side on a shared y axis.
f, (likes_plot, comments_plot) = plt.subplots(nrows=1, ncols=2, sharey=True)
f.suptitle('Likes and comments per year')
f.set_size_inches(FIGURE_WIDTH * 2, FIGURE_HEIGHT)

yearly_panels = (
    (likes_plot, likes_history, 'Likes per year'),
    (comments_plot, comments_history, 'Comments per year'),
)
for panel_ax, panel_history, panel_title in yearly_panels:
    plot_items_per_year(panel_ax, panel_history, panel_title)

plt.show()

In [None]:
# Totals per calendar month, likes and comments side by side on a shared y axis.
f, (likes_plot, comments_plot) = plt.subplots(nrows=1, ncols=2, sharey=True)
f.suptitle('Likes and comments per month')
f.set_size_inches(FIGURE_WIDTH * 2, FIGURE_HEIGHT)

monthly_panels = (
    (likes_plot, likes_history, 'Likes per month'),
    (comments_plot, comments_history, 'Comments per month'),
)
for panel_ax, panel_history, panel_title in monthly_panels:
    plot_items_per_month(panel_ax, panel_history, panel_title)

plt.show()

In [None]:
# Totals per day of the week, likes and comments side by side on a shared y axis.
f, (likes_plot, comments_plot) = plt.subplots(nrows=1, ncols=2, sharey=True)
f.suptitle('Likes and comments per weekday')
f.set_size_inches(FIGURE_WIDTH * 2, FIGURE_HEIGHT)

weekday_panels = (
    (likes_plot, likes_per_unique_days, 'Likes per weekday'),
    (comments_plot, comments_per_unique_days, 'Comments per weekday'),
)
for panel_ax, panel_counts, panel_title in weekday_panels:
    plot_items_per_weekday(panel_ax, panel_counts, panel_title)

plt.show()

In [None]:
# Month-by-month history of likes and comments drawn on the same axes.
flat_comments_history = flatten_history(comments_history)
flat_likes_history = flatten_history(likes_history)

# Pad BOTH series with zeros for months that only the other one contains.
# Padding only the comments works while the likes cover every month of the
# comments; otherwise the likes line would be shorter and the extra months
# would be appended out of order on the shared category axis.
all_months = set(flat_likes_history) | set(flat_comments_history)
for month in all_months:
    flat_likes_history.setdefault(month, 0)
    flat_comments_history.setdefault(month, 0)

f, ax = plt.subplots()
likes_line = plot_items_history(ax, flat_likes_history)
comments_line = plot_items_history(ax, flat_comments_history)
ax.legend((likes_line, comments_line), ('Likes', 'Comments'))

f.suptitle('Likes and comments history (months)')

f.set_size_inches(FIGURE_WIDTH * 2, FIGURE_HEIGHT)
plt.show()

In [None]:
# Hourly distribution grid: likes on the top row, comments on the bottom row;
# working days in the left column, weekends in the right column.
hours = range(len(likes_times_of_day_week))

f, axes = plt.subplots(nrows=2, ncols=2, sharey=True)
f.suptitle('Likes and comments per time of a day')
f.set_size_inches(FIGURE_WIDTH * 2, FIGURE_HEIGHT * 2)

hourly_panels = (
    (axes[0][0], likes_times_of_day_week, 'Likes times on weekdays'),
    (axes[0][1], likes_times_of_day_weekend, 'Likes times on weekends'),
    (axes[1][0], comments_times_of_day_week, 'Comments times on weekdays'),
    (axes[1][1], comments_times_of_day_weekend, 'Comments times on weekends'),
)
for panel_ax, hourly_counts, panel_title in hourly_panels:
    bar_horizontal_grid(panel_ax, hours, hourly_counts, panel_title)

plt.show()

In [None]:
import os

# Walk the 'messages' folder of the export and print the path of every
# directory-level 'message.json' that is found.
messages_root = BASE_PATH + 'messages'
for root, dirs, files in os.walk(messages_root):
    if 'message.json' not in files:
        continue
    print(root + '/message.json')