In [None]:
import json
# defaultdict is used to count the number of likes and comments per word
from collections import defaultdict

import plotly.graph_objects as go
from wordcloud import WordCloud

In [None]:
# Load the exported posts from the JSON file into `posts`.
file = 'buynomics-posts.json'
# JSON text is conventionally UTF-8; pass the encoding explicitly so the load
# does not depend on the platform's default locale encoding.
with open(file, 'r', encoding='utf-8') as f:
    posts = json.load(f)

In [None]:
# General introduction: mathematical operations, printing

In [None]:
# Data structures: lists, dicts, sets

In [None]:
# Check out an arbitrary post (index 52 is a fixed pick, not a random draw)
post = posts[52]
# Bare expression: displayed as the cell output.
post

In [None]:
# Find the most liked post

In [None]:
# Find the most commented post

In [None]:
# Searching posts:
# - Find how many posts mention "12k+"
# - Find the context in which "12k+" was used
key = "12k+"
counter = 0
marked = []  # indices into `posts` of every post whose text contains `key`
# NOTE(review): the loop variable rebinds the notebook-level name `post`; when
# `posts` is non-empty, `post` refers to the last element of `posts` afterwards.
for i, post in enumerate(posts):
    if key in post['text']:
        counter += 1
        marked.append(i)
print(counter, marked)
# Show the first matching post for context. Guard the lookup: indexing an
# empty `marked` would raise IndexError when no post mentions `key`.
if marked:
    print(posts[marked[0]])
else:
    print(f"No post mentions {key!r}")

In [None]:
# For the selected post:
# - count the number of words
# - standardize the words
# - count the number of unique words
# - find the longest word
# - find the second-longest word
# - find the most popular word
# - list all hashtags
# NOTE(review): `post` is whatever the name was last bound to by a previously
# executed cell -- confirm it is the post you intend to analyse.
t = post['text']
# Split on whitespace into raw tokens (punctuation is still attached).
words = t.split()

def standardize(words):
    """Normalize raw tokens so that equal words compare equal.

    Each token has leading/trailing spaces and the punctuation ``,.!?;:``
    stripped and is lower-cased. Tokens that are empty after stripping
    (e.g. a stand-alone ``"!"`` or ``"..."``) are dropped so they are not
    counted as words.

    Args:
        words: Iterable of string tokens.

    Returns:
        A new list with the normalized, non-empty tokens in original order.
    """
    chars = ' ,.!?;:'
    cleaned = (word.strip(chars).lower() for word in words)
    # Skip tokens that consisted solely of stripped characters.
    return [w for w in cleaned if w]

# Replace the raw tokens with their standardized form.
words = standardize(words)

# Number of tokens after standardization.
print(len(words))
# Number of distinct tokens.
print(len(set(words)))

def longest(words):
    """Return the longest string in ``words``.

    When several strings share the maximum length, the one that occurs
    first wins. An empty string is returned when ``words`` is empty.
    """
    return max(words, key=len, default='')

def popular(words):
    """Return the most frequently occurring word in ``words``.

    Args:
        words: Iterable of hashable words (typically standardized strings).

    Returns:
        The word with the highest number of occurrences. When several words
        are tied, the one that appears first wins. An empty string is returned
        when ``words`` is empty.
    """
    counts = {}
    for word in words:
        counts[word] = counts.get(word, 0) + 1
    # dicts keep insertion order and max() returns the first maximal key,
    # so ties resolve to the earliest-seen word.
    return max(counts, key=counts.get, default='')

# Bare expression: the longest token in `words` is displayed as the cell output.
longest(words)

In [None]:
# Aggregate engagement per standardized word across all posts.
# For every occurrence of a word in a post:
# - liked_words[word] grows by that post's like count
# - commented_words[word] grows by that post's comment count
# - total_words[word] grows by one (occurrence count)
# A word repeated within one post therefore contributes that post's likes and
# comments once per repetition.
# `int` is the idiomatic zero-default factory (and keeps the dicts picklable).
liked_words = defaultdict(int)
commented_words = defaultdict(int)
total_words = defaultdict(int)
for post in posts:
    text = post['text']
    likes = post['likesCount']
    comments = post['commentsCount']
    words = standardize(text.split())
    for word in words:
        liked_words[word] += likes
        commented_words[word] += comments
        total_words[word] += 1

liked_words

In [None]:
# True: rank words by likes per occurrence; False: rank by summed likes.
normalized = True
# Number of top-ranked words shown in the bar chart.
cutoff = 500

# (score, word) pairs ordered from highest to lowest score.
liked_words_list = [
    (like / (total_words[word] if normalized else 1), word)
    for word, like in liked_words.items()
]
liked_words_list.sort(reverse=True)

scores = [score for score, _word in liked_words_list]
labels = [word for _score, word in liked_words_list]

fig = go.Figure(data=[go.Bar(x=labels[:cutoff], y=scores[:cutoff])])
fig.update_xaxes(title='Word')
fig.update_yaxes(title='Score')
fig.update_layout(title=f"{cutoff} most impactful words in Buynomics' posts (likes)")
fig.show()

In [None]:
# Canvas size of the word cloud: twice as wide as it is high.
height = 600
width = 2 * height

# Size each word by its like score; fit_words returns the cloud itself.
likes_cloud = WordCloud(width=width, height=height)
likes_cloud.fit_words({word: score for score, word in liked_words_list})
likes_cloud.to_image()

In [None]:
# (score, word) pairs ordered from highest to lowest score. With `normalized`
# set, the score is comments per occurrence of the word; otherwise it is the
# summed comment count.
commented_words_list = [
    (comment / (total_words[word] if normalized else 1), word)
    for word, comment in commented_words.items()
]
commented_words_list.sort(reverse=True)

scores = [score for score, _word in commented_words_list]
labels = [word for _score, word in commented_words_list]

fig = go.Figure(data=[go.Bar(x=labels[:cutoff], y=scores[:cutoff])])
fig.update_xaxes(title='Word')
fig.update_yaxes(title='Score')
fig.update_layout(title=f"{cutoff} most impactful words in Buynomics' posts (comments)")
fig.show()

In [None]:
# Word cloud sized by the comment score of each word. It is stored under its
# own name so the cloud built from like scores (`likes_cloud`) is not overwritten.
comments_cloud = WordCloud(height=height, width=width).fit_words(
    {word: score for score, word in commented_words_list}
)
comments_cloud.to_image()