## imports



In [30]:
from os.path import join
from json import loads
from collections import defaultdict

## environment variables

In [3]:
DATABASE_NAME = join('.', "farmers-protest-tweets-2021-03-5.json")

## classes

In [34]:
class RefInt:
    def __init__(self, reference):
        self.ref = reference
    def __lt__(self, other):
        return self.ref < other.ref

# FEATURE CODE

 ## feature 1 code

In [58]:
def top10_retweets():
    top10 = list()
    ints = dict()
    def hook(dict_):
        if type(dict_) != dict:
            return dict_
        if "url" in dict_ and "retweetCount" in dict_:
            url = dict_["url"]
            retwcount = dict_["retweetCount"]
            if url not in ints:
                ints[url] = RefInt(retwcount)
                top10.append((ints[url], url))
            elif ints[url].ref < retwcount:
                ints[url].ref = retwcount
                if (ints[url], url) not in top10:
                    top10.append((ints[url], url))
            else:
                return {}
            top10.sort(reverse=True)
            del top10[10:]
            return {}
    with open(DATABASE_NAME, "r", encoding="utf-8") as file:
        for line in file:
            loads(f'{line.strip()}',  object_hook=hook)
    return ("number   | url", tuple((n.ref, url) for n, url in top10))

('number   | url', ((904547, 'https://twitter.com/MrBeast/status/1362997215308906498'), (315557, 'https://twitter.com/rihanna/status/1356625889602199552'), (236683, 'https://twitter.com/pulte/status/1365015962433564675'), (152380, 'https://twitter.com/rihanna/status/254929287726116864'), (137786, 'https://twitter.com/Twitter/status/1357000724593442816'), (123338, 'https://twitter.com/KingSalman/status/1330103721506840578'), (116239, 'https://twitter.com/elonmusk/status/1358542364948668418'), (115492, 'https://twitter.com/elonmusk/status/1357244295091208193'), (111737, 'https://twitter.com/rihanna/status/1351997615328661506'), (103960, 'https://twitter.com/GretaThunberg/status/1356694884615340037')))


## feature 2 code

In [55]:
def top10_twit_senders():
    top10 = list()
    twits = defaultdict(set)
    def hook(dict_):
        if type(dict_) != dict:
            return dict_
        elif "id" in dict_ and "user" in dict_:
            ret = {
                "id": dict_["id"]
            }
            if "user" in dict_:
                ret.update({"user": dict_["user"]["id"]})
            return ret
        elif "id" in dict_:
            return {
                "id": dict_["id"]
            }
    with open(DATABASE_NAME, "r", encoding="utf-8") as file:
        for line in file.readlines():
            data = loads(f'{line.strip()}',  object_hook=hook)
            twits[data["user"]].add(data["id"])
    top = [(len(set_), user) for user, set_ in twits.items()]
    top.sort(reverse=True)
    return (("number   | user id"), top[:10])

number  | user id
    7134, 980633168981061632
    2091, 69027875
    1991, 2930682630
    1841, 452391771
    1806, 1354532795847073796
    1722, 48085084
    1502, 872085660886282240
    1460, 1340186972615307264
    1453, 1309302936317636608
    1446, 921732117024919552


## feature 3 code

In [62]:
def top10_twit_days():
    top10 = list()
    twits = set()
    days = defaultdict(set)
    def hook(dict_):
        if type(dict_) != dict:
            return dict_
        elif "id" in dict_ and "created" in dict_:
            ret = {
                "id": dict_["id"]
            }
            if dict_["id"] in twits:
                return {}
            if "created" in dict_:
                if not dict_["created"]:
                    return {}
                ret.update({"created": dict_["created"].split("T")[0]})
            days[dict_["created"].split("T")[0]].add(dict_["id"])
            twits.add(dict_["id"])
            return ret
    with open(DATABASE_NAME, "r", encoding="utf-8") as file:
        for line in file.readlines():
            loads(f'{line.strip()}',  object_hook=hook)
    top = [(len(set_), user) for user, set_ in days.items()]
    top.sort(reverse=True)
    return (("number   | date"), top[:10])

('number   | date',
 [(1379, '2020-11-29'),
  (1279, '2021-02-03'),
  (1269, '2020-12-03'),
  (1128, '2021-02-04'),
  (988, '2020-12-01'),
  (958, '2020-11-30'),
  (849, '2020-12-04'),
  (782, '2020-12-02'),
  (587, '2021-02-05'),
  (562, '2020-11-28')])

## feature 4 code

In [70]:
def top10_hashtags():
    top10 = list()
    hashtags = defaultdict(set)
    def hook(dict_):
        if type(dict_) != dict:
            return dict_
        elif "url" in dict_ and "content" in dict_:
            ret = {
                "id": dict_["id"]
            }
            if "content" in dict_:
                ret.update({"content": dict_["content"]})
            return ret
        elif "id" in dict_:
            return {
                "id": dict_["id"]
            }
    with open(DATABASE_NAME, "r", encoding="utf-8") as file:
        for line in file.readlines():
            data = loads(f'{line.strip()}',  object_hook=hook)
            twit_hashtags = list()
            current = ""
            in_hashtag = False
            for c in data["content"]:
                if "#" == c:
                    in_hashtag = True
                elif c == " ":
                    if in_hashtag:
                        twit_hashtags.append("#"+current)
                        current = ""
                    in_hashtag = False
                else:
                    if in_hashtag:
                        current += c
            if in_hashtag:
                twit_hashtags.append("#"+current)
            for hashtag in twit_hashtags:
                hashtags[hashtag.lower()].add(data["id"])
    top = [(len(set_), user) for user, set_ in hashtags.items()]
    top.sort(reverse=True)
    return (("number   | Hashtag"), top[:10])

('number   | Hashtag', [(354100, '#farmersprotest'), (15084, '#istandwithfarmers'), (9478, '#farmersareindia'), (9285, '#indianfarmershumanrights'), (9174, '#standwithfarmers'), (8323, '#rihanna'), (7868, '#farmers'), (7865, '#farmersprotests'), (5181, '#shameonbollywood'), (4933, '#india')])


## main function code

In [None]:
def Main():
    functions = {"1": top10_retweets, "2": top10_twit_senders, "3": top10_twit_days, "4": top10_hashtags}
    while True:
        print("choose an action")
        print("[1] top 10 most retweeted tweets")
        print("[2] top 10 users with most tweets")
        print("[3] top 10 days with most tweets")
        print("[4] top 10 most used hashtags")
        print("[any other] Exit")
        input_ = input("$>").strip()
        if input_ not in functions:
            break
        out1, out2 = functions[input_]()
        print(out1)
        for out21, out22 in out2:
            print(f"{out21:8d} | {out22}")
    print("goodbye")

# Main function execution

In [None]:
Main()