# Tweet Metrics Parser
Author(s): Kiriti Yelamanchali

Goal:
This simple Python code returns the number of retweets, replies, and favourites of a tweet, together with the tweet's Unix timestamp (in milliseconds).

Status:
This package can be installed by cloning the repo. As of version 0.0.1, the package works with Python 3.7.

Usage:
This package is intended to be used as a Python module inside your other tweet-related code. An example invocation (after installing the package) would be:
	python tweet_metrics https://twitter.com/BarackObama/status/952914779458424832

This is a Jupyter Notebook with the core code. 

In [66]:
# Import the necessary libraries
import requests
import re
import sys
from bs4 import BeautifulSoup
import logging
from logging.config import dictConfig

In [67]:
# Logging setup: the root logger passes records at INFO level and above to a
# single stream handler, rendered as "[time] LEVEL in module: message".
dictConfig(dict(
    version=1,
    formatters=dict(
        default=dict(
            format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
        ),
    ),
    handlers=dict(
        # 'class' is a Python keyword, so this entry stays a dict literal.
        wsgi={'class': 'logging.StreamHandler', 'formatter': 'default'},
    ),
    root=dict(level='INFO', handlers=['wsgi']),
))

In [68]:
# Main function and its private helpers.
def _empty_metrics(url):
    """Return the all-zero result reported whenever metrics cannot be obtained."""
    return {'url': url, 'retweets': 0, 'replies': 0, 'favourites': 0, 'timestamp': 0}


def _extract_tweet_id(url):
    """Return the tweet ID contained in ``url`` as a digit string, or None."""
    # Prefer the segment right after "/status/" so that an all-digit account
    # name earlier in the path is not mistaken for the tweet ID.
    match = re.search(r'/status/([0-9]+)', url)
    if match is None:
        # Otherwise accept the first path segment made up only of digits
        # (a trailing query string or fragment is tolerated).
        match = re.search(r'/([0-9]+)(?=[/?#]|$)', url)
    return match.group(1) if match else None


def parse_metrics(url, timeout=10):
    """
    Fetch a tweet page and scrape its engagement counters and timestamp.

    The function is best-effort: every failure (invalid URL, no tweet ID in
    the URL, network error, unexpected page markup) is logged and reported as
    a result whose metrics are all 0 instead of raising.

    :param url: Twitter URL example : https://twitter.com/BarackObama/status/952914779458424832
    :param timeout: Seconds to wait for the HTTP response before giving up
        (passed to ``requests.get``). Defaults to 10.
    :return: dict with the keys 'url', 'retweets', 'replies', 'favourites'
        and 'timestamp'. 'timestamp' is the 13-digit ``data-time-ms`` value
        found in the page (milliseconds).
    """
    logging.info('Input URL = %s', url)

    # Guard clauses first: nothing below touches the network until the input
    # has been validated and a tweet ID has been found.
    if not (isinstance(url, str) and url.startswith("https://twitter.com/")):
        logging.info('URL is not a valid twitter URL')
        return _empty_metrics(url)
    logging.info('URL is a valid twitter URL')

    tweet_id = _extract_tweet_id(url)
    if tweet_id is None:
        logging.warning('No tweet ID found in URL')
        return _empty_metrics(url)

    try:
        # Without a timeout a stalled connection would block forever.
        page = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        logging.warning('Encountered error %s while fetching the page', e)
        return _empty_metrics(url)

    soup = BeautifulSoup(page.text, 'html.parser')

    # Compiled once and reused for all three counters.
    span_text = re.compile(r"(?<=>)(.*)(?=</span>)")
    time_ms = re.compile(r"(?<=data-time-ms=\")\d{13}(?=\">)")

    # Result key -> action name used inside the element id of its counter.
    counters = (
        ('retweets', 'retweet'),
        ('replies', 'reply'),
        ('favourites', 'favorite'),
    )

    result = {'url': url}
    try:
        for key, action in counters:
            element_id = 'profile-tweet-action-{}-count-aria-{}'.format(action, tweet_id)
            markup = str(soup.find_all(id=element_id))
            # Keep only the digits of the span text (drops separators/labels).
            result[key] = int(re.sub('[^0-9]', '', span_text.findall(markup)[0]))

        stamp_markup = str(
            soup.find_all(class_="tweet-timestamp js-permalink js-nav js-tooltip")
        )
        # The last timestamp found in the matched markup is the one reported.
        result['timestamp'] = int(re.sub('[^0-9]', '', time_ms.findall(stamp_markup)[-1]))
    except (IndexError, ValueError) as e:
        # IndexError: expected element/attribute missing from the page.
        # ValueError: element present but it contains no digits.
        logging.warning('Encountered error %s during parsing', e)
        return _empty_metrics(url)

    logging.info('Parsing Successful')
    return result

In [69]:
# Example: fetch the metrics for one tweet. The call issues a live HTTP request
# to the given URL; in the notebook the returned dict is shown as the cell output.
parse_metrics("https://twitter.com/BarackObama/status/952914779458424832")

[2020-05-18 14:10:47,616] INFO in <ipython-input-68-aecd398e62dd>: Input URL = https://twitter.com/BarackObama/status/952914779458424832
[2020-05-18 14:10:47,618] INFO in <ipython-input-68-aecd398e62dd>: URL is a valid twitter URL
[2020-05-18 14:10:49,005] INFO in <ipython-input-68-aecd398e62dd>: Parsing Successful


{'url': 'https://twitter.com/BarackObama/status/952914779458424832',
 'retweets': 340884,
 'replies': 27375,
 'favourites': 1375655,
 'timestamp': 1516042339000}

#### ~END~