# Imports

In [31]:
import os
import json

import re
import string

In [32]:
from nltk import WordNetLemmatizer, tokenize
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
from translate import Translator

# Global Variables

In [33]:
minLikes = 10000000
maxLikes = 0

minReplies = 10000000
maxReplies = 0

minRetweets = 10000000
maxRetweets = 0

# Helper Data Class

In [34]:
class PredictedCandidatePair:
    def __init__(self, candidate1, candidate2, predicted1, predicted2):
        self.candidate1 = candidate1
        self.candidate2 = candidate2
        self.predicted1 = predicted1
        self.predicted2 = predicted2

    def __repr__(self):
        if self.predicted1 > self.predicted2:
            return f"{self.candidate1} | {self.predicted1} Is More Likely To Win Than {self.candidate2} | {self.predicted2} ({self.predicted_percent_margin_of_victory()}% Confidence)"
        elif self.predicted2 > self.predicted1:
            return f"{self.candidate2} | {self.predicted2} Is More Likely To Win Than {self.candidate1} | {self.predicted1} ({self.predicted_percent_margin_of_victory()}% Confidence)"

    def predicted_margin_of_victory(self):
        return self.predicted1 - self.predicted2

    def predicted_percent_margin_of_victory(self):
        total = self.predicted1 + self.predicted2
        return (abs(self.predicted1 - self.predicted2) / total) * 100

# Utility Functions

In [35]:
# Utility Method To Find All Politicians Whose Tweets Are Available

def get_politicians():
    path = "tweets-test"
    folders = [folder for folder in os.listdir(path) if os.path.isdir(os.path.join(path, folder))]

    for folder in folders:
        yield folder

In [36]:
# Utility Method To Find All The Available Tweets From A Given Politician

def get_politicians_tweets(politician):
    global maxLikes
    global minLikes

    global maxReplies
    global minReplies

    global maxRetweets
    global minRetweets

    path = f"tweets-test/{politician}"
    tweets = []

    for filename in os.listdir(path):
        file_path = os.path.join(path, filename)

        if os.path.isfile(file_path):
            with open(file_path, 'r', encoding="utf-8") as file:
                data = json.load(file)

                if data["Likes"] > maxLikes:
                    maxLikes = data["Likes"]
                elif data["Likes"] < minLikes:
                    minLikes = data["Likes"]

                if data["Replies"] > maxReplies:
                    maxReplies = data["Replies"]
                elif data["Replies"] < minReplies:
                    minReplies = data["Replies"]

                if data["Retweets"] > maxRetweets:
                    maxRetweets = data["Retweets"]
                elif data["Retweets"] < minRetweets:
                    minRetweets = data["Retweets"]

                tweets.append(data)

    return tweets

In [37]:
def store_results(results):
    with open("./results.json", "w+") as file:
        json.dump(results, file)

# Functions For Sentiment Analysis

In [38]:
def sanitize_text(text):
    text = text.lower()
    text = re.sub(r'\n', '', text)

    translator = str.maketrans('', '', string.punctuation)
    text = text.translate(translator)

    lemmatizer = WordNetLemmatizer()
    words = tokenize.word_tokenize(text)
    words = [lemmatizer.lemmatize(word) for word in words]

    stop_words = stopwords.words("english")
    filtered_text = [word for word in words if not word in stop_words]

    return " ".join(filtered_text)

In [39]:
def sentiment_polarity(text):
    sia = SentimentIntensityAnalyzer()

    score = sia.polarity_scores(sanitize_text(text))
    key = list(score.keys())[list(score.values()).index(max(list(score.values())[:len(score) - 1]))]

    return score, key

In [40]:
def translate_text(json_tweet):
    if json_tweet["Language"] != "kn":
        return json_tweet["Tweet"]

    translator = Translator(from_lang='kn', to_lang="en")
    translation = translator.translate(json_tweet["Tweet"])

    return translation

# Core Functions

In [41]:
def calculate_score(json_tweet):
    text = json_tweet["Tweet"]

    if json_tweet["Language"] != "en":
        text = translate_text(json_tweet)

    score, key = sentiment_polarity(text)

    sent_pol = (score["pos"] - score["neg"]) if (score["pos"] - score["neg"]) > 0 else 0  # Ensures The Sentiment Polarity Is Positive
    likes_score = (json_tweet["Likes"] - minLikes) / (maxLikes - minLikes)  # Scales The Likes Score To The Range Of 0.0 to 1.0
    replies_score = (json_tweet["Replies"] - minReplies) / (maxReplies - minReplies)  # Scales The Replies Score To The Range Of 0.0 to 1.0
    retweets_score = (json_tweet["Retweets"] - minRetweets) / (maxRetweets - minRetweets)  # Scales The Retweets Score To The Range Of 0.0 to 1.0

    # The Final Total Score Is From 0.0 to 100.0
    total_score = 25 * (sent_pol + likes_score + replies_score + retweets_score)
    return total_score

In [42]:
def analyze_politician(politician):
    total_score = 0
    tweet_count = 0
    all_politicians_tweets = get_politicians_tweets(politician)

    for tweet in all_politicians_tweets:
        total_score += calculate_score(tweet)
        tweet_count += 1

    avg_score = total_score / tweet_count if tweet_count != 0 else 0

    return avg_score

In [43]:
def analyze():
    scores = {}
    all_politicians = get_politicians()

    while True:
        try:
            politician = next(all_politicians)
            scores.update({ politician: analyze_politician(politician) })

        except StopIteration:
            break

    store_results(scores)

In [44]:
def conclude():
    with open("./settings.json", 'r', encoding="utf-8") as file:
        settings = json.load(file)

    with open("./results.json", 'r', encoding="utf-8") as file:
        results = json.load(file)

    candidates = settings["Candidates"]
    candidatePairs = []

    for i in range(0, len(candidates), 2):
        if candidates[i]["Name"] in results.keys() and candidates[i + 1]["Name"] in results.keys():
            candidatePairs.append(
                PredictedCandidatePair(candidates[i]["Name"], candidates[i + 1]["Name"], results[candidates[i]["Name"]], results[candidates[i + 1]["Name"]])
            )

    total_predictions = len(candidatePairs)
    total_predicted_margin = 0
    total_predicted_percent_margin = 0

    print("All Predicted Pairs:")
    for pair in candidatePairs:
        print(pair)

        total_predicted_margin += pair.predicted_margin_of_victory()
        total_predicted_percent_margin += pair.predicted_percent_margin_of_victory()

    print(
        f"\nResults:\n"
        f"Total Pairs: {total_predictions}\n"
        f"Average Predicted Net Margin: {(total_predicted_margin / total_predictions) if total_predictions != 0 else 0}\n"
        f"Average Predicted Percent Margin: {(total_predicted_percent_margin / total_predictions) if total_predictions != 0 else 0}%\n"
    )

# Code Entry Point

In [45]:
def main():
    analyze()
    conclude()


if __name__ == '__main__':
    main()

All Predicted Pairs:
Ravi Subramanya L. A | 4.77654655750704 Is More Likely To Win Than UB Venkatesh | 1.0745990524108924 (63.26876396343981% Confidence)
C.P. Yogeeshwara | 26.13252688172043 Is More Likely To Win Than H. D. Kumaraswamy | 3.445504753987188 (76.70227149376868% Confidence)
D. K. Shivakumar | 26.830414738909372 Is More Likely To Win Than R. Ashoka | 2.429866912390995 (83.39136347798615% Confidence)
Siddaramaiah | 25.088561166091928 Is More Likely To Win Than V. Somanna | 8.827401734702175 (47.94544527293671% Confidence)
M. Krishnappa | 1.0495744237436155 Is More Likely To Win Than H. Ravindra | 0.9307659049408527 (5.99939904681368% Confidence)

Results:
Total Pairs: 5
Average Predicted Net Margin: 13.38637367438695
Average Predicted Percent Margin: 55.461448650989%

