In [50]:
import glob
import json
from collections import defaultdict
from datetime import datetime, timezone

import matplotlib.pyplot as plt
import pandas as pd

# Subreddit names that determine_supporting() labels "supporting".
# NOTE(review): "IsraelPalestine" is present in BOTH lists. Because
# determine_supporting() tests this list first, its entry in
# opposing_israel is never reached -- confirm which group is intended.
supporting_israel = [
    "Jewish", "Judaism", "IsraelUnderAttack", "IsraelPalestine",
    "IsraelICYMI", "IsraelWar", "Israel", "IsraelVsHamas",
]

# Subreddit names that determine_supporting() labels "opposing".
# Matching is an exact, case-sensitive string comparison, so the mixed
# capitalisation below ("islam", "israelexposed", "exmuslim") is significant.
opposing_israel = [
    "Palestine", "IsraelPalestine", "AskMiddleEast", "IsraelHamasWar",
    "islam", "israelexposed", "exmuslim", "IsraelCrimes",
    "PalestinianViolence", "AntiSemitismInRedditIsraelWarVideoReport",
    "MuslimLounge", "Muslim", "Gaza", "MuslimCorner",
    "PalestinianvsIsrael",
]

In [51]:
def determine_supporting(subreddit):
    """Map a subreddit name to a stance label.

    The name is looked up in ``supporting_israel`` first and then in
    ``opposing_israel``; the first list that contains it decides the label.

    Args:
        subreddit: Subreddit name, compared exactly (case-sensitive).

    Returns:
        "supporting", "opposing", or "neutral" when the name is in neither list.
    """
    # Order matters: a name present in both lists resolves to "supporting".
    labelled_groups = (
        ("supporting", supporting_israel),
        ("opposing", opposing_israel),
    )
    for label, members in labelled_groups:
        if subreddit in members:
            return label
    return "neutral"
    
def convert_timestamp_to_date(timestamp):
    """Convert a Unix timestamp to a UTC date string in 'YYYY-MM-DD' format.

    Args:
        timestamp: Seconds since the Unix epoch (int or float).

    Returns:
        The calendar date of that instant in UTC, e.g. '2023-10-07'.
    """
    # Pin the conversion to UTC. A naive fromtimestamp() uses the machine's
    # local timezone, so the same post could be bucketed into different days
    # depending on where the notebook is run.
    return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%Y-%m-%d')

def prepare_topic_data(topic):
    """Build a per-date table of reference counts for one topic.

    Args:
        topic: Object exposing ``support_references_per_date``,
            ``oppose_references_per_date`` and ``neutral_references_per_date``,
            each a dict mapping a 'YYYY-MM-DD' string to a count.

    Returns:
        A DataFrame with columns ``date`` (parsed datetime), ``supporting``,
        ``opposing`` and ``neutral``: one row per date found in any of the
        three dicts, in ascending date order, with 0 where a dict has no entry
        for that date. A topic without any references yields an empty frame
        that still has these four columns.

    Raises:
        ValueError: If a date key is not in 'YYYY-MM-DD' format.
    """
    counts_by_column = {
        "supporting": topic.support_references_per_date,
        "opposing": topic.oppose_references_per_date,
        "neutral": topic.neutral_references_per_date,
    }
    # Union of every date key seen in any of the three dicts.
    all_dates = set().union(*counts_by_column.values())

    records = []
    for day in sorted(all_dates):
        row = {"date": datetime.strptime(day, "%Y-%m-%d")}
        for column, counts in counts_by_column.items():
            row[column] = counts.get(day, 0)
        records.append(row)

    # Explicit columns keep the schema stable when `records` is empty;
    # otherwise pd.DataFrame([]) has no columns and df["date"] raises KeyError.
    return pd.DataFrame(records, columns=["date", "supporting", "opposing", "neutral"])


In [52]:

class Topic:
    """A named group of keywords with per-date reference counts split by stance.

    Every instance appends itself to the class-level ``Topic.topics`` list when
    it is constructed, so that list holds all topics created so far.
    """

    # Registry shared by all instances; never cleared by this class.
    topics = []

    def __init__(self, name, keywords):
        """Create a topic and register it in ``Topic.topics``.

        Args:
            name: Display name of the topic.
            keywords: Keywords associated with the topic.
        """
        self.name = name
        self.keywords = keywords
        # Each dict maps a date key to the number of references on that date.
        self.support_references_per_date = {}
        self.oppose_references_per_date = {}
        self.neutral_references_per_date = {}

        self.total_references = 0
        Topic.topics.append(self)

    def add_reference(self, date, support):
        """Count one reference on ``date`` under the given stance.

        Args:
            date: Key identifying the day (callers in this file pass
                'YYYY-MM-DD' strings).
            support: "supporting" or "opposing"; any other value is counted
                as neutral.
        """
        counts = self.get_references(support)
        counts[date] = counts.get(date, 0) + 1
        self.total_references += 1

    def get_references(self, support):
        """Return the per-date count dict for a stance.

        Args:
            support: "supporting" or "opposing"; any other value selects the
                neutral counts.

        Returns:
            The live dict stored on the instance (not a copy).
        """
        if support == "supporting":
            return self.support_references_per_date
        elif support == "opposing":
            return self.oppose_references_per_date
        else:
            return self.neutral_references_per_date

    def __str__(self):
        """String representation for easy printing."""
        # There is no single `references_per_date` attribute on the instance,
        # so the three stance dicts are gathered here for display.
        references_per_date = {
            "supporting": self.support_references_per_date,
            "opposing": self.oppose_references_per_date,
            "neutral": self.neutral_references_per_date,
        }
        return f"Topic: {self.name}, Keywords: {self.keywords}, Total References: {self.total_references}, References per Date: {references_per_date}"



In [None]:
# Topic definitions. Constructing a Topic also appends it to Topic.topics.
# NOTE(review): re-running this cell without re-running the Topic class cell
# adds a second "attacks" topic and counts every post again on the first one.
attacks = Topic("attacks", ["attack", "hospital", "bomb", "kill", "injure", "violence", "war", "conflict", "fight", "combat", "battle", "assault", "strike", "clash", "offensive", "onslaught", "bombard", "besiege", "invade", "raid", "beset", "pound", "blitz", "shell", "strafe", "blow up", "destroy", "demolish", "flatten", "level", "raze", "wreck", "ruin", "annihilate", "exterminate", "eradicate", "eliminate", "extinguish", "obliterate", "decimate", "massacre", "butcher", "slaughter"])

# Every file in the three input directories is read as a JSON array of posts.
comment_files = glob.glob("./filtered_comments/*")
conversations_files = glob.glob("./filtered_conversations/*")
submissions_files = glob.glob("./filtered_submissions/*")

joined_list = comment_files + conversations_files + submissions_files


# Invert topic -> keywords into keyword -> {topics} so each post is scanned
# once per distinct keyword rather than once per (topic, keyword) pair.
keyword_to_topics = defaultdict(set)
for topic in Topic.topics:
    for keyword in topic.keywords:
        keyword_to_topics[keyword].add(topic)

for file in joined_list:
    # Decode explicitly as UTF-8: without `encoding`, open() uses the locale's
    # preferred encoding, which can mis-decode or fail on non-ASCII text.
    with open(file, "r", encoding="utf-8") as f:
        data = json.load(f)

    # The file is fully parsed at this point, so it is closed before counting.
    for post in data:
        post_support = determine_supporting(post["subreddit"])
        post_date = convert_timestamp_to_date(float(post["timestamp"]))
        text = post["text"]
        # NOTE(review): this is a case-sensitive substring test, so "war" also
        # matches "toward", and a post adds one reference per matching keyword
        # (several keywords in one post count several times) -- confirm intent.
        for keyword, topics in keyword_to_topics.items():
            if keyword in text:
                for topic in topics:
                    topic.add_reference(post_date, post_support)



In [None]:
# Draw one line chart per registered topic: references per date, by stance.
for topic in Topic.topics:
    df = prepare_topic_data(topic)

    # A topic with no references produces an empty frame; indexing its columns
    # could raise KeyError, so report it and move on instead of crashing.
    if df.empty:
        print(f"Topic: {topic.name} has no references; skipping plot.")
        continue

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(df["date"], df["supporting"], label="Supporting", marker='o')
    ax.plot(df["date"], df["opposing"], label="Opposing", marker='x')
    ax.plot(df["date"], df["neutral"], label="Neutral", marker='s')

    ax.set_title(f"Topic: {topic.name}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Number of References")
    ax.legend()
    ax.grid(True)
    plt.show()
    # Release the figure so many topics do not accumulate open figures.
    plt.close(fig)