In [5]:
import json
from collections import Counter
import re
# import emoji

# Fallback display names keyed by numeric Discord author id.
# Presumably these are accounts whose "AuthorName" is missing from the
# export -- confirm against the data before adding entries.
missing_users_map = {
    339903300994596884: "edgii",
    596340515947937793: "3psilon9",
}

# Messages from this author id are skipped in every per-author, word and
# emoji tally (they still count towards thread sizes and the grand total).
_EXCLUDED_AUTHOR_ID = 1003326182261014568

# Discord custom emoji markup: <:name:id> (static) or <a:name:id> (animated).
_CUSTOM_EMOJI_RE = re.compile(r'<a?:\w+:\d+>')
_WORD_RE = re.compile(r'\b\w+\b')

# Words left out of the "most used words" ranking.
_STOP_WORDS = frozenset({
    'the', 'is', 'and', 'to', 'a', 'in', 'that', 'it', 'of', 'i', 'you',
    'me', 'my', 'have', 'has', 'had', 'will', 'would', 'was', 'now', 'do',
    'should', 'can', 'could', 'may', 'might', 'must', 'no', 'but', 'for',
    'not', 'on', 'if', 'so', 'as', 'at', 'are', 'one', 'this', 'be',
    'https', 'arcviewer',
})


def _author_label(author_id, author_name):
    """Return the single string label under which an author is tallied.

    Precedence: an entry in ``missing_users_map``, then a non-empty exported
    name, then the stringified id. Using one rule everywhere keeps the same
    person under the same key in the message, word and reaction rankings.
    """
    if author_id in missing_users_map:
        return missing_users_map[author_id]
    if author_name:
        return author_name
    return str(author_id)


def _unique_label(title, used_labels):
    """Return *title*, suffixed with " (n)" if that label is already taken.

    The chosen label is added to *used_labels* so later threads with the same
    title get their own entry instead of overwriting an earlier one.
    """
    label = title
    occurrence = 1
    while label in used_labels:
        occurrence += 1
        label = f"{title} ({occurrence})"
    used_labels.add(label)
    return label


def analyze_discord_data(data):
    """Print activity statistics for an exported list of forum threads.

    Args:
        data: Iterable of thread mappings. Each thread must have a
            "Messages" list and may have "Title" and "Reactions". Every
            message is read through its "AuthorId", "AuthorName" and
            "Content" keys; every reaction through "Value" and "Users"
            (each user having "AuthorId" and "AuthorName").

    Returns:
        None. The report is written to stdout: total messages, top authors by
        messages / words / reactions given, the ten biggest threads, overall
        thread reactions, the twenty most used words and the ten most used
        custom emojis.

    Raises:
        KeyError: If a thread, message or reaction lacks one of the required
            keys listed above.
    """
    author_message_counts = Counter()
    author_word_counts = Counter()
    word_counts = Counter()
    emoji_counts = Counter()
    thread_message_counts = Counter()
    thread_author_messages = {}
    thread_reaction_counts = Counter()
    thread_reaction_authors = {}  # thread label -> {reaction: [user labels]}
    reaction_user_counts = {}  # reaction -> Counter of user labels
    author_reaction_counts = Counter()
    total_messages = 0
    used_labels = set()

    for thread in data:
        # Threads sharing a title (or the default) must not clobber each other.
        thread_name = _unique_label(
            thread.get("Title", "Unnamed Thread"), used_labels
        )
        messages = thread["Messages"]
        total_messages += len(messages)
        thread_message_counts[thread_name] = len(messages)
        per_author = thread_author_messages[thread_name] = Counter()
        per_reaction = thread_reaction_authors[thread_name] = {}

        # "Reactions" may be absent or null; both mean "no reactions".
        for reaction in thread.get("Reactions") or ():
            reaction_name = reaction["Value"]
            reaction_users = [
                _author_label(user["AuthorId"], user["AuthorName"])
                for user in reaction["Users"]
            ]
            # += (not update) so a reaction with no users still gets a 0 entry.
            thread_reaction_counts[reaction_name] += len(reaction_users)
            per_reaction.setdefault(reaction_name, []).extend(reaction_users)
            reaction_user_counts.setdefault(
                reaction_name, Counter()
            ).update(reaction_users)
            author_reaction_counts.update(reaction_users)

        for msg in messages:
            author_id = msg["AuthorId"]
            if author_id == _EXCLUDED_AUTHOR_ID:
                continue
            author = _author_label(author_id, msg["AuthorName"])

            per_author[author] += 1
            author_message_counts[author] += 1

            content = msg["Content"]
            if not content:
                continue

            # Words are runs of \w characters, compared case-insensitively.
            words = _WORD_RE.findall(content.lower())
            word_counts.update(words)
            author_word_counts[author] += len(words)

            # Only Discord custom emojis are counted; Unicode emojis are not.
            emoji_counts.update(_CUSTOM_EMOJI_RE.findall(content))

    # Print results
    print("=== Discord Thread Analysis ===\n")

    print(f"Total Messages: {total_messages}\n")

    print("Top 10 Most Active Users by Message Count:")
    for author, count in author_message_counts.most_common(10):
        print(f"{author}: {count} messages")
    print()

    print("Top 10 Most Active Users by Word Count:")
    for author, count in author_word_counts.most_common(10):
        print(f"{author}: {count} words")
    print()

    print("Top 10 Most Active Users by Reactions Given:")
    for author, count in author_reaction_counts.most_common(10):
        print(f"{author}: {count} reactions")
    print()

    print("Top 10 Biggest Threads:")
    for thread_name, count in thread_message_counts.most_common(10):
        reactions_text = ', '.join(
            f'{reaction}: {users}'
            for reaction, users in thread_reaction_authors[thread_name].items()
        )
        print(f"{thread_name}: {count} messages; reactions: {reactions_text}")
        for author, msg_count in thread_author_messages[thread_name].most_common(3):
            print(f"  {author}: {msg_count} messages")
        print()

    print("Overall Thread Reactions:")
    for reaction, count in thread_reaction_counts.most_common():
        print(f"{reaction}: {count} total reactions")
        print("  Top users:")
        for user, user_count in reaction_user_counts[reaction].most_common(5):
            print(f"    {user}: {user_count} times")
    print()

    print("Top 20 Most Used Words:")
    # Drop stop words and single-character tokens before ranking.
    filtered_words = Counter({
        word: count
        for word, count in word_counts.items()
        if len(word) > 1 and word not in _STOP_WORDS
    })
    for word, count in filtered_words.most_common(20):
        print(f"'{word}': {count} times")
    print()

    print("Top 10 Most Used Emojis:")
    for emoji_code, count in emoji_counts.most_common(10):
        print(f"{emoji_code}: {count} times")

# Read the exported forum JSON from disk, then print the analysis report.
export_path = 'C:\\Users\\vikto\\Desktop\\ForumExport_5.json'
with open(export_path, mode='r', encoding='utf-8') as export_file:
    discord_data = json.load(export_file)

analyze_discord_data(discord_data)


=== Discord Thread Analysis ===

Total Messages: 14399

Top 10 Most Active Users by Message Count:
Loloppe: 2817 messages
GalaxyMaster: 965 messages
Poochy: 960 messages
Emy: 894 messages
edgii: 753 messages
FestiveApe: 558 messages
Blackjack: 466 messages
Light Ai: 431 messages
Cratornugget: 393 messages
BlAck_vOid-1001: 310 messages

Top 10 Most Active Users by Word Count:
Loloppe: 44639 words
GalaxyMaster: 10372 words
Poochy: 9586 words
LackWiz: 8380 words
Emy: 6964 words
Cratornugget: 6550 words
edgii: 6357 words
Light Ai: 5712 words
Blackjack: 4773 words
Undeceiver: 3716 words

Top 10 Most Active Users by Reactions Given:
Dereknalox123: 5 reactions
🔹 Darkrealm7 🔹: 4 reactions
BlAck_vOid-1001: 3 reactions
SL4SH: 2 reactions
Koreami: 2 reactions
LackWiz: 2 reactions
abbad: 1 reactions
FireGiraffe :3: 1 reactions
Lalahti: 1 reactions
ViSi: 1 reactions

Top 10 Biggest Threads:
Viyella's Melancholy - Serephor: 653 messages; reactions: 
  Serephor: 239 messages
  Poochy: 190 messages
  