In [1]:
import praw
import pandas as pd
import re
import emoji
from datetime import datetime
from collections import defaultdict
from tqdm import tqdm
import matplotlib.pyplot as plt

In [2]:
# === Reddit API Setup ===
reddit = praw.Reddit(
    client_id='q_dYyqYYdNNInGsM-lC9Xg',
    client_secret='pLigWA6vX6llH7NjWBhVWmg-gJjKvg',
    user_agent='script:gaming_trend (by /u/HiGhastlyy)'
)

In [3]:
# === Base game to subreddit variants mapping ===
GAME_SUBREDDITS = {
    "Genshin Impact": ["genshin_impact", "GenshinImpact"],
    "Valorant": ["valorant", "ValorantCompetitive"],
    "GTA": ["GTA", "GTAOnline", "GTA6", "GrandTheftAutoV", "GTAV", "GTAV_Mods"],
    "League of Legends": ["leagueoflegends", "summonerschool", "LeagueConnect", "LeagueOfLegendsSFW"],
    "Gaming": ["gaming", "pcgaming", "Games", "Gamers","videogames"],
    "Call Of Duty": ["CallOfDuty", "CODWarzone","CodZombies", "CallOfDutyMobile","CODMobile","modernwarfare"],
    "Minecraft":  ['Minecraft','minecraftsuggestions','MinecraftDungeons','MinecraftServer'],
    "The Sims" : ['thesims','Sims4','TheSimsBuilding','Sims3', 'thesims4'],
    "Overwatch" : ['Overwatch', 'OverwatchLeague', 'Competitiveoverwatch', 'Competitiveoverwatch'],
    "Skyrim": ['skyrim', 'skyrimmods', 'SkyrimModsXbox', 'SkyrimTogether'],
    "Roblox": ['roblox', 'RobloxGame', 'GoCommitDie', 'RobloxDevelopers'],
}

In [4]:
# === Text cleaning ===
def clean_text(text):
    if not text:
        return ""
    text = text.encode("ascii", errors="ignore").decode()
    text = emoji.replace_emoji(text, replace='')
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# === Bot Detection ===
def is_suspected_bot(posts, threshold=5):
    duplicates = defaultdict(int)
    for p in posts:
        key = (p['author'], p['cleaned'])
        duplicates[key] += 1
    return {author for (author, _), count in duplicates.items() if count >= threshold}


In [5]:
def scrape_game_posts(game_name, subreddit_list, start_year=2020, end_year=2024, year_cap=200):
    print(f"\nScraping for '{game_name}'...")
    year_counts = defaultdict(int)
    collected = []

    def try_add(post, sub):
        if post.stickied or not post.title:
            return False

        created = datetime.utcfromtimestamp(post.created_utc)
        year = created.year
        if year < start_year or year > end_year:
            return False

        if year_counts[year] >= year_cap:
            return False

        cleaned_title = clean_text(post.title)
        if len(cleaned_title) < 10:
            return False

        post_data = {
            'game': game_name,
            'subreddit': sub,
            'author': str(post.author),
            'created_utc': created.isoformat(),
            'year': year,
            'score': post.score,
            'original_title': post.title,
            'cleaned': cleaned_title
        }

        key = (post_data['author'], post_data['original_title'], year)
        if key in seen:
            return False

        collected.append(post_data)
        seen.add(key)
        year_counts[year] += 1
        return True

    seen = set()

    for sub in subreddit_list:
        subreddit = reddit.subreddit(sub)
        print(f"  ➤ Scanning r/{sub}")

        MODES = [
            ("top", subreddit.top(limit=1000, time_filter="all")),
            ("new", subreddit.new(limit=1000)),
            ("hot", subreddit.hot(limit=1000)),
            ("rising", subreddit.rising(limit=250)),
        ]

        for mode_name, post_iter in MODES:
            if all(year_counts[y] >= year_cap for y in range(start_year, end_year + 1)):
                break  # Exit if all years are filled

            print(f"     → Trying .{mode_name}()")
            for post in tqdm(post_iter, desc=f"{sub} [{mode_name}]"):
                if all(year_counts[y] >= year_cap for y in range(start_year, end_year + 1)):
                    break
                try_add(post, sub)

    # Log any underfilled years
    for y in range(start_year, end_year + 1):
        if year_counts[y] < year_cap:
            print(f"⚠️  [{game_name}] Year {y} only has {year_counts[y]} posts")

    return collected


In [6]:
# === Run scraper across all games ===
all_posts = []

for game, subs in GAME_SUBREDDITS.items():
    all_posts.extend(scrape_game_posts(game, subs, year_cap=1000))

# === Filter out bot-like users ===
bot_users = is_suspected_bot(all_posts)
filtered_posts = [p for p in all_posts if p['author'] not in bot_users]


Scraping for 'Genshin Impact'...
  ➤ Scanning r/genshin_impact
     → Trying .top()


  created = datetime.utcfromtimestamp(post.created_utc)
genshin_impact [top]: 999it [00:22, 45.34it/s]


     → Trying .new()


genshin_impact [new]: 991it [00:23, 41.85it/s]


     → Trying .hot()


genshin_impact [hot]: 710it [00:16, 44.21it/s]


     → Trying .rising()


genshin_impact [rising]: 25it [00:00, 31.18it/s]


  ➤ Scanning r/GenshinImpact
     → Trying .top()


GenshinImpact [top]: 987it [00:16, 58.28it/s]


     → Trying .new()


GenshinImpact [new]: 990it [00:17, 58.06it/s]


     → Trying .hot()


GenshinImpact [hot]: 976it [00:16, 60.81it/s]


     → Trying .rising()


GenshinImpact [rising]: 25it [00:01, 20.78it/s]


⚠️  [Genshin Impact] Year 2020 only has 147 posts
⚠️  [Genshin Impact] Year 2021 only has 652 posts
⚠️  [Genshin Impact] Year 2022 only has 116 posts
⚠️  [Genshin Impact] Year 2023 only has 83 posts
⚠️  [Genshin Impact] Year 2024 only has 391 posts

Scraping for 'Valorant'...
  ➤ Scanning r/valorant
     → Trying .top()


valorant [top]: 984it [00:21, 44.82it/s]


     → Trying .new()


valorant [new]: 964it [00:16, 58.84it/s]


     → Trying .hot()


valorant [hot]: 662it [00:11, 57.38it/s]


     → Trying .rising()


valorant [rising]: 25it [00:01, 23.75it/s]


  ➤ Scanning r/ValorantCompetitive
     → Trying .top()


ValorantCompetitive [top]: 900it [00:25, 35.33it/s]


     → Trying .new()


ValorantCompetitive [new]: 975it [00:19, 49.77it/s]


     → Trying .hot()


ValorantCompetitive [hot]: 956it [00:21, 43.72it/s]


     → Trying .rising()


ValorantCompetitive [rising]: 25it [00:00, 48.03it/s]


⚠️  [Valorant] Year 2020 only has 389 posts
⚠️  [Valorant] Year 2021 only has 571 posts
⚠️  [Valorant] Year 2022 only has 397 posts
⚠️  [Valorant] Year 2023 only has 184 posts
⚠️  [Valorant] Year 2024 only has 163 posts

Scraping for 'GTA'...
  ➤ Scanning r/GTA
     → Trying .top()


GTA [top]: 981it [00:17, 55.98it/s]


     → Trying .new()


GTA [new]: 985it [00:18, 54.66it/s]


     → Trying .hot()


GTA [hot]: 910it [00:19, 46.32it/s]


     → Trying .rising()


GTA [rising]: 25it [00:00, 47.99it/s]


  ➤ Scanning r/GTAOnline
     → Trying .top()


GTAOnline [top]: 998it [00:20, 49.19it/s]


     → Trying .new()


GTAOnline [new]: 979it [00:21, 46.36it/s]


     → Trying .hot()


GTAOnline [hot]: 655it [00:14, 46.52it/s]


     → Trying .rising()


GTAOnline [rising]: 25it [00:00, 30.53it/s]


  ➤ Scanning r/GTA6
     → Trying .top()


GTA6 [top]: 984it [00:21, 46.08it/s]


     → Trying .new()


GTA6 [new]: 945it [00:24, 38.42it/s]


     → Trying .hot()


GTA6 [hot]: 500it [00:12, 38.58it/s]


     → Trying .rising()


GTA6 [rising]: 25it [00:00, 32.07it/s]


  ➤ Scanning r/GrandTheftAutoV
     → Trying .top()


GrandTheftAutoV [top]: 997it [00:14, 69.26it/s]


     → Trying .new()


GrandTheftAutoV [new]: 970it [00:18, 51.54it/s]


     → Trying .hot()


GrandTheftAutoV [hot]: 956it [00:19, 49.40it/s]


     → Trying .rising()


GrandTheftAutoV [rising]: 25it [00:00, 42.12it/s]


  ➤ Scanning r/GTAV
     → Trying .top()


GTAV [top]: 995it [00:19, 51.80it/s]


     → Trying .new()


GTAV [new]: 979it [00:20, 47.09it/s]


     → Trying .hot()


GTAV [hot]: 977it [00:22, 42.50it/s]


     → Trying .rising()


GTAV [rising]: 25it [00:00, 33.13it/s]


  ➤ Scanning r/GTAV_Mods
     → Trying .top()


GTAV_Mods [top]: 998it [00:20, 48.96it/s]


     → Trying .new()


GTAV_Mods [new]: 802it [00:13, 58.64it/s]


     → Trying .hot()


GTAV_Mods [hot]: 569it [00:11, 51.00it/s]


     → Trying .rising()


GTAV_Mods [rising]: 24it [00:00, 46.40it/s]


⚠️  [GTA] Year 2021 only has 962 posts
⚠️  [GTA] Year 2022 only has 272 posts
⚠️  [GTA] Year 2023 only has 333 posts
⚠️  [GTA] Year 2024 only has 979 posts

Scraping for 'League of Legends'...
  ➤ Scanning r/leagueoflegends
     → Trying .top()


leagueoflegends [top]: 986it [00:21, 46.94it/s]


     → Trying .new()


leagueoflegends [new]: 978it [00:16, 59.34it/s]


     → Trying .hot()


leagueoflegends [hot]: 666it [00:13, 50.19it/s]


     → Trying .rising()


leagueoflegends [rising]: 25it [00:00, 33.03it/s]


  ➤ Scanning r/summonerschool
     → Trying .top()


summonerschool [top]: 990it [00:19, 51.24it/s]


     → Trying .new()


summonerschool [new]: 972it [00:15, 63.37it/s]


     → Trying .hot()


summonerschool [hot]: 834it [00:16, 51.08it/s]


     → Trying .rising()


summonerschool [rising]: 25it [00:00, 50.94it/s]


  ➤ Scanning r/LeagueConnect
     → Trying .top()


LeagueConnect [top]: 997it [00:17, 56.85it/s]


     → Trying .new()


LeagueConnect [new]: 987it [00:12, 79.19it/s]


     → Trying .hot()


LeagueConnect [hot]: 969it [00:13, 73.00it/s]


     → Trying .rising()


LeagueConnect [rising]: 25it [00:00, 58.15it/s]


  ➤ Scanning r/LeagueOfLegendsSFW
     → Trying .top()


LeagueOfLegendsSFW [top]: 1000it [00:27, 36.87it/s]


     → Trying .new()


LeagueOfLegendsSFW [new]: 984it [00:23, 42.24it/s]


     → Trying .hot()


LeagueOfLegendsSFW [hot]: 999it [00:23, 43.30it/s]


     → Trying .rising()


LeagueOfLegendsSFW [rising]: 25it [00:00, 38.44it/s]


⚠️  [League of Legends] Year 2021 only has 720 posts
⚠️  [League of Legends] Year 2022 only has 209 posts
⚠️  [League of Legends] Year 2023 only has 953 posts
⚠️  [League of Legends] Year 2024 only has 323 posts

Scraping for 'Gaming'...
  ➤ Scanning r/gaming
     → Trying .top()


gaming [top]: 996it [00:18, 53.21it/s]


     → Trying .new()


gaming [new]: 834it [00:14, 56.74it/s]


     → Trying .hot()


gaming [hot]: 463it [00:10, 44.78it/s]


     → Trying .rising()


gaming [rising]: 26it [00:00, 36.98it/s]


  ➤ Scanning r/pcgaming
     → Trying .top()


pcgaming [top]: 996it [00:21, 46.38it/s]


     → Trying .new()


pcgaming [new]: 898it [00:18, 48.83it/s]


     → Trying .hot()


pcgaming [hot]: 242it [00:06, 34.59it/s]


     → Trying .rising()


pcgaming [rising]: 25it [00:00, 30.03it/s]


  ➤ Scanning r/Games
     → Trying .top()


Games [top]: 1000it [00:16, 59.55it/s]


     → Trying .new()


Games [new]: 789it [00:12, 62.44it/s]


     → Trying .hot()


Games [hot]: 435it [00:08, 52.13it/s]


     → Trying .rising()


Games [rising]: 25it [00:00, 56.11it/s]


  ➤ Scanning r/Gamers
     → Trying .top()


Gamers [top]: 996it [00:23, 42.90it/s]


     → Trying .new()


Gamers [new]: 936it [00:18, 50.98it/s]


     → Trying .hot()


Gamers [hot]: 840it [00:19, 43.10it/s]


     → Trying .rising()


Gamers [rising]: 25it [00:00, 31.56it/s]


  ➤ Scanning r/videogames
     → Trying .top()


videogames [top]: 910it [00:17, 52.64it/s]


     → Trying .new()


videogames [new]: 968it [00:17, 56.18it/s]


     → Trying .hot()


videogames [hot]: 912it [00:15, 57.75it/s]


     → Trying .rising()


videogames [rising]: 25it [00:00, 46.85it/s]


⚠️  [Gaming] Year 2020 only has 874 posts
⚠️  [Gaming] Year 2021 only has 706 posts
⚠️  [Gaming] Year 2022 only has 302 posts
⚠️  [Gaming] Year 2023 only has 213 posts
⚠️  [Gaming] Year 2024 only has 488 posts

Scraping for 'Call Of Duty'...
  ➤ Scanning r/CallOfDuty
     → Trying .top()


CallOfDuty [top]: 987it [00:19, 50.61it/s]


     → Trying .new()


CallOfDuty [new]: 972it [00:19, 50.50it/s]


     → Trying .hot()


CallOfDuty [hot]: 252it [00:09, 26.41it/s]


     → Trying .rising()


CallOfDuty [rising]: 25it [00:00, 43.61it/s]


  ➤ Scanning r/CODWarzone
     → Trying .top()


CODWarzone [top]: 998it [00:20, 49.21it/s]


     → Trying .new()


CODWarzone [new]: 982it [00:19, 51.30it/s]


     → Trying .hot()


CODWarzone [hot]: 772it [00:14, 53.36it/s]


     → Trying .rising()


CODWarzone [rising]: 25it [00:00, 48.03it/s]


  ➤ Scanning r/CodZombies
     → Trying .top()


CodZombies [top]: 997it [00:19, 51.92it/s]


     → Trying .new()


CodZombies [new]: 984it [00:19, 50.51it/s]


     → Trying .hot()


CodZombies [hot]: 850it [00:16, 50.85it/s]


     → Trying .rising()


CodZombies [rising]: 25it [00:00, 33.33it/s]


  ➤ Scanning r/CallOfDutyMobile
     → Trying .top()


CallOfDutyMobile [top]: 996it [00:20, 48.66it/s]


     → Trying .new()


CallOfDutyMobile [new]: 929it [00:19, 46.92it/s]


     → Trying .hot()


CallOfDutyMobile [hot]: 830it [00:16, 50.95it/s]


     → Trying .rising()


CallOfDutyMobile [rising]: 25it [00:00, 47.94it/s]


  ➤ Scanning r/CODMobile
     → Trying .top()


CODMobile [top]: 995it [00:18, 53.52it/s]


     → Trying .new()


CODMobile [new]: 821it [00:15, 53.14it/s]


     → Trying .hot()


CODMobile [hot]: 801it [00:14, 56.30it/s]


     → Trying .rising()


CODMobile [rising]: 25it [00:00, 40.79it/s]


  ➤ Scanning r/modernwarfare
     → Trying .top()


modernwarfare [top]: 992it [00:20, 49.37it/s]


     → Trying .new()


modernwarfare [new]: 966it [00:19, 50.53it/s]


     → Trying .hot()


modernwarfare [hot]: 824it [00:17, 47.86it/s]


     → Trying .rising()


modernwarfare [rising]: 25it [00:00, 27.16it/s]


⚠️  [Call Of Duty] Year 2022 only has 184 posts
⚠️  [Call Of Duty] Year 2023 only has 97 posts
⚠️  [Call Of Duty] Year 2024 only has 210 posts

Scraping for 'Minecraft'...
  ➤ Scanning r/Minecraft
     → Trying .top()


Minecraft [top]: 987it [00:21, 46.89it/s]


     → Trying .new()


Minecraft [new]: 967it [00:24, 39.20it/s]


     → Trying .hot()


Minecraft [hot]: 925it [00:20, 45.08it/s]


     → Trying .rising()


Minecraft [rising]: 25it [00:00, 30.93it/s]


  ➤ Scanning r/minecraftsuggestions
     → Trying .top()


minecraftsuggestions [top]: 996it [00:23, 42.25it/s]


     → Trying .new()


minecraftsuggestions [new]: 955it [00:21, 43.80it/s]


     → Trying .hot()


minecraftsuggestions [hot]: 659it [00:15, 42.94it/s]


     → Trying .rising()


minecraftsuggestions [rising]: 25it [00:00, 38.54it/s]


  ➤ Scanning r/MinecraftDungeons
     → Trying .top()


MinecraftDungeons [top]: 998it [00:19, 51.69it/s]


     → Trying .new()


MinecraftDungeons [new]: 995it [00:18, 52.38it/s]


     → Trying .hot()


MinecraftDungeons [hot]: 991it [00:22, 43.22it/s]


     → Trying .rising()


MinecraftDungeons [rising]: 25it [00:00, 31.90it/s]


  ➤ Scanning r/MinecraftServer
     → Trying .top()


MinecraftServer [top]: 1000it [00:23, 42.74it/s]


     → Trying .new()


MinecraftServer [new]: 957it [00:17, 55.28it/s]


     → Trying .hot()


MinecraftServer [hot]: 948it [00:18, 52.49it/s]


     → Trying .rising()


MinecraftServer [rising]: 25it [00:00, 50.66it/s]


⚠️  [Minecraft] Year 2022 only has 194 posts
⚠️  [Minecraft] Year 2023 only has 62 posts
⚠️  [Minecraft] Year 2024 only has 94 posts

Scraping for 'The Sims'...
  ➤ Scanning r/thesims
     → Trying .top()


thesims [top]: 1000it [00:20, 47.74it/s]


     → Trying .new()


thesims [new]: 994it [00:23, 42.76it/s]


     → Trying .hot()


thesims [hot]: 960it [00:25, 37.34it/s]


     → Trying .rising()


thesims [rising]: 25it [00:00, 29.61it/s]


  ➤ Scanning r/Sims4
     → Trying .top()


Sims4 [top]: 1000it [00:24, 41.59it/s]


     → Trying .new()


Sims4 [new]: 991it [00:30, 32.29it/s]


     → Trying .hot()


Sims4 [hot]: 465it [00:15, 29.62it/s]


     → Trying .rising()


Sims4 [rising]: 25it [00:00, 30.79it/s]


  ➤ Scanning r/TheSimsBuilding
     → Trying .top()


TheSimsBuilding [top]: 1000it [00:48, 20.62it/s]


     → Trying .new()


TheSimsBuilding [new]: 995it [00:59, 16.76it/s]


     → Trying .hot()


TheSimsBuilding [hot]: 993it [00:55, 17.94it/s]


     → Trying .rising()


TheSimsBuilding [rising]: 25it [00:01, 19.06it/s]


  ➤ Scanning r/Sims3
     → Trying .top()


Sims3 [top]: 1000it [00:25, 39.55it/s]


     → Trying .new()


Sims3 [new]: 994it [00:24, 40.67it/s]


     → Trying .hot()


Sims3 [hot]: 985it [00:23, 41.51it/s]


     → Trying .rising()


Sims3 [rising]: 25it [00:01, 20.73it/s]


  ➤ Scanning r/thesims4
     → Trying .top()


thesims4 [top]: 999it [00:29, 33.71it/s]


     → Trying .new()


thesims4 [new]: 978it [00:33, 29.59it/s]


     → Trying .hot()


thesims4 [hot]: 239it [00:10, 23.54it/s]


     → Trying .rising()


thesims4 [rising]: 25it [00:01, 24.74it/s]


⚠️  [The Sims] Year 2020 only has 761 posts
⚠️  [The Sims] Year 2021 only has 692 posts
⚠️  [The Sims] Year 2022 only has 638 posts
⚠️  [The Sims] Year 2023 only has 638 posts

Scraping for 'Overwatch'...
  ➤ Scanning r/Overwatch
     → Trying .top()


Overwatch [top]: 995it [00:19, 52.13it/s]


     → Trying .new()


Overwatch [new]: 991it [00:18, 54.28it/s]


     → Trying .hot()


Overwatch [hot]: 838it [00:15, 53.40it/s]


     → Trying .rising()


Overwatch [rising]: 25it [00:00, 43.12it/s]


  ➤ Scanning r/OverwatchLeague
     → Trying .top()


OverwatchLeague [top]: 998it [00:19, 51.02it/s]


     → Trying .new()


OverwatchLeague [new]: 968it [00:20, 48.37it/s]


     → Trying .hot()


OverwatchLeague [hot]: 912it [00:18, 48.91it/s]


     → Trying .rising()


OverwatchLeague [rising]: 25it [00:00, 33.36it/s]


  ➤ Scanning r/Competitiveoverwatch
     → Trying .top()


Competitiveoverwatch [top]: 999it [00:20, 49.10it/s]


     → Trying .new()


Competitiveoverwatch [new]: 993it [00:18, 53.16it/s]


     → Trying .hot()


Competitiveoverwatch [hot]: 989it [00:18, 54.52it/s]


     → Trying .rising()


Competitiveoverwatch [rising]: 25it [00:00, 44.39it/s]


  ➤ Scanning r/Competitiveoverwatch
     → Trying .top()


Competitiveoverwatch [top]: 999it [00:19, 51.03it/s]


     → Trying .new()


Competitiveoverwatch [new]: 993it [00:19, 51.44it/s]


     → Trying .hot()


Competitiveoverwatch [hot]: 989it [00:19, 51.16it/s]


     → Trying .rising()


Competitiveoverwatch [rising]: 25it [00:00, 42.36it/s]


⚠️  [Overwatch] Year 2020 only has 821 posts
⚠️  [Overwatch] Year 2021 only has 393 posts
⚠️  [Overwatch] Year 2022 only has 229 posts
⚠️  [Overwatch] Year 2023 only has 692 posts
⚠️  [Overwatch] Year 2024 only has 87 posts

Scraping for 'Skyrim'...
  ➤ Scanning r/skyrim
     → Trying .top()


skyrim [top]: 989it [00:20, 47.84it/s]


     → Trying .new()


skyrim [new]: 994it [00:18, 54.24it/s]


     → Trying .hot()


skyrim [hot]: 964it [00:18, 52.09it/s]


     → Trying .rising()


skyrim [rising]: 25it [00:00, 45.77it/s]


  ➤ Scanning r/skyrimmods
     → Trying .top()


skyrimmods [top]: 996it [00:22, 44.05it/s]


     → Trying .new()


skyrimmods [new]: 985it [00:15, 64.86it/s]


     → Trying .hot()


skyrimmods [hot]: 997it [00:15, 65.89it/s]


     → Trying .rising()


skyrimmods [rising]: 25it [00:00, 50.46it/s]


  ➤ Scanning r/SkyrimModsXbox
     → Trying .top()


SkyrimModsXbox [top]: 1000it [00:50, 19.79it/s]


     → Trying .new()


SkyrimModsXbox [new]: 990it [00:19, 49.78it/s]


     → Trying .hot()


SkyrimModsXbox [hot]: 999it [00:19, 51.38it/s]


     → Trying .rising()


SkyrimModsXbox [rising]: 25it [00:00, 50.70it/s]


  ➤ Scanning r/SkyrimTogether
     → Trying .top()


SkyrimTogether [top]: 995it [00:18, 53.54it/s]


     → Trying .new()


SkyrimTogether [new]: 988it [00:17, 57.53it/s]


     → Trying .hot()


SkyrimTogether [hot]: 922it [00:16, 56.44it/s]


     → Trying .rising()


SkyrimTogether [rising]: 25it [00:00, 51.31it/s]


⚠️  [Skyrim] Year 2020 only has 702 posts
⚠️  [Skyrim] Year 2021 only has 744 posts
⚠️  [Skyrim] Year 2022 only has 651 posts
⚠️  [Skyrim] Year 2023 only has 732 posts
⚠️  [Skyrim] Year 2024 only has 611 posts

Scraping for 'Roblox'...
  ➤ Scanning r/roblox
     → Trying .top()


roblox [top]: 992it [00:21, 45.66it/s]


     → Trying .new()


roblox [new]: 930it [00:19, 48.18it/s]


     → Trying .hot()


roblox [hot]: 497it [00:13, 37.47it/s]


     → Trying .rising()


roblox [rising]: 25it [00:00, 43.68it/s]


  ➤ Scanning r/RobloxGame
     → Trying .top()


RobloxGame [top]: 8it [00:00, 21.19it/s]


     → Trying .new()


RobloxGame [new]: 8it [00:00, 20.53it/s]


     → Trying .hot()


RobloxGame [hot]: 8it [00:00, 21.21it/s]


     → Trying .rising()


RobloxGame [rising]: 8it [00:00, 21.24it/s]


  ➤ Scanning r/GoCommitDie
     → Trying .top()


GoCommitDie [top]: 997it [00:18, 54.48it/s]


     → Trying .new()


GoCommitDie [new]: 997it [00:19, 52.40it/s]


     → Trying .hot()


GoCommitDie [hot]: 948it [00:18, 50.29it/s]


     → Trying .rising()


GoCommitDie [rising]: 25it [00:00, 48.77it/s]


  ➤ Scanning r/RobloxDevelopers
     → Trying .top()


RobloxDevelopers [top]: 1000it [00:24, 41.29it/s]


     → Trying .new()


RobloxDevelopers [new]: 935it [00:19, 47.96it/s]


     → Trying .hot()


RobloxDevelopers [hot]: 800it [00:15, 51.56it/s]


     → Trying .rising()


RobloxDevelopers [rising]: 24it [00:00, 42.67it/s]

⚠️  [Roblox] Year 2020 only has 925 posts
⚠️  [Roblox] Year 2021 only has 602 posts
⚠️  [Roblox] Year 2022 only has 257 posts
⚠️  [Roblox] Year 2023 only has 146 posts
⚠️  [Roblox] Year 2024 only has 291 posts





In [7]:
# === Deduplicate and save ===
df = pd.DataFrame(filtered_posts)
df = df.drop_duplicates(subset=["game", "author", "original_title"])

In [8]:
# === Save to CSV ===
df.to_csv("reddit_2020_2025.csv", index=False)
print(f"\n✅ Saved {len(df)} posts across all games. Bots removed, titles cleaned, capped per year.")


✅ Saved 27762 posts across all games. Bots removed, titles cleaned, capped per year.


In [None]:
# Load the saved CSV
file_path = "reddit_2020_2025.csv"


df = pd.read_csv(file_path)

In [None]:
# Group by game and year
game_year_counts = df.groupby(["game", "year"]).size().unstack(fill_value=0)



In [None]:
# Plot
game_year_counts.T.plot(kind='bar', figsize=(12, 6), edgecolor='black')
plt.title("Post Count per Game per Year")
plt.xlabel("Year")
plt.ylabel("Number of Posts")
plt.legend(title="Game")
plt.xticks(rotation=0)
plt.grid(axis='y')
plt.tight_layout()
plt.show()