In [1]:
import torch

# Sanity check: report whether CUDA is usable and, if it is, which GPU is device 0.
cuda_available = torch.cuda.is_available()
print(cuda_available)
# Only query the device name when CUDA is present; asking for device 0 on a
# CPU-only machine raises instead of printing anything useful.
if cuda_available:
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device detected")


True
NVIDIA GeForce RTX 3060 Ti


In [None]:
import praw
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
from dotenv import load_dotenv
import os

# Load settings via python-dotenv before anything reads os.environ below.
load_dotenv()

# --- Check for GPU ---
# Device index handed to the transformers pipelines: 0 when CUDA is available
# (logged as "cuda:0" in this cell's output), otherwise -1.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# --- Sentiment Analysis Setup ---
# Tokenizer and model are loaded explicitly from the same checkpoint and then
# wrapped in a single pipeline placed on the device selected above.
sentiment_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_name)
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model=sentiment_model,
    tokenizer=sentiment_tokenizer,
    device=device,
)

# Sentiment label mapping
# Translates the pipeline's generic LABEL_n ids into readable class names.
sentiment_label_map = dict(
    LABEL_0="Negative",
    LABEL_1="Neutral",
    LABEL_2="Positive",
)

# --- Emotion Detection Setup ---
emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
# The checkpoint id is used for both the model and its tokenizer; top_k=1 asks
# the pipeline for the single best-scoring label only.
emotion_pipeline = pipeline(
    task="text-classification",
    model=emotion_model_name,
    tokenizer=emotion_model_name,
    device=device,
    top_k=1,
)

# --- Reddit Authentication ---
# All four credentials are looked up in os.environ (in the order listed); a
# missing one raises KeyError. Nothing secret is written into the notebook.
# NOTE(review): Windows treats environment names case-insensitively, so
# 'username' may resolve to the OS account name instead of the intended Reddit
# login — confirm on that platform.
reddit = praw.Reddit(
    user_agent='script:gaming_trend (by /u/HiGhastlyy)',
    **{
        name: os.environ[name]
        for name in ('client_id', 'client_secret', 'username', 'password')
    },
)

# --- Subreddits and Data Collection ---
# For every subreddit, fetch the top posts of the past year, classify
# "title + body" with both pipelines, and append one flat record per post.
subreddits = ['gaming', 'pcgaming', 'leagueoflegends', 'cs2', 'dota2', 'overwatch', 'fortnite', 'apexlegends', 'valorant', 'minecraft']
posts = []

# Upper bound on model input length, in TOKENS. Slicing the string to 512
# characters does not guarantee this: some characters (emoji, non-Latin text)
# expand to several tokens each, and ordinary English uses far fewer tokens
# than characters, so a character slice is both unsafe and wasteful.
# The limit is passed explicitly so it does not depend on tokenizer defaults.
MAX_MODEL_TOKENS = 512
tokenizer_kwargs = {"truncation": True, "max_length": MAX_MODEL_TOKENS}

for sub in subreddits:
    for post in reddit.subreddit(sub).top(time_filter='year', limit=500):
        full_text = f"{post.title} {post.selftext}".strip()

        if full_text:
            # Sentiment: the pipeline returns a one-element list for a single string.
            sentiment_result = sentiment_pipeline(full_text, **tokenizer_kwargs)[0]
            sentiment = sentiment_label_map[sentiment_result['label']]
            sentiment_score = sentiment_result['score']

            # Emotion: this pipeline was built with top_k=1, so the result is
            # nested one level deeper (list of candidates per input).
            emotion_result = emotion_pipeline(full_text, **tokenizer_kwargs)[0][0]
            emotion = emotion_result['label']
            emotion_score = emotion_result['score']
        else:
            # Nothing to classify; keep the row with placeholder values.
            sentiment = "N/A"
            sentiment_score = 0.0
            emotion = "N/A"
            emotion_score = 0.0

        posts.append({
            'id': post.id,
            'subreddit': sub,
            'title': post.title,
            'body': post.selftext,
            'score': post.score,
            'num_comments': post.num_comments,
            'created_utc': post.created_utc,
            'sentiment': sentiment,
            'sentiment_confidence': round(sentiment_score, 3),
            'emotion': emotion,
            'emotion_confidence': round(emotion_score, 3)
        })

# --- Save to CSV ---
# One row per collected post; the positional index is left out of the file.
df = pd.DataFrame(data=posts)
df.to_csv(path_or_buf="reddit_gaming_trends.csv", index=False)

print("✅ Done! Data saved to reddit_gaming_trends.csv")

  from .autonotebook import tqdm as notebook_tqdm
Device set to use cuda:0
Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


✅ Done! Data saved to reddit_gaming_trends.csv
