In [1]:
import os
import time

import pandas as pd
import praw
from textblob import TextBlob


In [2]:
# Setting up PRAW with our Reddit app credentials.
# The credentials are taken from the REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET
# environment variables so that real secrets never have to be typed into (and
# committed with) the notebook. The placeholders are only a fallback and must
# be replaced, or the variables set, before any request can succeed.
reddit = praw.Reddit(
    client_id=os.environ.get('REDDIT_CLIENT_ID', 'your_client_id'),
    client_secret=os.environ.get('REDDIT_CLIENT_SECRET', 'your_client_secret'),
    user_agent='MoT-Group9',
)

# This notebook only reads public posts and comments, so keep the client read-only
reddit.read_only = True

In [3]:
def get_sentiment(text):
    """Label a text as 'positive', 'negative' or 'neutral'.

    The label is derived from the sign of the TextBlob polarity score:
    above zero is 'positive', below zero is 'negative', anything else
    is 'neutral'.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 'positive'
    if polarity < 0:
        return 'negative'
    return 'neutral'

def fetch_comments(submission):
    """Collect the usable comments of a Reddit submission as flat records.

    The comment tree is fully expanded with ``replace_more(limit=None)`` and
    then flattened with ``comments.list()``. Entries that are not
    ``praw.models.Comment`` instances are skipped, as are comments whose body
    is only a ``'[deleted]'`` or ``'[removed]'`` placeholder, since those
    carry no text to analyse.

    Args:
        submission: The PRAW submission whose comments are read.

    Returns:
        list[dict]: One ``{'post_title': ..., 'comment_body': ...}`` record
        per kept comment, in the order returned by ``comments.list()``.
    """
    # Bodies that stand in for content that is no longer available
    placeholder_bodies = ('[deleted]', '[removed]')

    submission.comments.replace_more(limit=None)
    return [
        {
            'post_title': submission.title,
            'comment_body': comment.body,
        }
        for comment in submission.comments.list()
        if isinstance(comment, praw.models.Comment)
        and comment.body not in placeholder_bodies
    ]

def fetch_subreddit_data(subreddit_name, post_urls):
    """Fetch the post and comment records for the given post URLs.

    For every URL the submission is loaded through the module-level
    ``reddit`` client. The post itself is stored first (its self-text as
    ``comment_body``), followed by the records from ``fetch_comments``.

    Args:
        subreddit_name: Label written to the ``'subreddit'`` key of every
            record produced for these posts.
        post_urls: Iterable of submission URLs to fetch.

    Returns:
        list[dict]: Records with the keys ``'subreddit'``, ``'post_title'``
        and ``'comment_body'``; an empty list when ``post_urls`` is empty.
    """
    all_comments = []
    for post_url in post_urls:
        submission = reddit.submission(url=post_url)
        # The post itself is the first row for this submission
        all_comments.append({
            'subreddit': subreddit_name,
            'post_title': submission.title,
            'comment_body': submission.selftext,
        })
        # Make sure every comment record carries the subreddit label as well;
        # without it the comment rows end up with a missing 'subreddit' value
        # and fall out of any grouping on that column.
        all_comments.extend(
            {**comment, 'subreddit': subreddit_name}
            for comment in fetch_comments(submission)
        )
    return all_comments

In [4]:
# Subreddits to process, in processing order. Every name is used as a key
# into post_urls_for_each_subreddit.
subreddit_names = [
    'singularity',
    'Futurology',
    'ArtificialInteligence',
    'technology',
    'AskReddit',
    'privacy',
    'RandomThoughts',
    'AskEngineers',
    'changemyview',
    'Ecommerce',
    'ChatGPT',
    'YouShouldKnow',
    'ArtistLounge',
    'CasualConversation',
    'Freelance',
    'graphic_design',
    'GenX',
    'intj',
    'ControlProblem',
    'FinancialCareers',
    'StableDiffusion',
    'INTP',
]
# Submission URLs to fetch, keyed by the subreddit label used in the output.
post_urls_for_each_subreddit = {
    'singularity': [
        'https://www.reddit.com/r/singularity/comments/132kgur/i_dont_fear_malicious_or_rogue_ai_i_fear_how_ai/',
    ],
    'Futurology': [
        'https://www.reddit.com/r/Futurology/comments/9z9g0j/why_do_we_fear_artificial_intelligence_andor/',
        'https://www.reddit.com/r/Futurology/comments/zo2ugk/ai_fearmongering_is_irrational_panic_and_its/',
        'https://www.reddit.com/r/Futurology/comments/127lm4i/openai_ceo_its_not_funny_that_im_afraid_of_the_ai/',
        'https://www.reddit.com/r/Futurology/comments/10ffivq/an_honest_admission_i_fear_the_upcoming/',
        'https://www.reddit.com/r/Futurology/comments/133tcrs/is_doomsday_talk_about_ai_a_result_of/',
    ],
    'ArtificialInteligence': [
        'https://www.reddit.com/r/ArtificialInteligence/comments/17ddumf/what_is_expected_to_gain_from_an_ai_safety_summit/',
        'https://www.reddit.com/r/ArtificialInteligence/comments/13607gt/i_think_ai_should_be_removed_from_public_use/',
        'https://www.reddit.com/r/ArtificialInteligence/comments/15u0b1p/ai_is_gonna_ruin_the_world/',
    ],
    'technology': [
        'https://www.reddit.com/r/technology/comments/15wnpav/police_in_england_installed_an_ai_camera_system/',
    ],
    'AskReddit': [
        'https://www.reddit.com/r/AskReddit/comments/135ixrr/how_scared_are_you_of_ai_replacing_your_career/',
        'https://www.reddit.com/r/AskReddit/comments/11hqwfb/does_artificial_intelligence_scare_you_why/',
        'https://www.reddit.com/r/AskReddit/comments/14ax16q/what_makes_ai_so_scary_to_people/',
    ],
    'privacy': [
        'https://www.reddit.com/r/privacy/comments/7itwrl/facial_recognition_for_public_surveillance_is/',
        'https://www.reddit.com/r/privacy/comments/12b9di3/we_really_need_regulations_on_ai_and_privacy/',
    ],
    'RandomThoughts': [
        'https://www.reddit.com/r/RandomThoughts/comments/125y0vu/ai_is_going_to_ruin_so_many_peoples_lives_so/',
        'https://www.reddit.com/r/RandomThoughts/comments/11r6dio/im_so_scared_of_ai/',
    ],
    'AskEngineers': [
        'https://www.reddit.com/r/AskEngineers/comments/13xgz8z/whats_with_the_ai_fear/',
    ],
    'changemyview': [
        'https://www.reddit.com/r/changemyview/comments/84exk2/cmv_the_ai_scare_is_rooted_in_sensational_fear/',
    ],
    'Ecommerce': [
        'https://www.reddit.com/r/ecommerce/comments/13ojidl/be_honest_are_you_scared_of_ai_taking_over_your/',
    ],
    'ChatGPT': [
        'https://www.reddit.com/r/ChatGPT/comments/135z6jw/what_are_ai_developers_seeing_privately_that_they/',
    ],
    'YouShouldKnow': [
        'https://www.reddit.com/r/YouShouldKnow/comments/120m3f6/ysk_the_future_of_monitoring_how_large_language/',
    ],
    'ArtistLounge': [
        'https://www.reddit.com/r/ArtistLounge/comments/1701von/what_scares_me_the_most_about_ai_art_is_that_it/',
    ],
    'CasualConversation': [
        'https://www.reddit.com/r/CasualConversation/comments/121vk3k/my_parents_have_always_said_new_technology_scares/',
    ],
    'Freelance': [
        'https://www.reddit.com/r/freelance/comments/11wekfi/anxiety_around_ai/',
    ],
    'graphic_design': [
        'https://www.reddit.com/r/graphic_design/comments/wre0zb/am_i_the_only_one_scared_from_ai_replacing_his_job/',
    ],
    'GenX': [
        'https://www.reddit.com/r/GenX/comments/17fl2n1/anyone_else_low_key_terrified_of_ais_impact_on/',
    ],
    'intj': [
        'https://www.reddit.com/r/intj/comments/11mxwjy/is_the_future_of_ai_terrifying_to_anybody_else/',
    ],
    'ControlProblem': [
        'https://www.reddit.com/r/ControlProblem/comments/189vy8r/terrified_about_ai_and_agiasi/',
    ],
    'FinancialCareers': [
        'https://www.reddit.com/r/FinancialCareers/comments/122c3qk/im_so_scared_of_ai/',
    ],
    'StableDiffusion': [
        'https://www.reddit.com/r/StableDiffusion/comments/16q0hun/seriously_whats_with_the_rampant_aiphobia_that/',
    ],
    'INTP': [
        'https://www.reddit.com/r/INTP/comments/12xpv9e/im_honestly_a_little_baffled_as_to_why_theres/',
    ],
}

# Folder path for the extracted data (note the trailing slash)
github_linked_folder = '/work/GitHub_ML_Deepnote/Machine Learning/1. Extracted Reddit Data/'

In [7]:
# Collect the post row and all comment rows for every configured subreddit.
all_comments_data = []

# Pause before a fetch when fewer than this many requests remain in the current window
RATE_LIMIT_BUFFER = 10

for subreddit_name in subreddit_names:
    # Rate-limit state currently known to the client. Before the first request
    # the values may be missing or None; the check below is then simply skipped
    # and the first fetch populates them.
    limits = getattr(reddit.auth, 'limits', None) or {}
    remaining_requests = limits.get('remaining')
    reset_timestamp = limits.get('reset_timestamp')

    # Compare against None explicitly: a truthiness test would skip the wait
    # precisely when 0 requests remain.
    if remaining_requests is not None and remaining_requests < RATE_LIMIT_BUFFER:
        if reset_timestamp is None:
            reset_timestamp = time.time() + 60  # unknown reset time: default to 60 seconds later
        sleep_time = max(0, reset_timestamp - time.time())
        print(f"Approaching rate limit, sleeping for {sleep_time} seconds.")
        time.sleep(sleep_time)

    # Fetch each subreddit exactly once; a second pass would duplicate every
    # row and double the number of API requests.
    comments = fetch_subreddit_data(subreddit_name, post_urls_for_each_subreddit[subreddit_name])
    all_comments_data.extend(comments)

# Convert all comments data to DataFrame
all_comments_df = pd.DataFrame(all_comments_data)

# Save all comments data (github_linked_folder already ends with a slash)
all_comments_df.to_csv(github_linked_folder + '0_all_comments_data.csv', index=False, encoding='utf-8')



ResponseException: received 401 HTTP response

In [None]:
# Creating summary DataFrame: one row per (subreddit, post_title) pair.
# groupby() silently drops rows whose 'subreddit' is missing, so any row without
# a label first inherits the label of the closest preceding row. The post row,
# which always has a label, is stored directly before its comments. This is a
# no-op when every row is already labelled.
labelled_comments_df = all_comments_df.assign(subreddit=all_comments_df['subreddit'].ffill())

# NOTE(review): size() counts every row of the group, so 'total_comments'
# includes the row that holds the post itself.
summary_data = labelled_comments_df.groupby(['subreddit', 'post_title']).size().reset_index(name='total_comments')

# NOTE(review): this maps each row to the whole URL list configured for its
# subreddit, not to the single URL of the post in that row.
summary_data['url'] = summary_data['subreddit'].map(post_urls_for_each_subreddit)

# Zero-padded running number (001, 002, ...) in the sorted group order
summary_data['post_id'] = summary_data.index.map(lambda x: f'{x+1:03d}')
summary_data['post_sentiment'] = summary_data['post_title'].apply(get_sentiment)
summary_data = summary_data[['subreddit', 'post_id', 'post_sentiment', 'post_title', 'total_comments', 'url']]

# Save the summary data (github_linked_folder already ends with a slash)
summary_data.to_csv(github_linked_folder + '0_subreddits_summary.csv', index=False)

# Display the summary data
print(summary_data)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=f64215d6-debc-46bd-b273-63565459a66d' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>