In [2]:
import json
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Scrape post titles, links and scores from the r/politics listing page and
# save them as a JSON array of {"title", "link", "score"} objects.
#
# NOTE(review): a recorded run of this cell reported "Scraped 0 posts", i.e.
# the `div.Post` selector below matched nothing in the HTML that was returned.
# The selectors likely need updating against the markup actually served (or
# the data fetched another way) -- confirm before relying on the output.

# Step 1: Define the URL for the subreddit page
url = "https://www.reddit.com/r/politics/"

# Step 2: Fetch the page HTML
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36"
    )
}
try:
    # Without a timeout, requests may block forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
except requests.RequestException as exc:
    # Connection errors, DNS failures and timeouts all derive from
    # RequestException; report them the same way as a bad status code.
    print(f"Failed to fetch the page: {exc}")
    sys.exit(1)

# Check if the request was successful
if response.status_code != 200:
    print(f"Failed to fetch the page: {response.status_code}")
    # sys.exit() rather than the interactive-only exit() helper; a non-zero
    # status lets callers detect the failure.
    sys.exit(1)

# Step 3: Parse the HTML with BeautifulSoup
soup = BeautifulSoup(response.text, "html.parser")

# Step 4: Scrape the posts
posts = []
post_elements = soup.find_all("div", class_="Post")  # candidate post containers

if not post_elements:
    # Make a selector miss visible instead of silently writing an empty file.
    print(
        'Warning: no <div class="Post"> elements found; '
        "the page markup may not match the selectors used here.",
        file=sys.stderr,
    )

for post in post_elements:
    # Extract post title (first <h3> inside the post container)
    title_element = post.find("h3")
    title = title_element.text.strip() if title_element else "N/A"

    # Extract post link (first <a> carrying an href). urljoin resolves
    # relative hrefs against the page URL and leaves absolute hrefs intact,
    # instead of blindly prefixing the site origin.
    link_element = post.find("a", href=True)
    link = urljoin(url, link_element["href"]) if link_element else "N/A"

    # Extract post score (first <div class="score">, if any)
    score_element = post.find("div", class_="score")
    score = score_element.text.strip() if score_element else "N/A"

    # Append post data
    posts.append({
        "title": title,
        "link": link,
        "score": score,
    })

# Step 5: Save data to a JSON file
output_file = "politics_posts.json"
# Explicit encoding so the file is written the same way on every platform.
with open(output_file, "w", encoding="utf-8") as json_file:
    json.dump(posts, json_file, indent=4)

print(f"Scraped {len(posts)} posts. Data saved to '{output_file}'.")


Scraped 0 posts. Data saved to 'politics_posts.json'.
