In [14]:
import json
from datetime import datetime, timezone

import requests
from bs4 import BeautifulSoup
from transformers import pipeline

In [15]:
def _utc_iso(timestamp):
    """Convert a Unix epoch timestamp to an ISO-8601 string with a UTC offset."""
    # Passing tz explicitly avoids the machine's local timezone leaking into
    # values that are stored under a '*_utc' key.
    return datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat()


def _extract_comments(comments_data):
    """Return the top-level comments from a decoded comment-page payload.

    The payload is expected to be a list whose second element holds the
    comment listing; anything else yields an empty list.
    """
    if not isinstance(comments_data, list) or len(comments_data) < 2:
        return []

    comments = []
    for comment in comments_data[1]['data']['children']:
        comment_data = comment['data']
        # Entries without a 'body' carry no comment text and are skipped.
        if 'body' not in comment_data:
            continue
        comments.append({
            'author': comment_data.get('author', '[deleted]'),
            'body': comment_data['body'],
            'score': comment_data.get('score', 0),
            'created_utc': _utc_iso(comment_data['created_utc'])
        })
    return comments


def scrape_reddit_topics(search_query, timeout=10):
    """Search Reddit for ``search_query`` and collect each hit with its comments.

    For every post in the search results a second request fetches the post's
    comment page, and the top-level comments are attached to the post record.
    The combined records are written to ``<query>_results.json`` (spaces in
    the query replaced by underscores) in the current working directory.

    Args:
        search_query: Free-text search term. It is passed to the HTTP layer
            as a query parameter, so spaces and characters such as ``&`` are
            encoded correctly.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts with the keys ``title``, ``author``, ``score``,
        ``url``, ``created_utc``, ``num_comments``, ``selftext`` and
        ``comments``; or ``None`` if any step fails (the error is printed).
    """
    url = "https://www.reddit.com/search.json"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.3'
    }
    try:
        # Let requests build the query string so the search term is escaped.
        response = requests.get(
            url,
            headers=headers,
            params={'q': search_query, 'sort': 'relevance'},
            timeout=timeout,
        )
        # Fail early on HTTP errors instead of parsing an error page as results.
        response.raise_for_status()
        data = response.json()

        posts_with_comments = []

        for post in data['data']['children']:
            post_data = post['data']

            # Drop any trailing slash so the suffix yields ".../slug.json"
            # rather than ".../slug/.json".
            comments_url = f"https://www.reddit.com{post_data['permalink'].rstrip('/')}.json"
            comments_response = requests.get(comments_url, headers=headers, timeout=timeout)
            comments_response.raise_for_status()
            comments = _extract_comments(comments_response.json())

            posts_with_comments.append({
                'title': post_data['title'],
                'author': post_data['author'],
                'score': post_data['score'],
                'url': post_data['url'],
                'created_utc': _utc_iso(post_data['created_utc']),
                'num_comments': post_data['num_comments'],
                'selftext': post_data.get('selftext', ''),
                'comments': comments
            })

        # Persist the results; the file name is derived from the query so
        # other cells can locate it from the same input.
        with open(f'{search_query.replace(" ", "_")}_results.json', 'w', encoding='utf-8') as f:
            json.dump(posts_with_comments, f, indent=2, ensure_ascii=False)

        return posts_with_comments

    except Exception as e:
        # Deliberate best-effort boundary: callers receive None on any failure.
        print(f"Error occurred: {str(e)}")
        return None

In [16]:
# Ask the user for a term and scrape Reddit for it. The entered text is used
# by scrape_reddit_topics as a site-wide search query.
print("Hi! Welcome to the Reddit Web Scraper!")
# Strip stray whitespace so it ends up neither in the query nor in the
# results file name derived from it.
subreddit = input("Please enter the subreddit you would like to scrape: ").strip()
if subreddit:
    posts = scrape_reddit_topics(subreddit)
else:
    # Nothing to search for: skip the network calls and the file write.
    posts = None
    print("No search term entered; nothing was scraped.")

Hi! Welcome to the Reddit Web Scraper!


In [17]:
# Load the scraped results back from the JSON file written by the scraper.
# The file is written as UTF-8 with non-ASCII characters preserved, so it is
# read with the same encoding; the context manager closes the handle once
# parsing is done.
with open(f'{subreddit.replace(" ", "_")}_results.json', encoding='utf-8') as file:
    data = json.load(file)

In [18]:
def extract_posts_and_comments(json_obj):
    """Flatten scraped post records into a single list of text strings.

    For each record, the post's non-empty ``selftext`` comes first, followed
    by the ``body`` of every comment that has one, in their stored order.

    Args:
        json_obj: Iterable of post dicts, each optionally holding a
            ``selftext`` string and a ``comments`` list of dicts.

    Returns:
        A list of strings in record order.
    """
    collected = []
    for record in json_obj:
        post_text = record.get('selftext')
        if post_text:
            collected.append(post_text)

        # Records without a 'comments' key contribute no comment text.
        collected.extend(
            reply['body']
            for reply in record.get('comments', ())
            if 'body' in reply
        )
    return collected

# Flatten the loaded results into one list of strings: post selftext and
# comment bodies, in file order.
posts_and_comments = extract_posts_and_comments(data)



  ve been a big fan of John Mayer for a long time and I've learnt most of his stuff. This includes Neon which took me a few weeks of dedicated practice and made me appreciate how he plays it AND sings at the same time. I couldn't hope to sing that song while playing. I was talking with some lads in this music club I'm involved in and they all seemed to hate John Mayer for his apparent arrogance and 'generic' musical ability.
  
I don't understand this hate for John. Technique-wise, he's up there with Hendrix and SRV. He's one of the few people who I believe could play anything on the guitar, acoustic or electric. He's so technically advanced but yet, puts out a piece like slow dancing in a burning room which is so melodical and deliberate. I honestly think he's one of the most brilliant guitarists in history and might even go down as the GOAT. He also seems very down to earth in the interviews that I've seen.
John Mayer is a great guitarist who makes music I don’t like.
Personally I lo

In [None]:
# TODO: NLP processing of the extracted texts (not implemented yet)
