In [4]:
%pip install praw

Collecting praw
  Downloading praw-7.8.1-py3-none-any.whl.metadata (9.4 kB)
Collecting prawcore<3,>=2.4 (from praw)
  Using cached prawcore-2.4.0-py3-none-any.whl.metadata (5.0 kB)
Collecting update_checker>=0.18 (from praw)
  Using cached update_checker-0.18.0-py3-none-any.whl.metadata (2.3 kB)
Downloading praw-7.8.1-py3-none-any.whl (189 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.3/189.3 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0mMB/s[0m eta [36m0:00:01[0m
[?25hUsing cached prawcore-2.4.0-py3-none-any.whl (17 kB)
Using cached update_checker-0.18.0-py3-none-any.whl (7.0 kB)
Installing collected packages: update_checker, prawcore, praw
Successfully installed praw-7.8.1 prawcore-2.4.0 update_checker-0.18.0
Note: you may need to restart the kernel to use updated packages.


In [79]:
import praw
import pandas as pd
import time
from datetime import datetime
import networkx as nx

In [81]:
import yaml

In [143]:
def initialize_reddit():
    with open("../config/config.yaml", "r") as file:
        config = yaml.safe_load(file)

    # Initialize Reddit client
    return praw.Reddit(
        client_id=config["api_credentials"]["reddit"]["client_id"],
        client_secret=config["api_credentials"]["reddit"]["client_secret"],
        user_agent=config["api_credentials"]["reddit"]["user_agent"],
    )

In [145]:
# Read venue data
bars_df = pd.read_csv('../data/raw/Chicago_Bars.csv')
venues_df = pd.read_csv('../data/raw/PPA_and_Music_and_Dance_Venues.csv')

In [147]:
# Initialize Reddit API
reddit = initialize_reddit()

In [149]:
reddit

<praw.reddit.Reddit at 0x12d24a0c0>

In [151]:
def get_venue_names(bars_df, venues_df):
    bar_names = bars_df['DOING BUSINESS AS NAME'].dropna().unique().tolist()
    venue_names = venues_df['DOING BUSINESS AS NAME'].dropna().unique().tolist()
    return list(set(bar_names + venue_names))

In [153]:
# Get venue names
venues = get_venue_names(bars_df, venues_df)

In [155]:
venues[:10]

["THE NEW PAM'S LOUNGE",
 'CLUB ABERDEEN INC',
 'THE ARROGANT FROG BAR',
 'SUMMER HOUSE SANTA MONICA',
 'TOLEDO RESTAURANT',
 'RIVER SHANNON',
 'RED ROOSTER TAVERN',
 "TRIER LAD'S, INC.",
 "ETHYL'S PARTY",
 'IFLY']

In [157]:
len(venues)

2887

In [159]:
def fetch_reddit_data(reddit, venues, subreddits=['chicago', 'chicagofood', 'chicagonightlife']):
    posts_data = []
    comments_data = []
    user_data = []
    
    for subreddit_name in subreddits:
        subreddit = reddit.subreddit(subreddit_name)
        
        # Search for each venue
        for venue in venues[:200]:
            print(venue)
            try:
                # Search for posts
                for submission in subreddit.search(venue, limit=20):
                    posts_data.append({
                        'post_id': submission.id,
                        'title': submission.title,
                        'author': str(submission.author),
                        'score': submission.score,
                        'created_utc': datetime.fromtimestamp(submission.created_utc),
                        'venue': venue,
                        'subreddit': subreddit_name
                    })
                    
                    # Collect user data
                    if submission.author:
                        user_data.append({
                            'username': str(submission.author),
                            'created_utc': datetime.fromtimestamp(submission.author.created_utc),
                            'comment_karma': submission.author.comment_karma,
                            'link_karma': submission.author.link_karma
                        })
                    
                    # Fetch comments
                    submission.comments.replace_more(limit=0)
                    for comment in submission.comments.list():
                        comments_data.append({
                            'comment_id': comment.id,
                            'post_id': submission.id,
                            'author': str(comment.author),
                            'body': comment.body,
                            'score': comment.score,
                            'created_utc': datetime.fromtimestamp(comment.created_utc)
                        })
                        
                        # Collect commenter data
                        if comment.author:
                            user_data.append({
                                'username': str(comment.author),
                                'created_utc': datetime.fromtimestamp(comment.author.created_utc),
                                'comment_karma': comment.author.comment_karma,
                                'link_karma': comment.author.link_karma,
                                'comment_id': comment.id
                            })
                            
                time.sleep(2)  # Respect Reddit's rate limits
                
            except Exception as e:
                print(f"Error processing venue {venue}: {str(e)}")
                continue
    
    return pd.DataFrame(posts_data), pd.DataFrame(comments_data), pd.DataFrame(user_data).drop_duplicates(subset='username')

In [161]:
def create_interaction_graph(posts_df, comments_df):
    G = nx.Graph()
    
    # Add nodes for users
    all_users = set(posts_df['author'].unique()) | set(comments_df['author'].unique())
    G.add_nodes_from(all_users)
    
    # Add edges for interactions (comments)
    for _, comment in comments_df.iterrows():
        post_author = posts_df[posts_df['post_id'] == comment['post_id']]['author'].iloc[0]
        if post_author != comment['author']:
            G.add_edge(post_author, comment['author'])
    
    return G

In [163]:
posts_df, comments_df, users_df = fetch_reddit_data(reddit, venues)
G = create_interaction_graph(posts_df, comments_df)
    
# Save data
posts_df.to_csv('venue_posts.csv', index=False)
comments_df.to_csv('venue_comments.csv', index=False)
users_df.to_csv('venue_users.csv', index=False)
nx.write_gexf(G, 'venue_interactions.gexf')

THE NEW PAM'S LOUNGE
CLUB ABERDEEN INC
Error processing venue CLUB ABERDEEN INC: 'Redditor' object has no attribute 'created_utc'
THE ARROGANT FROG BAR
SUMMER HOUSE SANTA MONICA
Error processing venue SUMMER HOUSE SANTA MONICA: 'Redditor' object has no attribute 'created_utc'
TOLEDO RESTAURANT
RIVER SHANNON
RED ROOSTER TAVERN
Error processing venue RED ROOSTER TAVERN: 'Redditor' object has no attribute 'created_utc'
TRIER LAD'S, INC.
ETHYL'S PARTY
IFLY
Error processing venue IFLY: 'Redditor' object has no attribute 'created_utc'
SURFBAR
NEW WARSAW RESTAURANT
Error processing venue NEW WARSAW RESTAURANT: 'Redditor' object has no attribute 'created_utc'
BON V
TUMAN'S TAVERN
FENIX LOUNGE LTD
JP'S PUB
El Camino Club
Error processing venue El Camino Club: 'Redditor' object has no attribute 'created_utc'
CHEZ
Error processing venue CHEZ: 'Redditor' object has no attribute 'created_utc'
LLOYD'S LOUNGE, INC.
Error processing venue LLOYD'S LOUNGE, INC.: 'Redditor' object has no attribute 'creat