In [23]:
import os
import time
import pandas as pd
from dotenv import load_dotenv, find_dotenv

import praw
import prawcore
from praw.models import MoreComments

In [24]:
# Load variables from the .env file located by find_dotenv() into the environment.
load_dotenv(find_dotenv())

# NOTE(review): load_dotenv() leaves variables that are already set untouched by
# default, and USERNAME is commonly pre-set by the operating system (e.g. on
# Windows) -- confirm the Reddit account name is the value actually picked up.
CLIENT_ID, SECRET_TOKEN, USERNAME, PASSWORD = map(
    os.getenv, ("CLIENT_ID", "SECRET_TOKEN", "USERNAME", "PASSWORD")
)

# Build the API client from the app credentials plus the account login.
reddit_credentials = {
    "client_id": CLIENT_ID,
    "client_secret": SECRET_TOKEN,
    "password": PASSWORD,
    "username": USERNAME,
    "user_agent": 'Tutorial',
}
reddit = praw.Reddit(**reddit_credentials)

# Sanity check: print the account this session is logged in as.
print(reddit.user.me())

andreaLolli


In [25]:
# Submissions to crawl; the comment-harvesting loop below reads the 'id' and
# 'author' columns of this frame.
df = pd.read_csv("data/rap_beef.csv")

In [26]:
REPLACE_MORE_LIMIT = 20    # MoreComments placeholders expanded per submission
RETRY_SLEEP_SECONDS = 60   # back-off used when the API supplies no wait time
MAX_REPLY_DEPTH = 3        # top-level comments plus two levels of replies


def _expand_more_comments(submission):
    """Expand MoreComments placeholders of `submission`, retrying on rate limits.

    Keeps calling ``replace_more`` until it succeeds. A RATELIMIT APIException
    or a prawcore TooManyRequests triggers a sleep and another attempt; any
    other APIException is re-raised to the caller.
    """
    while True:
        try:
            submission.comments.replace_more(limit=REPLACE_MORE_LIMIT)
            return
        except praw.exceptions.APIException as e:
            if e.error_type != 'RATELIMIT':
                raise
            # `sleep_time` is not an attribute PRAW documents on APIException,
            # so read it defensively instead of crashing inside the handler.
            wait_seconds = getattr(e, 'sleep_time', RETRY_SLEEP_SECONDS)
            print(f"Rate limit exceeded. Sleeping for {wait_seconds} seconds.")
            time.sleep(wait_seconds)
        except prawcore.exceptions.TooManyRequests:
            print(f"Too many requests. Sleeping for {RETRY_SLEEP_SECONDS} seconds.")
            time.sleep(RETRY_SLEEP_SECONDS)


def _collect_reply_edges(parent_author, comments, depth, edges):
    """Append ``[parent, replier]`` name pairs for `comments` and their replies.

    Args:
        parent_author: author being replied to, or None when the parent is
            unknown (no pair is recorded for this level, replies are still
            visited).
        comments: iterable of comments at the current nesting level.
        depth: number of nesting levels still to visit, including this one.
        edges: list extended in place, in depth-first order.
    """
    for comment in comments:
        if isinstance(comment, MoreComments):
            continue
        author = comment.author
        if not author:
            # NOTE(review): replies under a comment with no author are skipped
            # entirely, as in the original nested loops -- confirm this is
            # intended rather than only dropping the single edge.
            continue
        if parent_author is not None:
            edges.append([str(parent_author), str(author)])
        if depth > 1:
            _collect_reply_edges(author, comment.replies, depth - 1, edges)


authors_rel = []

for submission_id, sub_auth in zip(df['id'], df['author']):
    sub = reddit.submission(id=submission_id)

    # Load more comments by replacing MoreComments objects
    _expand_more_comments(sub)

    # A missing author in the CSV arrives as NaN; str() would turn it into the
    # fake user 'nan', so treat it as "no parent" instead.
    root_author = None if pd.isna(sub_auth) else sub_auth

    # Record submission -> comment -> reply -> reply author pairs
    _collect_reply_edges(root_author, sub.comments, MAX_REPLY_DEPTH, authors_rel)

Too many requests. Sleeping for 60 seconds.
Too many requests. Sleeping for 60 seconds.
Too many requests. Sleeping for 60 seconds.


In [27]:
# Tabulate the collected [parent, replier] pairs, one interaction per row.
edge_columns = ['Parent_Author', 'Reply_Author']
authors_rel_df = pd.DataFrame(data=authors_rel, columns=edge_columns)
authors_rel_df

Unnamed: 0,Parent_Author,Reply_Author
0,Me_Zebra,gloomygl
1,gloomygl,Me_Zebra
2,Me_Zebra,yao_ming07
3,Me_Zebra,gloomygl
4,Me_Zebra,Waxfuu323
...,...,...
138052,whoistourlife,EpikPhones
138053,whoistourlife,Pb_Fardi
138054,whoistourlife,AlphaLax85
138055,whoistourlife,dylaptop


In [28]:
# Keep each (parent, replier) pair once.
unique_pairs = authors_rel_df.drop_duplicates()

# Rows where either author is the literal string 'None' are blanked out and dropped.
placeholder_cells = unique_pairs.eq('None')
authors_rel_df = unique_pairs.mask(placeholder_cells).dropna()

# Persist the edge list; the frame's index is written as the first column.
authors_rel_df.to_csv('data/authors_relations.csv')