# Creazione del dataset con PRAW

## Collegamento al profilo

In [20]:
import praw
from dotenv import load_dotenv
load_dotenv()
import os
import pandas as pd
import csv
import igraph as ig
import sys
sys.path.append('..')

In [21]:
# Build the authenticated PRAW client. Every setting is read from the
# process environment (populated by load_dotenv() in the import cell), so no
# credential is written into the notebook itself.
reddit = praw.Reddit(
    username=os.environ.get('reddit_username'),
    password=os.environ.get('reddit_password'),
    client_id=os.environ.get('reddit_client_id'),
    client_secret=os.environ.get('reddit_client_secret'),
    user_agent=os.environ.get('reddit_user_agent'),
)

## reddit_comments

In [23]:
def _author_fields(author):
    """Return ``(author_id, author_name)`` for an author object.

    A missing author (falsy value) yields the placeholder ``"deleted"`` for
    both fields. An author that has a name but no ``id`` attribute keeps its
    name and gets ``"deleted"`` as id instead of raising ``AttributeError``.
    """
    if not author:
        return "deleted", "deleted"
    # NOTE(review): PRAW documents that suspended accounts expose only a few
    # attributes (no ``id``) -- confirm this is the case being hit. Only
    # AttributeError is absorbed here; any other error still propagates.
    return getattr(author, "id", "deleted"), author.name


def extract_comments(comment,post_id):
    """Flatten one comment into the tuple of fields stored in the CSV.

    Parameters
    ----------
    comment
        Comment-like object exposing ``id``, ``body``, ``score``,
        ``replies``, ``created_utc``, ``author`` and a callable ``parent()``.
    post_id
        Id of the submission being processed. A comment whose parent has
        this id is treated as a direct reply to the post.

    Returns
    -------
    tuple
        ``(comment_id, comment_body, comment_score, comment_replies,
        created_utc, comment_author_id, comment_author_name,
        parent_author_id, parent_author_name)``.
        ``comment_replies`` is the number of direct replies. Direct replies
        to the post get parent id ``"0"`` and parent name ``"OP"``; missing
        authors are reported as ``"deleted"``.
    """
    comment_author_id, comment_author_name = _author_fields(comment.author)

    # parent() returns the parent object (the post or another comment),
    # not its author.
    parent = comment.parent()
    if parent.id == post_id:
        parent_author_id, parent_author_name = "0", "OP"
    else:
        parent_author_id, parent_author_name = _author_fields(parent.author)

    return (
        comment.id,
        comment.body,
        comment.score,
        len(comment.replies),
        comment.created_utc,
        comment_author_id,
        comment_author_name,
        parent_author_id,
        parent_author_name,
    )

In [25]:
# Ids of the Reddit submissions whose comment trees are collected.
post_ids = ['17z2hci', '1bulhj9', '1b6cg4q', '17lese9', '1cwqkqe', '10v8sey', '197vo6o', '1d5h5h6']

# Column order of the output CSV.
fieldnames = [
    'comment_id', 
    'comment_author_id', 
    'comment_author_name', 
    'comment_parent_id', 
    'comment_parent_name', 
    'comment_score', 
    'comment_replies', 
    'comment_posted_time', 
    'comment_body',
    'post_id',
    'post_title',
    'post_url',
    'post_subreddit'
]

# Value passed as `limit` to replace_more() for every submission.
MORE_COMMENTS_LIMIT = 30

# Mode 'w+' truncates the file: every run rebuilds the CSV from scratch
# (it does NOT append to a previous run).
with open('../reddit_comments.csv', 'w+', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_MINIMAL)
    writer.writeheader()

    for post_id in post_ids:
        rows = []
        failures = []  # (comment id, exception) for comments that could not be extracted
        submission = reddit.submission(post_id)
        submission.comments.replace_more(limit=MORE_COMMENTS_LIMIT)
        post_title = submission.title
        post_id = submission.id
        post_url = submission.url
        # NOTE(review): `.name` produces values such as "t5_2rbm5" in the saved
        # output; `display_name` may have been intended -- confirm before changing.
        post_subreddit = submission.subreddit.name
        comments = submission.comments.list()
        length = len(comments)
        print(f"Processing post {post_title}\n", end='\r')
        for i, comment in enumerate(comments, start=1):
            print(f"Processed {(i/length*100):.2f}% of comments", end='\r')

            # Because replace_more() is called with a limit, unexpanded
            # MoreComments placeholders can remain in the list. They carry no
            # comment data, so skip them explicitly instead of relying on an
            # exception being swallowed.
            if isinstance(comment, praw.models.MoreComments):
                continue

            try:
                (comment_id, comment_body, comment_score, comment_replies,
                 created_utc, comment_author_id, comment_author_name,
                 parent_author_id, parent_author_name) = extract_comments(comment, post_id)
            except Exception as e:
                # Best effort: one bad comment must not abort the scrape, but
                # the failure is recorded and reported instead of being hidden.
                failures.append((getattr(comment, 'id', '?'), e))
                continue

            rows.append({
                'comment_id' : comment_id, 
                'comment_author_id' : comment_author_id,
                'comment_author_name' : comment_author_name,
                'comment_parent_id' : parent_author_id,
                'comment_parent_name' : parent_author_name,
                'comment_score' : comment_score,
                'comment_replies' : comment_replies,
                'comment_posted_time' : created_utc,
                'comment_body'  : comment_body,
                'post_id' : post_id,
                'post_title' : post_title,
                'post_url' : post_url,
                'post_subreddit' : post_subreddit
            })
        writer.writerows(rows)

        if failures:
            # Reported on stderr so the progress output on stdout is unchanged.
            details = ", ".join(
                f"{failed_id} ({type(error).__name__}: {error})"
                for failed_id, error in failures
            )
            print(
                f"Skipped {len(failures)} comment(s) of post {post_id}: {details}",
                file=sys.stderr,
            )

Processing post Ho visto troppi post sulla Giulia Cecchettin
Processing post Il problema del caldo anomalo(causato dal riscaldamento globale) che a nessuno sembra fregare qualcosa...
Processing post Possibile guerra NATO - RUSSIA. Se davvero dovessero succedere e vi chiamassero al fronte per combattere cosa fareste?
Processing post 4000 bambini morti in Palestina. Possibile che nessuno faccia niente a livello politico?
Processing post Chi voterete alle europee?
Processing post Natura sì: niente cibo a base di insetti/sintetici. Non rientrano nella nostra definizione di salute
Processing post Altre nubi sui Ferragnez settimana dopo settimana
Processing post Boom di giovani in fuga dall’Italia, sono oltre un milione
Processed 100.00% of comments

In [26]:
# Read the CSV at the path the scraping cell writes to back into a DataFrame.
open_file = pd.read_csv('../reddit_comments.csv')

In [29]:
# Bare last expression: the notebook renders the DataFrame as the cell output.
open_file

Unnamed: 0,comment_id,comment_author_id,comment_author_name,comment_parent_id,comment_parent_name,comment_score,comment_replies,comment_posted_time,comment_body,post_id,post_title,post_url,post_subreddit
0,k9yfohd,209w6fpg,moonwave91,0,OP,160,2,1.700439e+09,Il vero problema è l'immensa orda di giornalis...,17z2hci,Ho visto troppi post sulla Giulia Cecchettin,https://www.reddit.com/r/Italia/comments/17z2h...,t5_2rbm5
1,k9xjbzd,deleted,deleted,0,OP,310,3,1.700426e+09,"Non è un idiota l'ex ragazzo, è un assassino, ...",17z2hci,Ho visto troppi post sulla Giulia Cecchettin,https://www.reddit.com/r/Italia/comments/17z2h...,t5_2rbm5
2,k9zies2,e5pmjzs5c,MainDelay9804,0,OP,22,0,1.700459e+09,Sono sti ragazzetti sui social che ormai han p...,17z2hci,Ho visto troppi post sulla Giulia Cecchettin,https://www.reddit.com/r/Italia/comments/17z2h...,t5_2rbm5
3,k9xb0xr,utwr8mto,AlessandroIT,0,OP,83,1,1.700422e+09,"Non fatevi influenzare dai post, continuate ad...",17z2hci,Ho visto troppi post sulla Giulia Cecchettin,https://www.reddit.com/r/Italia/comments/17z2h...,t5_2rbm5
4,k9xmjhd,49phg2z0,CthulhuParty,0,OP,87,1,1.700427e+09,ti da fastidio perchè percepisci giustamente c...,17z2hci,Ho visto troppi post sulla Giulia Cecchettin,https://www.reddit.com/r/Italia/comments/17z2h...,t5_2rbm5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3758,l6oapir,vin55xk0,bister_is_here,fclpv,_samux_,1,0,1.717274e+09,Scusami non sto capendo.\n\n\nPremesso che ero...,1d5h5h6,"Boom di giovani in fuga dall’Italia, sono oltr...",https://quifinanza.it/economia/giovani-italian...,t5_2rbm5
3759,l6moubs,b1qcxw27,SimpatiaPazza,5sub6wtou,chris90italy,1,1,1.717252e+09,Ma cosa devo argomentare? Ti ho chiesto perché...,1d5h5h6,"Boom di giovani in fuga dall’Italia, sono oltr...",https://quifinanza.it/economia/giovani-italian...,t5_2rbm5
3760,l6moxgi,b1qcxw27,SimpatiaPazza,5sub6wtou,chris90italy,1,1,1.717252e+09,Italia paese di frustrati: ecco l'esempio vivente,1d5h5h6,"Boom di giovani in fuga dall’Italia, sono oltr...",https://quifinanza.it/economia/giovani-italian...,t5_2rbm5
3761,l6mq6kl,5sub6wtou,chris90italy,b1qcxw27,SimpatiaPazza,1,0,1.717253e+09,La cultura è l'esatto opposto di come ti compo...,1d5h5h6,"Boom di giovani in fuga dall’Italia, sono oltr...",https://quifinanza.it/economia/giovani-italian...,t5_2rbm5
