In [17]:
import requests
import csv
import datetime
from tqdm import tqdm
import time

# Inclusive range of calendar days to collect. The values are naive datetimes
# that are interpreted as UTC days, so results do not depend on the timezone
# of the machine running the script.
start_date = datetime.datetime(2022, 1, 30)
end_date = datetime.datetime(2022, 1, 31)

API_URL = "https://api.pushshift.io/reddit/search/submission/"
OUTPUT_PATH = 'reddit_bitcoin_posts.csv'
CSV_HEADER = ['post_id', 'title', 'selftext', 'url', 'author', 'score',
              'publish_date', 'num_of_comments', 'permalink', 'flair']

# Self-text values that mean the body is no longer available.
EMPTY_SELFTEXT_MARKERS = ('[removed]', '[deleted]')

SECONDS_PER_DAY = 86400
RETRY_DELAY_SECONDS = 60
REQUEST_TIMEOUT_SECONDS = 30
MAX_ATTEMPTS = 5


def utc_day_start(day):
    """Return the epoch timestamp (int seconds) of naive *day* taken as UTC.

    Calling ``.timestamp()`` on a naive datetime uses the local timezone,
    which would shift the query window depending on where the script runs.
    """
    return int(day.replace(tzinfo=datetime.timezone.utc).timestamp())


def post_to_row(post):
    """Convert one submission dict into a CSV row ordered like CSV_HEADER.

    ``selftext`` falls back to '' when the key is missing or holds a
    removed/deleted marker; ``link_flair_text`` falls back to 'NaN' when the
    key is missing. All other keys are required and raise KeyError if absent.
    ``publish_date`` is a naive datetime expressed in UTC.
    """
    selftext = post.get('selftext', '')
    if selftext in EMPTY_SELFTEXT_MARKERS:
        selftext = ''

    # Convert in UTC, then drop tzinfo so the CSV keeps the plain
    # "YYYY-MM-DD HH:MM:SS" format without a "+00:00" suffix.
    created = datetime.datetime.fromtimestamp(
        post['created_utc'], tz=datetime.timezone.utc
    ).replace(tzinfo=None)

    return [
        post['id'],
        post['title'],
        selftext,
        post['url'],
        post['author'],
        post['score'],
        created,
        post['num_comments'],
        post['permalink'],
        post.get('link_flair_text', 'NaN'),
    ]


def fetch_posts(day_start, date_str, max_attempts=MAX_ATTEMPTS):
    """Fetch the submissions for the 24 hours starting at *day_start*.

    Retries after RETRY_DELAY_SECONDS on a non-200 response or a network
    error, up to *max_attempts* tries in total.

    Returns the list stored under the response's 'data' key.
    Raises RuntimeError when every attempt fails.
    """
    # NOTE: the query asks for at most 100 results (size=100) and there is no
    # pagination, so a day with more matching posts is only partly collected.
    params = {
        'q': 'bitcoin',
        'after': day_start,
        'before': day_start + SECONDS_PER_DAY,
        'size': 100,
        'subreddit': 'bitcoin',
    }

    failure = "No attempts made"
    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.get(API_URL, params=params,
                                    timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as exc:
            failure = f"Request error: {exc}"
        else:
            if response.status_code == 200:
                return response.json()['data']
            failure = f"Status code: {response.status_code}"

        if attempt == max_attempts:
            break
        print(f"Failed to fetch data from API for {date_str}. {failure}. Retrying in 1 minute...")
        time.sleep(RETRY_DELAY_SECONDS)

    raise RuntimeError(
        f"Giving up on {date_str} after {max_attempts} attempts. {failure}."
    )


def main():
    """Write one CSV row per fetched post for every day in the date range."""
    num_days = (end_date - start_date).days + 1

    # Explicit UTF-8 so non-ASCII titles/bodies do not depend on the
    # platform's default encoding.
    with open(OUTPUT_PATH, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(CSV_HEADER)

        for offset in tqdm(range(num_days)):
            day = start_date + datetime.timedelta(days=offset)
            date_str = day.strftime("%Y-%m-%d")

            posts = fetch_posts(utc_day_start(day), date_str)
            writer.writerows(post_to_row(post) for post in posts)

            print(f"Fetched and wrote {len(posts)} Reddit posts for {date_str} to CSV file.")

    print("Done!")


# True both when run as a script and inside a notebook cell.
if __name__ == "__main__":
    main()


 50%|█████     | 1/2 [00:02<00:02,  2.76s/it]

Fetched and wrote 88 Reddit posts for 2022-01-30 to CSV file.


100%|██████████| 2/2 [00:06<00:00,  3.48s/it]

Fetched and wrote 95 Reddit posts for 2022-01-31 to CSV file.
Done!



