In [None]:
import requests
import csv
import time
from datetime import datetime, timezone

# Pushshift endpoint used to search Reddit submissions.
url = 'https://api.pushshift.io/reddit/search/submission/'

# Query parameters sent with every request. The fetch loop further down
# mutates this dict between batches to move through the result set.
base_params = dict(
    q='AI AND ESG',                 # search terms
    subreddit='EsgInvesting',       # restrict the search to r/EsgInvesting
    size=10,                        # submissions requested per batch
    fields='title,selftext,author,subreddit,created_utc,score',
    sort='desc',
    after='30d',
)

# Destination file for the collected submissions.
output_csv = 'AI_ESG.csv'

# Column titles, in the same order as the rows appended by the fetch loop.
header_row = ['Title', 'Selftext', 'Author', 'Subreddit', 'Score', 'Created_UTC']

# Mode 'w' truncates any previous run's output so the file starts with
# exactly one header line.
with open(output_csv, mode='w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(header_row)


In [None]:
# Fetch up to 10 batches of submissions, keep the high-scoring ones, and
# append them to the CSV whose header was written earlier.

# Only submissions scoring strictly above this value are kept.
min_score = 50
# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would block the script forever.
request_timeout = 30

for batch in range(10):
    try:
        response = requests.get(url, params=base_params, timeout=request_timeout)
        if response.status_code != 200:
            print(f"Error: HTTP {response.status_code}")
            break
        data = response.json().get('data', [])
    # ValueError covers an undecodable JSON body on requests versions where
    # the decode error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Request error: {e}")
        break

    if not data:
        print("No more data found.")
        break

    filtered_records = []
    for item in data:
        # Treat a missing or null score as 0 so the comparison cannot fail.
        score = item.get('score') or 0
        if score <= min_score:
            continue

        # Convert the epoch timestamp into a timezone-aware UTC datetime.
        created_time = datetime.fromtimestamp(item.get('created_utc', 0), tz=timezone.utc)
        filtered_records.append([
            item.get('title', 'N/A'),
            item.get('selftext', 'N/A'),
            item.get('author', 'N/A'),
            item.get('subreddit', 'N/A'),
            score,
            created_time,
        ])

    if filtered_records:
        with open(output_csv, 'a', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerows(filtered_records)

    print(f"Batch {batch + 1}: Retrieved {len(data)} records. Filtered: {len(filtered_records)} relevant records.")

    # Advance the pagination cursor past the last item of this batch.
    # With descending order the last item is the oldest, so the next page
    # lies *before* it; moving 'after' instead would request the same newest
    # items again on every batch. The original 'after' lower bound is kept.
    # NOTE(review): assumes results are ordered by created_utc, which is
    # what the request relies on by not overriding the sort field — confirm.
    cursor_key = 'after' if base_params.get('sort') == 'asc' else 'before'
    base_params[cursor_key] = data[-1]['created_utc']

    # Brief pause between requests to avoid hammering the API.
    time.sleep(1)

print("Done.")
