In [None]:
from pytrends.request import TrendReq
import pandas as pd
import time
import os

# ✅ Initialize Pytrends
# NOTE(review): requests_args is handed to the underlying HTTP client, and
# 'verify': False switches off TLS certificate verification for every request.
# It is kept so the script keeps working where it was written, but it should be
# dropped once the local certificate setup is fixed — confirm before sharing.
pytrends = TrendReq(hl='en-US', tz=360, retries=3, backoff_factor=5, requests_args={'verify': False})

# ✅ Read keyword list from file
file_path = r"/Users/qibinhuang/Desktop/Desktop - Qibin的MacBook Air/teaching/cleaned_mental_health_keywords.txt"

with open(file_path, "r", encoding="utf-8") as file:
    # Strip first so that indented "# comment" lines are recognised as comments too.
    stripped_lines = (line.strip() for line in file)
    # dict.fromkeys drops repeated keywords while keeping file order; a repeated
    # keyword would produce a repeated column name and make the join below fail.
    keywords = list(dict.fromkeys(
        line for line in stripped_lines if line and not line.startswith("#")
    ))

# ✅ Set batch_size to avoid Google Trends API limitation (max 5 keywords per request)
batch_size = 3  # Lowered to 3 to reduce API errors
keyword_batches = [keywords[i:i + batch_size] for i in range(0, len(keywords), batch_size)]

# ✅ Initialize a DataFrame to store all trend data
df_all_trends = pd.DataFrame()

# ✅ Track failed keyword batches for later retry
failed_batches = []

# ✅ Loop through each keyword batch to retrieve trend data
# The result is joined batch by batch (rather than all at once at the end) so
# that df_all_trends already holds the partial data if the loop is interrupted.
for i, batch in enumerate(keyword_batches):
    try:
        print(f"🔄 Fetching data for batch {i+1}/{len(keyword_batches)}: {batch}")

        # ⏳ Send API request
        pytrends.build_payload(batch, geo='US', timeframe='today 12-m')

        # 🗺️ Get search interest by U.S. state
        df_batch = pytrends.interest_by_region()

        # ✅ Combine data
        if df_all_trends.empty:
            df_all_trends = df_batch
        else:
            df_all_trends = df_all_trends.join(df_batch, how="outer")

        # 🎉 Success log
        print(f"✅ Successfully retrieved data for batch {i+1}")

    except Exception as e:
        # Deliberately broad: one bad batch must not abort the remaining ones.
        print(f"❌ Failed to fetch data for batch {i+1}: {batch}")
        print(f"Error: {e}")
        failed_batches.append(batch)  # Record failed batch for later retry

    # 🚨 Sleep to avoid rate-limiting (wait 20 seconds between requests).
    # Nothing follows the last batch, so there is nothing to wait for there.
    if i < len(keyword_batches) - 1:
        time.sleep(20)

# ✅ Make sure the output directory exists
output_dir = r"/Users/qibinhuang/Desktop/Desktop - Qibin的MacBook Air/"
os.makedirs(output_dir, exist_ok=True)

# ✅ Save the final compiled dataset to CSV
output_file = os.path.join(output_dir, "mental_health_trends_all_states.csv")
if df_all_trends.empty:
    # Writing here would replace an earlier export with an empty file and then
    # report success, so the CSV is left untouched when nothing was retrieved.
    print(f"⚠️ No trend data was retrieved; {output_file} was not written.")
else:
    df_all_trends.to_csv(output_file)

# ✅ Save failed batches to a file for troubleshooting or retrying
if failed_batches:
    failed_file = os.path.join(output_dir, "failed_batches.txt")
    # Same encoding as the keyword file, so non-ASCII keywords survive the round trip.
    with open(failed_file, "w", encoding="utf-8") as f:
        for batch in failed_batches:
            f.write(",".join(batch) + "\n")
    print("⚠️ Some requests failed. Check 'failed_batches.txt' for details.")

if not df_all_trends.empty:
    print(f"✅ Google Trends data saved to {output_file}")



🔄 Fetching data for batch 1/120: ['mental health emergency', 'mental health crisis', 'suicide thoughts']




✅ Successfully retrieved data for batch 1
