# Fetch comments

In [None]:
import re
import requests
import json
import pandas as pd


In [None]:
# Root of the drupal.org API; endpoint paths are appended to this.
BASE_URL = "https://www.drupal.org/api-d7"

# Both the request body type and the accepted response type are JSON.
_JSON_MIME = 'application/json'

# Headers sent with every API request; User-Agent identifies this client.
HEADERS = {
    'Accept': _JSON_MIME,
    'Content-Type': _JSON_MIME,
    'User-Agent': 'Drucom 0.1.0',
}


def check_comment_total(uid, timeout=30):
    """
    Look up how many pages of comments a single user has authored.

    The comment listing is requested with ``limit=1`` and the page number
    found in the ``last`` link of the JSON response is returned.

    NOTE(review): the page number is used directly as the total. Whether
    that equals the exact comment count depends on how the API numbers its
    pages -- confirm against the API documentation.

    Args:
        uid: ID of the user whose comments are looked up (sent as the
            ``author`` filter).
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the caller
            indefinitely.

    Returns:
        The page number parsed from the ``last`` link, or 0 when the link
        is missing, carries no page number, or the request fails. A failed
        request is therefore indistinguishable from a user without
        comments; the failure is only reported on stdout.
    """
    params = {
        'full': 0,
        'limit': 1,
        'sort': 'cid',
        'direction': 'ASC',
        'author': uid,
    }

    total = 0

    try:
        response = requests.get(
            f"{BASE_URL}/comment.json",
            headers=HEADERS,
            params=params,
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
        # `or ''` also covers a present-but-null "last" value, which would
        # otherwise make re.search raise a TypeError.
        match = re.search(r'page=(\d+)', data.get('last') or '')
        if match:
            total = int(match.group(1))
    except Exception as e:
        # Deliberately broad: this runs inside a long batch over many users,
        # so one bad response is reported and counted as 0 rather than
        # aborting the run.
        print(f"Unexpected error for UID {uid}: {e}")

    print(f"User {uid} has {total} pages of comments.")
    return total


In [None]:
# Load the user IDs to check. The context manager closes the file as soon as
# it has been parsed instead of leaving the handle open for the whole run.
with open('../data/json/uids.json', 'r', encoding='utf-8') as uid_file:
    uids = json.load(uid_file)

# Column-oriented accumulator: one entry per user with a non-zero total.
user_with_comments = {
    'uid': [],
    'total': [],
}
for uid in uids:
    try:
        total = check_comment_total(uid)
        if total > 0:
            user_with_comments['uid'].append(uid)
            user_with_comments['total'].append(total)
    except Exception as e:
        # Keep going: a failure for one user must not abort the batch.
        print(f"Error processing UID {uid}: {e}")

# Persist the result next to the notebook as a two-column CSV.
df = pd.DataFrame(user_with_comments, columns=['uid', 'total'])
df.to_csv('user_with_comments.csv', index=False)
print('Fetched user IDs with comments.')


UID 1 has 19090 pages of comments.
UID 2 has 605 pages of comments.
UID 3 has 0 pages of comments.
Fetched user IDs with comments.
