In [None]:
# Libraries 

import base64
import os

import pandas as pd
import requests

In [None]:
# Access

# Credentials are read from the environment (WORDPRESS_USER / WORDPRESS_PASSWORD) so that
# real secrets never have to be typed into, or committed with, the notebook. The literal
# placeholders are only used when the variables are not set.
wordpress_user = os.environ.get('WORDPRESS_USER', 'username')
wordpress_password = os.environ.get('WORDPRESS_PASSWORD', 'password')  # Generated application password (temporary)
wordpress_credentials = f"{wordpress_user}:{wordpress_password}"
# HTTP Basic auth: the header value is "Basic " + base64("user:password").
wordpress_token = base64.b64encode(wordpress_credentials.encode())
wordpress_header = {'Authorization': 'Basic ' + wordpress_token.decode('utf-8')}

In [None]:
# Read the posts with code: this returns the same JSON you get by visiting https://yourdomain.com/wp-json/wp/v2/posts in a browser

def read_wordpress_posts():
    """Fetch the posts endpoint of the WordPress REST API and print the decoded JSON.

    No pagination parameters are sent, so the output is whatever the endpoint
    returns by default. Nothing is returned; the payload is only printed.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    api_url = 'https://yourdomain.com/wp-json/wp/v2/posts'
    # Bounded wait: without a timeout a stalled server blocks the kernel indefinitely.
    response = requests.get(api_url, timeout=30)
    # Fail loudly on 4xx/5xx instead of printing an error payload as if it were posts.
    response.raise_for_status()
    response_json = response.json()
    print(response_json)

In [None]:
# Get the total number of post pages

def get_total_pagecount():
    """Return the number of result pages for posts when requesting 100 posts per page.

    The value is read from the ``X-WP-TotalPages`` response header of a request
    for page 1 and converted to ``int``.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        KeyError: if the response carries no ``X-WP-TotalPages`` header.
    """
    api_url = 'https://yourdomain.com/wp-json/wp/v2/posts?page=1&per_page=100'
    # Bounded wait: without a timeout a stalled server blocks the kernel indefinitely.
    response = requests.get(api_url, timeout=30)
    # Surface HTTP failures as such, rather than as a KeyError on the missing header below.
    response.raise_for_status()
    pages_count = response.headers['X-WP-TotalPages']
    return int(pages_count)

In [None]:
# Get all the content using a loop that iterates through the pages

def read_wordpress_post_with_pagination():
    """Download every post by walking all result pages (100 posts per page).

    The page count comes from ``get_total_pagecount()``; each page is then
    requested in order and its items are appended to a single list.

    Returns:
        list: the decoded JSON items of all pages, in page order.

    Raises:
        requests.HTTPError: if any page request answers with an error status.
    """
    total_pages = get_total_pagecount()
    all_page_items_json = []
    for current_page in range(1, total_pages + 1):
        api_url = f"https://yourdomain.com/wp-json/wp/v2/posts?page={current_page}&per_page=100"
        # Bounded wait: without a timeout a stalled server blocks the kernel indefinitely.
        page_items = requests.get(api_url, timeout=30)
        # Stop on an HTTP error: extending the list with an error payload (a JSON object)
        # would silently add its keys as if they were posts.
        page_items.raise_for_status()
        all_page_items_json.extend(page_items.json())
    return all_page_items_json

In [None]:
# Download every post: one request to learn the page count, then one request per page.
post_data = read_wordpress_post_with_pagination()

In [None]:
# You now have the content! `post_data` is a list where each item is a dictionary. Uncomment any line below to explore it:

# type(post_data)
# len(post_data)
# post_data[0]
# post_data[0].keys()

In [None]:
# In this case, we need the keys ID, date, link, title, categories, and tags

# Keep only the fields of interest from each raw post. Missing keys become None
# (or an empty string / empty list for title, categories and tags).
filtered_posts = [
    {
        'id': raw_post.get('id'),
        'date': raw_post.get('date'),
        'link': raw_post.get('link'),
        # The title arrives as a nested object; only its rendered text is kept.
        'title': raw_post.get('title', {}).get('rendered', ''),
        'categories': raw_post.get('categories', []),
        'tags': raw_post.get('tags', []),
    }
    for raw_post in post_data
]


In [None]:
# Get the name of the categories and tags.

def get_categories_mapping():
    """Return a ``{category_id: category_name}`` dict from the categories endpoint.

    Only a single page of up to 100 categories is requested, so a site with
    more categories than that gets a truncated mapping.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    url = "https://yourdomain.com/wp-json/wp/v2/categories?per_page=100"
    # Bounded wait: without a timeout a stalled server blocks the kernel indefinitely.
    response = requests.get(url, timeout=30)
    # Fail with a clear HTTP error here; an error payload is not a list of categories
    # and would otherwise blow up confusingly inside the comprehension below.
    response.raise_for_status()
    categories = response.json()
    return {cat['id']: cat['name'] for cat in categories}

def get_tags_mapping():
    """Return a ``{tag_id: tag_name}`` dict from the tags endpoint.

    Only a single page of up to 100 tags is requested, so a site with more
    tags than that gets a truncated mapping.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    url = "https://yourdomain.com/wp-json/wp/v2/tags?per_page=100"
    # Bounded wait: without a timeout a stalled server blocks the kernel indefinitely.
    response = requests.get(url, timeout=30)
    # Fail with a clear HTTP error here; an error payload is not a list of tags
    # and would otherwise blow up confusingly inside the comprehension below.
    response.raise_for_status()
    tags = response.json()
    return {tag['id']: tag['name'] for tag in tags}

# Build the id -> name lookups once. Each call makes a single request with per_page=100,
# so each mapping holds at most the first 100 entries.
categories_mapping = get_categories_mapping()
tags_mapping = get_tags_mapping()

In [None]:
# Each mapping function requests a single page of at most 100 entries. Check whether either mapping reached 100; if it did, it is probably truncated and needs pagination:

# Show both mapping sizes, one per line (categories first, then tags).
print(len(categories_mapping), len(tags_mapping), sep='\n')

In [None]:
# Example: We need to expand pagination in tags

def get_all_tags():
    """Collect the tag records of every page of the tags endpoint (100 per page).

    Pages are requested in increasing order starting at 1. Collection stops at
    the first response that is not HTTP 200 or whose JSON body is empty.

    Returns:
        list: the decoded JSON items of all pages fetched before stopping.
    """
    collected = []
    page_number = 1
    keep_going = True
    while keep_going:
        reply = requests.get(
            f'https://yourdomain.com/wp-json/wp/v2/tags?page={page_number}&per_page=100'
        )
        # The body is only decoded for successful responses.
        batch = reply.json() if reply.status_code == 200 else None
        if batch:
            collected.extend(batch)
            page_number += 1
        else:
            keep_going = False
    return collected

# Fetch every tag record (all pages), then build the id -> name lookup from those records.
# The records are bound to `tags_data`, which is the name the comprehension below reads.
tags_data = get_all_tags()

all_tags_mapping = {tag['id']: tag['name'] for tag in tags_data}

In [None]:
# Replace the IDs with the names in the already extracted list

def _with_names(entry):
    """Return a copy of a filtered post with category/tag ids replaced by names.

    Ids missing from the lookup tables are rendered as the string "ID:<id>".
    """
    category_names = [
        categories_mapping.get(category_id, f"ID:{category_id}")
        for category_id in entry.get('categories', [])
    ]
    tag_names = [
        all_tags_mapping.get(tag_id, f"ID:{tag_id}")
        for tag_id in entry.get('tags', [])
    ]
    return {
        'id': entry['id'],
        'date': entry['date'],
        'link': entry['link'],
        'title': entry['title'],
        'categories': category_names,
        'tags': tag_names,
    }


filtered_posts_def = [_with_names(entry) for entry in filtered_posts]


In [None]:
# Done! Let's convert the list of post dictionaries to a DataFrame (one row per post).

df = pd.DataFrame(filtered_posts_def)

In [None]:
# Tags and categories are lists. For readability, convert them to comma-separated strings.

def _join_names(value):
    """Join a list of names with ', '; anything that is not a list becomes ''."""
    if isinstance(value, list):
        return ', '.join(value)
    return ''


for column_name in ('tags', 'categories'):
    df[column_name] = df[column_name].apply(_join_names)

In [None]:
# Challenge achieved: preview the first rows of the final table.

df.head()