<a href="https://colab.research.google.com/github/Nishanth1810/12/blob/main/Blog%20Automation%20for%20Wordpress.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import requests
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from google.auth.transport.requests import Request
from io import BytesIO
import gspread
from bs4 import BeautifulSoup

# === Step 1: Setup Google Credentials ===
creds = Credentials.from_service_account_file(
    'evident-bedrock-464104-k8-3b568bdf291a.json',
    scopes=[
        'https://www.googleapis.com/auth/spreadsheets',
        'https://www.googleapis.com/auth/drive',
        'https://www.googleapis.com/auth/documents'
    ]
)

# === Step 2: Define Sheet & WordPress Configs ===
sheets = [
    {
        "url": "https://docs.google.com/spreadsheets/d/1_JHanBj9Y8q5bfrNG61PXp6reinkqNSkyhdt21G4b80/edit?gid=0#gid=0",
        "sheet_name": "Sheet1",
        "wp_site": "https://amcounsellingservices.ca/",
        "wp_user": "SEO-Team",
        "wp_app_password": "k7MP oN5O SdDL S2IF Edie kPaX"
    },

]

# === Step 3: Clean HTML to Remove Inline Styles and Classes ===
def clean_html(raw_html):
    soup = BeautifulSoup(raw_html, "html.parser")
    # Remove <style> tags
    for tag in soup.find_all("style"):
        tag.decompose()
    # Remove inline styles, class, and id attributes
    for tag in soup.find_all(True):
        tag.attrs = {key: val for key, val in tag.attrs.items() if key not in ['style', 'class', 'id']}
    return str(soup)

# === Step 4: Google Docs to Clean HTML ===
def get_doc_content(doc_url, session):
    try:
        doc_id = doc_url.split("/d/")[1].split("/")[0]
        creds.refresh(Request())
        export_url = f"https://docs.google.com/feeds/download/documents/export/Export?id={doc_id}&exportFormat=html"
        headers = {'Authorization': f'Bearer {creds.token}'}
        response = session.get(export_url, headers=headers)
        if response.status_code == 200:
            return clean_html(response.text)
        else:
            print(f"⚠️ Failed to fetch HTML export: {response.status_code}")
            return ''
    except Exception as e:
        print(f"⚠️ Error fetching doc HTML: {e}")
        return ''

# === Step 5: Upload Featured Image ===
def upload_image(image_url, wp_user, wp_app_password, wp_site, session):
    try:
        r = session.get(image_url)
        if r.status_code != 200:
            print("⚠️ Could not download image")
            return None

        headers = {
            'Content-Disposition': 'attachment; filename=image.jpg',
            'Content-Type': 'image/jpeg'
        }

        res = session.post(
            f"{wp_site}/wp-json/wp/v2/media",
            headers=headers,
            auth=(wp_user, wp_app_password),
            data=BytesIO(r.content)
        )

        if res.status_code in [200, 201]:
            return res.json().get("id")
        else:
            print(f"⚠️ Upload failed: {res.text}")
            return None
    except Exception as e:
        print(f"⚠️ Exception during image upload: {e}")
        return None

# === Step 6: Post to WordPress ===
def post_to_wordpress(title, content, slug, status, meta_description, category_id, wp_user, wp_app_password, wp_site, featured_image_id=None):
    url = f"{wp_site}/wp-json/wp/v2/posts"
    headers = {'Content-Type': 'application/json'}

    payload = {
        "title": title,
        "slug": slug,
        "status": status,
        "content": content,
        "categories": [int(category_id)],
        "meta": {
            "yoast_wpseo_metadesc": meta_description
        }
    }

    if featured_image_id:
        payload["featured_media"] = featured_image_id

    response = requests.post(url, auth=(wp_user, wp_app_password), headers=headers, json=payload)
    if response.status_code == 201:
        print(f"✅ Posted: {title}")
    else:
        print(f"❌ Failed to post '{title}' ({response.status_code}): {response.text}")

# === Step 7: Loop Over Sheets ===
creds.refresh(Request())
gc = gspread.authorize(creds)
session = requests.Session()

for site in sheets:
    print(f"📘 Processing Sheet: {site['sheet_name']}")
    sheet_id = site["url"].split("/d/")[1].split("/")[0]
    worksheet = gc.open_by_key(sheet_id).worksheet(site["sheet_name"])
    df = pd.DataFrame(worksheet.get_all_records())

    # Validate columns
    required_columns = {'title', 'slug', 'status', 'meta_description', 'category_id', 'content_doc_url'}
    missing = required_columns - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns in {site['sheet_name']}: {missing}")

    for _, row in df.iterrows():
        try:
            # Get cleaned HTML
            html_content = get_doc_content(row['content_doc_url'], session)

            # Auto-generate slug if empty
            slug = row['slug'] or row['title'].lower().strip().replace(' ', '-')

            # Upload featured image if present
            image_id = None
            if 'featured_image_url' in row and row['featured_image_url']:
                image_id = upload_image(
                    row['featured_image_url'],
                    site["wp_user"],
                    site["wp_app_password"],
                    site["wp_site"],
                    session
                )

            # Post to WordPress
            post_to_wordpress(
                title=row['title'],
                content=html_content,
                slug=slug,
                status=row['status'],
                meta_description=row['meta_description'],
                category_id=row['category_id'],
                wp_user=site["wp_user"],
                wp_app_password=site["wp_app_password"],
                wp_site=site["wp_site"],
                featured_image_id=image_id
            )

        except Exception as e:
            print(f"❌ Error processing '{row.get('title', 'Unknown')}': {e}")


FileNotFoundError: [Errno 2] No such file or directory: 'evident-bedrock-464104-k8-3b568bdf291a.json'