In [None]:
import requests
import pandas as pd
import logging
from datetime import datetime
from typing import List, Dict
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build

class WordPressIndexOptimizer:
    """Audit Google indexing of WordPress posts and produce an Excel report.

    For every configured domain the optimizer pulls the published posts from
    the WordPress REST API, inspects each post URL through the Search Console
    URL Inspection API, flags simple on-page problems, asks Google to (re)crawl
    posts that are not indexed, and collects everything into a DataFrame.
    """

    # The Indexing API is a separate Google service with its own OAuth scope.
    _INDEXING_SCOPE = 'https://www.googleapis.com/auth/indexing'
    # Seconds to wait for a WordPress site before giving up on a request.
    _REQUEST_TIMEOUT = 30
    # Thresholds (in characters) used by analyze_indexing_challenges.
    _MIN_CONTENT_LENGTH = 300
    _MIN_TITLE_LENGTH = 10

    def __init__(self, gsc_credentials_file: str, domains: List[str]):
        """Initialize WordPress Indexing Optimizer.

        Args:
            gsc_credentials_file: Path to a Google service-account JSON key.
            domains: Bare host names (no scheme) of the WordPress sites.

        Raises:
            Exception: Whatever the credential loader raises when the key file
                is missing or invalid.
        """
        # The logger must exist before _load_credentials runs, because that
        # method logs on failure; otherwise the real error would be masked by
        # an AttributeError.
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)
        self.domains = domains
        self.credentials = self._load_credentials(gsc_credentials_file)
        self.search_console = build('searchconsole', 'v1', credentials=self.credentials)
        # Built lazily on the first indexing request.
        self._indexing_service = None

    def _load_credentials(self, credentials_file: str):
        """Load Google Search Console credentials from a service-account file.

        Logs and re-raises any failure so the caller sees the original error.
        """
        try:
            return Credentials.from_service_account_file(
                credentials_file,
                scopes=['https://www.googleapis.com/auth/webmasters']
            )
        except Exception as e:
            self.logger.error(f"Credential loading failed: {e}")
            raise

    def fetch_wordpress_posts(self, domain: str) -> List[Dict]:
        """Retrieve all published posts of ``domain`` from the WordPress REST API.

        Pages through ``/wp-json/wp/v2/posts`` 100 posts at a time. On a
        network, HTTP or JSON error the problem is logged and the posts
        collected so far are returned (best effort, never raises).
        """
        posts: List[Dict] = []
        page = 1
        while True:
            try:
                response = requests.get(
                    f"https://{domain}/wp-json/wp/v2/posts",
                    params={'page': page, 'per_page': 100, 'status': 'publish'},
                    timeout=self._REQUEST_TIMEOUT,
                )
                if response.status_code == 400 and page > 1:
                    # WordPress answers 400 with an error *object* once the
                    # page number exceeds the last page; that is the normal
                    # end of pagination, not a failure.
                    break
                response.raise_for_status()
                current_posts = response.json()
            except (requests.RequestException, ValueError) as e:
                self.logger.error(f"Error fetching posts from {domain}: {e}")
                break

            # Anything other than a non-empty JSON array means there is
            # nothing more to collect (an error payload would be a dict).
            if not isinstance(current_posts, list) or not current_posts:
                break
            posts.extend(item for item in current_posts if isinstance(item, dict))

            # Stop without an extra round trip when the server tells us this
            # was the last page.
            total_pages = response.headers.get('X-WP-TotalPages', '')
            if total_pages.isdigit() and page >= int(total_pages):
                break
            page += 1
        return posts

    def analyze_indexing_challenges(self, post: Dict) -> Dict:
        """Analyze potential indexing challenges for a post.

        Args:
            post: A post object as returned by the WordPress REST API.

        Returns:
            A dict with ``'challenges'`` (list of problem descriptions) and
            ``'recommended_actions'`` (always three entries, in the order
            content / title / metadata; an entry is ``None`` when the
            corresponding check passed).
        """
        # Length of the rendered HTML, so markup counts toward the total.
        content = (post.get('content') or {}).get('rendered', '') or ''
        title = (post.get('title') or {}).get('rendered', '') or ''

        low_content = len(content) < self._MIN_CONTENT_LENGTH
        short_title = len(title) < self._MIN_TITLE_LENGTH
        missing_meta = not post.get('yoast_meta')

        challenges = []
        if low_content:
            challenges.append("Low content length (< 300 characters)")
        if short_title:
            challenges.append("Short or missing title")
        if missing_meta:
            challenges.append("Missing SEO metadata")

        # Driven by the check results themselves rather than by searching the
        # challenge list, whose entries carry extra detail and therefore never
        # matched a bare label.
        return {
            'challenges': challenges,
            'recommended_actions': [
                "Improve content to at least 300 characters" if low_content else None,
                "Add descriptive title" if short_title else None,
                "Add SEO metadata" if missing_meta else None
            ]
        }

    @staticmethod
    def _site_url_for(url: str) -> str:
        """Return the URL-prefix property (``https://host/``) that owns ``url``.

        Raises:
            ValueError: If ``url`` is not an absolute URL with a host part.
        """
        parts = url.split('/')
        if len(parts) < 3 or not parts[2]:
            raise ValueError(f"Not an absolute URL: {url!r}")
        return f"https://{parts[2]}/"

    def check_indexing_status(self, url: str) -> Dict:
        """Check indexing status of ``url`` via the URL Inspection API.

        Returns:
            On success a dict with ``'indexed'``, ``'last_crawl'``,
            ``'crawl_state'`` and ``'index_verdict'``. On any failure a dict
            with ``'indexed': False`` and ``'error'`` (the failure is also
            logged as a warning).
        """
        try:
            response = self.search_console.urlInspection().index().inspect(
                body={
                    "inspectionUrl": url,
                    "siteUrl": self._site_url_for(url)
                }
            ).execute()
        except Exception as e:
            self.logger.warning(f"Index inspection failed for {url}: {e}")
            return {
                'indexed': False,
                'error': str(e)
            }

        inspection_result = response.get("inspectionResult") or {}
        index_status = inspection_result.get("indexStatusResult") or {}
        verdict = index_status.get("verdict")

        return {
            # coverageState is a human-readable description such as
            # "Submitted and indexed"; the machine-readable signal that the
            # URL is in the index is verdict == "PASS".
            'indexed': verdict == "PASS",
            'last_crawl': index_status.get("lastCrawlTime"),
            'crawl_state': index_status.get("coverageState"),
            'index_verdict': verdict
        }

    def _get_indexing_service(self):
        """Return the Indexing API client, creating it on first use."""
        service = getattr(self, '_indexing_service', None)
        if service is None:
            # Same service account, but the Indexing API needs its own scope.
            scoped = self.credentials.with_scopes([self._INDEXING_SCOPE])
            service = build('indexing', 'v3', credentials=scoped)
            self._indexing_service = service
        return service

    def request_indexing(self, url: str) -> bool:
        """Request indexing for a specific URL.

        Publishes a ``URL_UPDATED`` notification through the Google Indexing
        API; the Search Console API itself exposes no indexing-request method.

        NOTE(review): the Indexing API requires the service account to be a
        verified owner of the property and is subject to a daily quota --
        confirm both for the target sites.

        Returns:
            True when the notification was accepted, False otherwise (the
            failure is logged, never raised).
        """
        try:
            self._get_indexing_service().urlNotifications().publish(
                body={"url": url, "type": "URL_UPDATED"}
            ).execute()
        except Exception as e:
            self.logger.error(f"Indexing request failed for {url}: {e}")
            return False

        self.logger.info(f"Indexing requested for: {url}")
        return True

    def generate_comprehensive_report(self) -> pd.DataFrame:
        """Generate comprehensive indexing report.

        Returns:
            A DataFrame with one row per post across all configured domains.
            Posts that are not indexed trigger an indexing request as a side
            effect; its outcome is recorded in the ``Indexing Request`` column.
        """
        all_posts_data = []

        for domain in self.domains:
            for post in self.fetch_wordpress_posts(domain):
                url = post.get('link', '')
                title = (post.get('title') or {}).get('rendered', 'No Title')

                index_check = self.check_indexing_status(url)
                indexing_analysis = self.analyze_indexing_challenges(post)
                is_indexed = bool(index_check.get('indexed', False))

                # Auto-request indexing if not indexed
                indexing_request_status = 'N/A'
                if not is_indexed:
                    indexing_request_status = 'Requested' if self.request_indexing(url) else 'Failed'

                all_posts_data.append({
                    'Domain': domain,
                    'Title': title,
                    'URL': url,
                    'Indexed': 'Yes' if is_indexed else 'No',
                    'Indexing Request': indexing_request_status,
                    # "or" (not a .get default) so that a key present with a
                    # None value still falls back to the placeholder text.
                    'Crawl State': index_check.get('crawl_state') or 'Unknown',
                    'Last Crawl': index_check.get('last_crawl') or 'N/A',
                    'Indexing Challenges': ', '.join(indexing_analysis.get('challenges', [])) or 'None',
                    'Recommended Actions': ', '.join(filter(None, indexing_analysis.get('recommended_actions', []))) or 'None'
                })

        return pd.DataFrame(all_posts_data)

    def save_report(self, df: pd.DataFrame):
        """Save report with color-coded formatting.

        Writes ``df`` to a timestamped ``.xlsx`` file in the current working
        directory. Cells of the ``Indexed`` column are shaded green for
        ``Yes`` and red for ``No``.

        Returns:
            The name of the file that was written.
        """
        filename = f"wordpress_indexing_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"

        with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
            df.to_excel(writer, index=False, sheet_name='Indexing Report')
            workbook = writer.book
            worksheet = writer.sheets['Indexing Report']

            # An empty report has no rows (and possibly no columns) to format.
            if len(df) > 0 and 'Indexed' in df.columns:
                green_format = workbook.add_format({'bg_color': '#C6EFCE'})
                red_format = workbook.add_format({'bg_color': '#FFC7CE'})

                # Locate the column by name so the formatting follows the
                # data even if the column order changes. Row 0 is the header.
                status_col = df.columns.get_loc('Indexed')
                last_row = len(df)

                for label, cell_format in (('Yes', green_format), ('No', red_format)):
                    worksheet.conditional_format(1, status_col, last_row, status_col, {
                        # Exact matches are a 'cell' rule; string values must
                        # be wrapped in double quotes.
                        'type': 'cell',
                        'criteria': 'equal to',
                        'value': f'"{label}"',
                        'format': cell_format
                    })

        self.logger.info(f"Report saved to {filename}")
        return filename

def main():
    """Run the full workflow: build the optimizer, create the report, save it."""
    # Configuration - REPLACE WITH YOUR DETAILS
    GSC_CREDENTIALS = "/path/to/google_search_console_credentials.json"
    WORDPRESS_DOMAINS = ["yourdomain.com"]

    optimizer = WordPressIndexOptimizer(GSC_CREDENTIALS, WORDPRESS_DOMAINS)

    # Build the report and write it straight to disk; save_report logs the
    # resulting file name.
    optimizer.save_report(optimizer.generate_comprehensive_report())


if __name__ == "__main__":
    main()

# WordPress Indexing Optimizer

Welcome to the **WordPress Indexing Optimizer** Jupyter Notebook. This notebook is designed to streamline the process of optimizing the indexing of WordPress posts using the Google Search Console API. The `WordPressIndexOptimizer` class encapsulates a suite of functionalities to fetch WordPress posts, analyze potential indexing challenges, check indexing status, request indexing, and generate a comprehensive report.

## 🌟 Features

- **Fetch WordPress Posts**: Seamlessly retrieve posts from the WordPress REST API.
- **Analyze Indexing Challenges**: Identify and diagnose potential issues that might hinder the indexing of posts.
- **Check Indexing Status**: Verify if a post is indexed and obtain detailed information about its indexing status.
- **Request Indexing**: Automatically request indexing for posts that are not indexed.
- **Generate Comprehensive Report**: Create a detailed report of the indexing status and challenges for all posts.
- **Save Report**: Save the report with color-coded formatting to an Excel file for easy interpretation.

## 🚀 Usage

1. **Configuration**: Update the `GSC_CREDENTIALS` and `WORDPRESS_DOMAINS` variables with your Google Search Console credentials file path and the list of WordPress domains you want to analyze.

2. **Initialize Optimizer**: Instantiate the `WordPressIndexOptimizer` class with the credentials and domains.

3. **Generate Report**: Invoke the `generate_comprehensive_report` method to generate the indexing report.

4. **Save Report**: Utilize the `save_report` method to save the report to an Excel file.

## 📘 Example

### Steps to Use the Code

1. **Set Up Google Search Console Credentials**: Ensure you have a Google service-account JSON key file whose service account has been granted access to your Search Console property. Update the `GSC_CREDENTIALS` variable with the path to this file.

2. **List WordPress Domains**: Add the WordPress domains you want to analyze to the `WORDPRESS_DOMAINS` list.

3. **Run the Notebook**: Execute the cells in the notebook to initialize the optimizer, generate the report, and save it.

4. **Review the Report**: Open the generated Excel file to review the indexing status, challenges, and recommended actions for your WordPress posts.

## 🎯 What the Code Accomplishes

- **Automate Indexing Management**: Simplifies the process of managing and optimizing the indexing of WordPress posts.
- **Identify Indexing Issues**: Helps identify potential issues that may hinder the indexing of posts.
- **Improve SEO**: Provides actionable recommendations to improve the visibility and performance of content in search engine results.
- **Save Time**: Automates repetitive tasks, saving time for website administrators and SEO professionals.
- **Generate Detailed Reports**: Produces comprehensive reports that are easy to interpret and act upon.
