<a href="https://colab.research.google.com/github/Crypto-Goatz/rocketopp-website-2026/blob/main/SEO_Engine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
SEO NEURO ENGINE
----------------
1. Ingests Data (GSC, GA4, SERP)
2. Calculates Opportunity Scores based on Weighted Factors
3. Generates "Perfect" Content Briefs based on Strict Rules
4. LEARNING LOOP: Compares previous actions to current results to auto-adjust weights.
"""

import os
import json
import math
import datetime as dt
import random  # Used for simulation; replace with actual API data in prod
from dataclasses import dataclass, asdict
from typing import Dict, List, Optional, Tuple

# ==========================================
# CONFIGURATION & GLOBAL WEIGHTS (Mutable)
# ==========================================

# Static settings shared by the scoring engine and the brief generator.
CONFIG = {
    # Not read by any code visible in this file.
    "SITE_URL": "https://pittsburgh-ai-web.com",
    # Directory where SEOEngine keeps learning_history.json and
    # optimized_weights.json; created on engine start-up if missing.
    "OUTPUT_DIR": "./seo_data",
    # NOTE(review): the two word-count minimums are not referenced by the
    # visible code; ContentArchitect.generate_brief uses its own targets.
    "MIN_WORD_COUNT_PILLAR": 2200,
    "MIN_WORD_COUNT_CLUSTER": 1000,
    # (low, high) keyword density as fractions of the target word count;
    # generate_brief multiplies them by the word count to get a count range.
    "KEYWORD_DENSITY_TARGET": (0.006, 0.012) # 0.6% to 1.2%
}

# Initial Weights - The AI will adjust these over time
# The W_* entries are the linear coefficients applied in
# SEOEngine.calculate_opportunity_score. They are overridden by values from
# optimized_weights.json when that file exists and are adjusted by
# SEOEngine.run_learning_cycle.
WEIGHTS = {
    "W_IMPRESSIONS": 0.35,
    "W_POSITION": 0.25,
    "W_CTR_GAP": 0.25,
    "W_CONVERSIONS": 0.15,
    # NOTE(review): not read by calculate_opportunity_score in the visible code.
    "VOLATILITY_PENALTY": 0.12
}

# Expected click-through rate (as a fraction) for each rounded position 1-10.
# SEOEngine.expected_ctr falls back to 0.01 for any position not listed here.
EXPECTED_CTR_BY_POS = {
    1: 0.32, 2: 0.20, 3: 0.13, 4: 0.09, 5: 0.07,
    6: 0.05, 7: 0.04, 8: 0.03, 9: 0.025, 10: 0.02
}

# ==========================================
# DATA MODELS
# ==========================================

@dataclass
class PageData:
    """Search-performance snapshot of one page, as consumed by the engine.

    Instances are built positionally in run_simulation, so the field order
    below is part of the interface.
    """
    # Page address; used as the join key against the action history.
    url: str
    # Keyword the content brief is built around.
    primary_kw: str
    # Click count; the learning loop compares it with the logged baseline.
    clicks: int
    # Impression count; log-scaled by the opportunity score.
    impressions: int
    # Click-through rate as a fraction (compared against EXPECTED_CTR_BY_POS).
    ctr: float
    # Ranking position; lower values score higher in the opportunity score.
    # presumably the average SERP position from GSC - TODO confirm
    position: float
    # Conversion count; log-scaled by the opportunity score.
    conversions: int = 0
    intent: str = "mixed" # local, transactional, informational
    # Not read by the visible code; the expected date format is not shown here.
    last_updated: str = ""

@dataclass
class ActionLog:
    """Shape of one action-history record.

    NOTE(review): the visible code never instantiates this class;
    run_simulation writes plain dicts with these same keys to the history file.
    """
    # Day the action was planned (run_simulation writes an ISO date string).
    date: str
    # Address of the page the action targets.
    url: str
    action_type: str # CTR_FIX, STRIKING_DISTANCE, etc.
    # Page metrics at planning time (run_simulation stores asdict(page)).
    original_metrics: Dict
    # Copy of the global WEIGHTS in force when the action was planned.
    weights_at_time: Dict

# ==========================================
# CORE ANALYTICS ENGINE
# ==========================================

class SEOEngine:
    """Score pages, classify them into action buckets, and tune the global WEIGHTS.

    Persistent state lives in two JSON files under CONFIG["OUTPUT_DIR"]:
    ``learning_history.json`` (the action log) and ``optimized_weights.json``
    (the weights written back by the learning loop).
    """

    # Keys of WEIGHTS that are blended into the opportunity score. Only these
    # are re-normalized by the learning loop; any other entry (for example
    # VOLATILITY_PENALTY) is not a score coefficient and keeps its value.
    SCORE_WEIGHT_KEYS = ("W_IMPRESSIONS", "W_POSITION", "W_CTR_GAP", "W_CONVERSIONS")

    # Inclusive age window, in days, in which a logged action is evaluated.
    MIN_ACTION_AGE_DAYS = 7
    MAX_ACTION_AGE_DAYS = 28

    # Amount added to a weight when an action it prioritized gained clicks.
    REWARD_STEP = 0.01

    def __init__(self):
        """Create the output directory and load previously learned weights."""
        self.ensure_directories()
        self.history_file = os.path.join(CONFIG["OUTPUT_DIR"], "learning_history.json")
        self.weights_file = os.path.join(CONFIG["OUTPUT_DIR"], "optimized_weights.json")
        self.load_weights()

    def ensure_directories(self):
        """Create CONFIG["OUTPUT_DIR"] if it does not exist yet."""
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(CONFIG["OUTPUT_DIR"], exist_ok=True)

    def load_weights(self):
        """Overlay saved weights from ``optimized_weights.json`` onto WEIGHTS.

        WEIGHTS is updated in place, and only for keys it already defines with
        numeric values, so a partial or damaged file can never remove a key
        the scoring code depends on. An unreadable file leaves the defaults
        untouched.
        """
        if not os.path.exists(self.weights_file):
            return

        try:
            with open(self.weights_file, 'r', encoding="utf-8") as f:
                saved = json.load(f)
        except (OSError, ValueError) as err:
            # ValueError covers json.JSONDecodeError and decoding errors.
            print(f"Could not load optimized weights ({err}); keeping defaults.")
            return

        if not isinstance(saved, dict):
            print("Optimized weights file has an unexpected format; keeping defaults.")
            return

        applied = 0
        for key, value in saved.items():
            is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
            if key in WEIGHTS and is_number:
                WEIGHTS[key] = value
                applied += 1

        if applied:
            print("Loaded optimized weights from previous learning sessions.")

    def clamp(self, x, minimum, maximum):
        """Return ``x`` limited to the closed range [minimum, maximum]."""
        return max(minimum, min(x, maximum))

    def expected_ctr(self, position):
        """Return the expected CTR (fraction) for a ranking position.

        The position is rounded to the nearest integer and looked up in
        EXPECTED_CTR_BY_POS; positions outside the table yield 0.01.
        """
        pos = int(round(position))
        return EXPECTED_CTR_BY_POS.get(pos, 0.01)

    def calculate_opportunity_score(self, page: PageData) -> float:
        """Blend normalized page metrics into one opportunity score.

        Each component is clamped to [0, 1] before weighting:
        impressions and conversions on a log10 scale, position linearly
        (position 0 -> 1, position 50 or worse -> 0), and the CTR gap as the
        relative shortfall against the expected CTR for the position.

        Returns:
            The weighted sum, rounded to 4 decimal places.
        """
        # Normalize Data
        imp_score = self.clamp(math.log10(page.impressions + 1) / 5.0, 0, 1)
        pos_score = self.clamp((50 - page.position) / 50.0, 0, 1)

        exp_ctr = self.expected_ctr(page.position)
        ctr_gap = self.clamp((exp_ctr - page.ctr) / exp_ctr, 0, 1) if exp_ctr > 0 else 0

        conv_score = self.clamp(math.log10(page.conversions + 1) / 2.0, 0, 1)

        # Apply Current Global Weights
        score = (
            WEIGHTS["W_IMPRESSIONS"] * imp_score +
            WEIGHTS["W_POSITION"] * pos_score +
            WEIGHTS["W_CTR_GAP"] * ctr_gap +
            WEIGHTS["W_CONVERSIONS"] * conv_score
        )
        return round(score, 4)

    def classify_bucket(self, page: PageData) -> str:
        """Return the action bucket for a page; the first matching rule wins.

        Returns:
            "CTR_FIX", "STRIKING_DISTANCE", "RELEVANCE_REBUILD" or "MONITOR".
        """
        exp_ctr = self.expected_ctr(page.position)

        # BUCKET 1: High Imp, Low CTR (CTR below 70% of the expected value)
        if page.impressions > 200 and page.ctr < (exp_ctr * 0.7):
            return "CTR_FIX"

        # BUCKET 2: Striking Distance
        if 4 <= page.position <= 15:
            return "STRIKING_DISTANCE"

        # BUCKET 3: Triage (Drop detection requires history, simplified here)
        if page.position > 20 and page.impressions > 500:
            return "RELEVANCE_REBUILD"

        return "MONITOR"

    # ==========================================
    # THE LEARNING LOOP
    # ==========================================

    def run_learning_cycle(self, current_data: List[PageData]):
        """Reward the weights behind logged actions that gained clicks.

        Only history entries logged MIN_ACTION_AGE_DAYS to MAX_ACTION_AGE_DAYS
        days ago are evaluated. For each such entry whose URL is present in
        ``current_data`` and whose clicks grew since it was logged, the weight
        tied to its action type is increased by REWARD_STEP. Afterwards the
        score weights are re-normalized to sum to ~1.0 and all of WEIGHTS is
        saved to ``optimized_weights.json``.

        Does nothing when no history file exists. Entries that are malformed
        (missing date, metrics or URL) are skipped.

        Raises:
            ValueError: if the history file is not valid JSON.
        """
        if not os.path.exists(self.history_file):
            return

        with open(self.history_file, 'r', encoding="utf-8") as f:
            history = json.load(f)

        # Index pages once instead of scanning the list per history entry.
        # setdefault keeps the first page per URL, like a first-match search.
        pages_by_url = {}
        for page in current_data:
            pages_by_url.setdefault(page.url, page)

        today = dt.date.today()

        # NOTE(review): an entry is re-evaluated on every run while it stays
        # inside the window, so one successful action can be rewarded more
        # than once. Persist an "evaluated" marker if that is not intended.
        for entry in history:
            if not self._is_in_learning_window(entry, today):
                continue

            curr_page = pages_by_url.get(entry.get("url"))
            if curr_page is None:
                continue

            try:
                delta_traffic = curr_page.clicks - entry["original_metrics"]["clicks"]
            except (KeyError, TypeError):
                continue  # no usable baseline to compare against

            if delta_traffic <= 0:
                continue

            # REWARD: This action worked.
            action_type = entry.get("action_type")
            if action_type == "CTR_FIX":
                WEIGHTS["W_CTR_GAP"] += self.REWARD_STEP
                print(f"LEARNING: Boosting CTR Weight due to success on {entry['url']}")
            elif action_type == "STRIKING_DISTANCE":
                WEIGHTS["W_POSITION"] += self.REWARD_STEP

        self._normalize_score_weights()

        # Save new brain
        with open(self.weights_file, 'w', encoding="utf-8") as f:
            json.dump(WEIGHTS, f, indent=2)

    def _is_in_learning_window(self, entry, today):
        """Return True if ``entry`` carries an ISO date inside the age window."""
        try:
            logged_on = dt.date.fromisoformat(entry["date"])
        except (KeyError, TypeError, ValueError):
            return False
        age_days = (today - logged_on).days
        return self.MIN_ACTION_AGE_DAYS <= age_days <= self.MAX_ACTION_AGE_DAYS

    def _normalize_score_weights(self):
        """Rescale the score weights in WEIGHTS so they sum to ~1.0."""
        keys = [k for k in self.SCORE_WEIGHT_KEYS if k in WEIGHTS]
        total = sum(WEIGHTS[k] for k in keys)
        if total <= 0:
            return  # nothing sensible to scale by; leave the weights as they are
        for k in keys:
            WEIGHTS[k] = round(WEIGHTS[k] / total, 3)

    def log_actions(self, planned_actions: List[dict]):
        """Append today's planned actions to the history file.

        The history is capped at the 1000 most recent entries. The file is
        rewritten on every call, even when ``planned_actions`` is empty.

        Raises:
            ValueError: if the existing history file is not valid JSON; it is
                deliberately not overwritten in that case.
        """
        existing_log = []
        if os.path.exists(self.history_file):
            with open(self.history_file, 'r', encoding="utf-8") as f:
                existing_log = json.load(f)

        # Keep last 1000 actions
        existing_log.extend(planned_actions)
        existing_log = existing_log[-1000:]

        with open(self.history_file, 'w', encoding="utf-8") as f:
            json.dump(existing_log, f, indent=2)

# ==========================================
# CONTENT & SCHEMA GENERATOR
# ==========================================

class ContentArchitect:
    """Builds rule-based content briefs and schema stacks for a page."""

    def generate_brief(self, page: PageData, bucket: str) -> Dict:
        """
        Generates the EXACT specs for the rewrite based on strict rules.

        Args:
            page: Metrics and keyword data of the page to rewrite.
            bucket: Action bucket for the page ("CTR_FIX",
                "STRIKING_DISTANCE", "RELEVANCE_REBUILD", ...).

        Returns:
            A dict with "meta_strategy", "structural_rules",
            "keyword_engineering" and "schema_stack"; the three buckets named
            above additionally get "priority_action" and
            "specific_instruction".
        """
        is_pillar = "pillar" in page.url or page.impressions > 5000
        target_words = 3200 if is_pillar else 1400

        # Calculate Strict Densities.
        # Round the minimum up and the maximum down so both counts stay inside
        # the configured density band (truncating the minimum would fall below
        # it, e.g. 8 / 1400 = 0.57% for a 0.6% floor). The inner round() strips
        # float noise so an exact product is not bumped to the next integer.
        density_low, density_high = CONFIG["KEYWORD_DENSITY_TARGET"]
        min_kw = math.ceil(round(target_words * density_low, 9))
        max_kw = math.floor(round(target_words * density_high, 9))

        # Strip trailing slashes so ".../page/" yields "page", not "".
        slug = page.url.rstrip('/').split('/')[-1]

        brief = {
            "meta_strategy": {
                "title": f"Create 3 variations. Must include '{page.primary_kw}' near front. Use brackets for CTR e.g. [Year Update]",
                "h1": f"Exact match: {page.primary_kw}",
                "slug": slug
            },
            "structural_rules": {
                "target_word_count": target_words,
                "reading_level": "Grade 8",
                "paragraph_limit": "85 words max",
                "sentence_limit": "20 words avg"
            },
            "keyword_engineering": {
                "primary": page.primary_kw,
                # Derived from CONFIG so the label cannot drift from the
                # numbers used for the count range ("0.6% - 1.2%" by default).
                "density_target": f"{density_low * 100:g}% - {density_high * 100:g}%",
                "exact_count_range": [min_kw, max_kw],
                "mandatory_placements": [
                    "First 100 words",
                    "One H2 exact match",
                    "Last 120 words"
                ]
            },
            "schema_stack": self.get_schema_strategy(page.intent, page.url)
        }

        # Bucket specific instructions
        if bucket == "CTR_FIX":
            brief["priority_action"] = "REWRITE_META_AND_INTRO"
            brief["specific_instruction"] = "The content ranks but doesn't get clicks. Rewrite the Title Tag and the first <p> to be a 'Hook' or 'Direct Answer'."
        elif bucket == "STRIKING_DISTANCE":
            brief["priority_action"] = "EXPAND_DEPTH"
            brief["specific_instruction"] = "Rank is 4-15. Add 2 new H2s covering 'Benefits' and 'Case Studies'. Add internal links to 3 related clusters."
        elif bucket == "RELEVANCE_REBUILD":
            # This bucket is produced by the classifier as well, so it needs
            # the same two keys as the other actionable buckets.
            brief["priority_action"] = "REBUILD_RELEVANCE"
            brief["specific_instruction"] = "Rank is beyond 20 despite high impressions. Realign the page with the search intent of the primary keyword: rework the H1, intro and H2 outline before expanding the content."

        return brief

    def get_schema_strategy(self, intent, url):
        """Return the list of schema names to apply for a page.

        Every page gets the three base entries; one intent-specific group is
        appended: local (also chosen when the URL contains "pittsburgh"),
        transactional, or the article group for everything else.
        """
        stack = ["Organization", "WebSite", "BreadcrumbList"]

        # NOTE(review): the substring test runs on the whole URL, so a domain
        # containing "pittsburgh" marks every page as local - confirm intent.
        if "pittsburgh" in url or intent == "local":
            stack.extend(["LocalBusiness", "Service", "AreaServed"])
        elif intent == "transactional":
            stack.extend(["Service", "FAQPage", "Product"])
        else:
            stack.extend(["Article", "FAQPage", "Person (Author)"])

        return stack

# ==========================================
# MAIN EXECUTION (MOCKED FOR DEMO)
# ==========================================

def run_simulation():
    """Run one demo cycle of the engine on hard-coded sample pages.

    The cycle builds the engine and the brief generator, replays the learning
    loop, scores and buckets every page, stores the resulting actions in the
    history log, and prints the dashboard payload as indented JSON.

    Returns:
        The payload dict with "date", "active_weights" and "tasks"
        (plan items ordered by score, highest first).
    """
    engine = SEOEngine()
    brief_writer = ContentArchitect()

    # Stand-in for GSC/GA4 ingestion: a mix of strong and weak pages.
    # Columns: url, primary_kw, clicks, impressions, ctr, position,
    # conversions, intent.
    sample_rows = [
        ("site.com/ai-business", "ai for business", 120, 5000, 0.024, 6.2, 5, "informational"),
        ("site.com/custom-crm", "custom crm pittsburgh", 15, 800, 0.018, 12.5, 2, "local"),
        ("site.com/seo-guide", "seo guide 2024", 800, 15000, 0.05, 3.1, 10, "informational"),
        ("site.com/web-dev", "web development", 50, 4000, 0.012, 8.4, 1, "transactional"),
        ("site.com/old-post", "marketing tips", 5, 200, 0.025, 22.0, 0, "informational"),
    ]
    pages = [PageData(*row) for row in sample_rows]

    # Let past results adjust the weights before anything is scored.
    engine.run_learning_cycle(pages)

    tasks = []
    pending_log = []
    for page in pages:
        opportunity = engine.calculate_opportunity_score(page)
        action = engine.classify_bucket(page)

        # Pages that only need monitoring produce no task and no log entry.
        if action == "MONITOR":
            continue

        page_brief = brief_writer.generate_brief(page, action)
        tasks.append({
            "url": page.url,
            "score": opportunity,
            "bucket": action,
            "metrics": asdict(page),
            "brief": page_brief,
        })
        pending_log.append({
            "date": dt.date.today().isoformat(),
            "url": page.url,
            "action_type": action,
            "original_metrics": asdict(page),
            "weights_at_time": WEIGHTS.copy(),
        })

    # Highest opportunity first; ties keep their ingestion order.
    ranked_tasks = sorted(tasks, key=lambda item: item['score'], reverse=True)

    # Persist today's actions so a later learning cycle can judge them.
    engine.log_actions(pending_log)

    payload = {
        "date": dt.date.today().isoformat(),
        "active_weights": WEIGHTS,
        "tasks": ranked_tasks,
    }

    print(json.dumps(payload, indent=2))
    return payload

# Run the demo cycle only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_simulation()