In [None]:
1. Aotomation Pillar

In [None]:
# anti_automation.py

In [10]:
import requests
from typing import Tuple, List

# ---------- Anti-Automation Absence Check ----------

def check_anti_automation_absence(url: str, html: str) -> Tuple[int, List[str], List[str]]:
    score = 0
    issues = []
    recommendations = []

    try:
        # 1. Check for bot-blocking headers
        response = requests.get(url, timeout=10)
        headers = response.headers

        if "x-robots-tag" not in headers or "noindex" not in headers.get("x-robots-tag", "").lower():
            score += 1
        else:
            issues.append("Site blocks bots via X-Robots-Tag header.")
            recommendations.append("Remove aggressive noindex/nofollow unless essential.")

        # 2. Check for CAPTCHAs on homepage
        if "captcha" in response.text.lower():
            issues.append("Potential CAPTCHA challenge found on homepage.")
            recommendations.append("Use CAPTCHAs only on sensitive actions like signups or payments.")
        else:
            score += 2

        # 3. robots.txt check
        parsed = requests.utils.urlparse(url)
        robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
        try:
            r_txt = requests.get(robots_url, timeout=5)
            if "disallow: /" not in r_txt.text.lower():
                score += 1
            else:
                issues.append("robots.txt blocks general crawling.")
                recommendations.append("Update robots.txt to allow general crawling.")
        except:
            issues.append("robots.txt not found or unreachable.")
            recommendations.append("Ensure robots.txt is accessible and well-configured.")

        # 4. Check for fingerprinting scripts (basic heuristic)
        if any(s in response.text.lower() for s in ["fingerprintjs", "navigator.plugins", "navigator.hardwareconcurrency"]):
            issues.append("Potential fingerprinting scripts detected.")
            recommendations.append("Avoid aggressive fingerprinting that blocks automation.")
        else:
            score += 1

        return min(score, 5), issues, recommendations

    except Exception as e:
        issues.append("Anti-automation check failed.")
        recommendations.append(str(e))
        return 0, issues, recommendations

In [11]:
# checking what types works its doing 
url = input("Enter URL to check Anti-Automation Signals: ")

try:
    html = requests.get(url, timeout=10).text
    
    score, issues, recommendations = check_anti_automation_absence(url, html)

    print("\n===== ANTI-AUTOMATION ANALYSIS =====")
    print(f"Final Score: {score}/5\n")

    print("Issues Found:")
    if issues:
        for i in issues:
            print(" -", i)
    else:
        print(" No issues detected.")

    print("\nRecommendations:")
    if recommendations:
        for r in recommendations:
            print(" -", r)
    else:
        print(" No recommendations — site looks automation-friendly.")

except Exception as e:
    print("Error fetching URL:", str(e))


Enter URL to check Anti-Automation Signals:  https://www.google.com/search



===== ANTI-AUTOMATION ANALYSIS =====
Final Score: 4/5

Issues Found:
 - robots.txt blocks general crawling.

Recommendations:
 - Update robots.txt to allow general crawling.


In [4]:
import requests
from typing import Tuple, List
from bs4 import BeautifulSoup

# ============================================================
#      ADVANCED ANTI-AUTOMATION ABSENCE CHECK  (v2.0)
#        Fully aligned with ARI Anti-Automation Pillar
# ============================================================

def check_anti_automation_absence(url: str, html: str) -> Tuple[int, List[str], List[str]]:
    score = 0
    issues = []
    recommendations = []

    # --------------------------------------
    # 1. User-Agent Variance Test (NEW)
    # --------------------------------------
    user_agents = {
        "desktop": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "mobile": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0)",
        "bot": "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
    }
    
    responses = {}
    for key, ua in user_agents.items():
        try:
            r = requests.get(url, timeout=10, headers={"User-Agent": ua})
            responses[key] = r
        except:
            responses[key] = None

    # If desktop loads but bot UA fails → bot-blocking detected
    if responses["desktop"] and (not responses["bot"] or responses["bot"].status_code >= 400):
        issues.append("Site behaves differently for bot user-agents (bot-blocking likely).")
        recommendations.append("Allow consistent content for bot UAs where safe.")
    else:
        score += 1

    # --------------------------------------
    # 2. Rate-Limit & Throttling Detection (NEW)
    # --------------------------------------
    main_res = responses["desktop"]
    if not main_res:
        issues.append("URL unreachable.")
        recommendations.append("Retry or ensure domain resolves.")
        return 0, issues, recommendations

    if main_res.status_code == 429:
        issues.append("Rate limiting detected (429 Too Many Requests).")
        recommendations.append("Increase rate limits or whitelist automation agents.")
    else:
        score += 1

    # Soft throttling
    if "retry-after" in main_res.headers:
        issues.append("Throttling detected via Retry-After header.")
        recommendations.append("Tune throttling rules to support agent flows.")

    # --------------------------------------
    # 3. Bot-Blocking Headers (IMPROVED)
    # --------------------------------------
    xr = main_res.headers.get("x-robots-tag", "").lower()

    if any(tag in xr for tag in ["noindex", "nofollow"]):
        issues.append("Bot-blocking X-Robots-Tag detected.")
        recommendations.append("Remove restrictive directives if discovery is required.")
    else:
        score += 1

    # --------------------------------------
    # 4. robots.txt Smart Analysis (IMPROVED)
    # --------------------------------------
    try:
        parsed = requests.utils.urlparse(url)
        robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
        robots = requests.get(robots_url, timeout=5)

        # Missing robots.txt = neutral (not a problem)
        if robots.status_code != 200:
            recommendations.append("robots.txt missing — optional but recommended.")
        else:
            txt = robots.text.lower()

            if "disallow: /" in txt:
                issues.append("robots.txt fully blocks all crawling.")
                recommendations.append("Remove 'Disallow: /' for agents that require access.")
            else:
                score += 1

    except:
        recommendations.append("Could not fetch robots.txt — treat as neutral.")

    # --------------------------------------
    # 5. CAPTCHA Scanner (Improved)
    # --------------------------------------
    lower_html = main_res.text.lower()
    captcha_keywords = [
        "g-recaptcha", "h-captcha", "cf-challenge",
        "turnstile", "please verify you are a human",
        "are you a robot"
    ]

    if any(c in lower_html for c in captcha_keywords):
        issues.append("Possible CAPTCHA or challenge script found.")
        recommendations.append("Avoid CAPTCHA on navigation flows.")
    else:
        score += 1

    # --------------------------------------
    # 6. Fingerprinting / Bot-Manager Detection (IMPROVED)
    # --------------------------------------
    fingerprint_signals = [
        "fingerprintjs", "perimeterx", "arkose",
        "humansecurity", "cf-ray", "bot-detection",
        "navigator.hardwareconcurrency", "webdriver"
    ]

    if any(sig in lower_html for sig in fingerprint_signals):
        issues.append("Possible bot-detection or fingerprinting libraries detected.")
        recommendations.append("Ensure fingerprinting does not block legitimate agents.")
    else:
        score += 1

    # --------------------------------------
    # Final scoring (0–5)
    # --------------------------------------
    score = min(score, 5)
    return score, issues, recommendations


In [18]:
# checking what types works its doing  in new recomendation
url = input("Enter URL to check Anti-Automation Signals: ")

try:
    html = requests.get(url, timeout=10).text
    
    score, issues, recommendations = check_anti_automation_absence(url, html)

    print("\n===== ANTI-AUTOMATION ANALYSIS =====")
    print(f"Final Score: {score}/5\n")

    print("Issues Found:")
    if issues:
        for i in issues:
            print(" -", i)
    else:
        print(" No issues detected.")

    print("\nRecommendations:")
    if recommendations:
        for r in recommendations:
            print(" -", r)
    else:
        print(" No recommendations — site looks automation-friendly.")

except Exception as e:
    print("Error fetching URL:", str(e))

Enter URL to check Anti-Automation Signals:  https://www.google.com/search



===== ANTI-AUTOMATION ANALYSIS =====
Final Score: 4/5

Issues Found:
 - robots.txt blocks general crawling.

Recommendations:
 - Update robots.txt to allow general crawling.
