In [1]:
import warnings
warnings.filterwarnings("ignore")
import logging
logging.getLogger("asyncio").setLevel(logging.ERROR)
logging.getLogger("pyppeteer").setLevel(logging.ERROR)
logging.getLogger("duckduckgo_search").setLevel(logging.ERROR)
!pip install pyppeteer duckduckgo-search nest_asyncio
!pip install -q ddgs pyppeteer

import nest_asyncio
nest_asyncio.apply()

import asyncio
from pyppeteer import launch

import nest_asyncio
nest_asyncio.apply()

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

from ddgs import DDGS
import asyncio
from pyppeteer import launch



## Optimized Phone Views

In [13]:
def load_templates(path="base_tail_templates.txt"):
    """Parse a sectioned template file into LOREM, BASE and TAIL line lists.

    Every line is stripped of surrounding whitespace and blank lines are
    skipped. A line of the form ``[NAME]`` starts a section; the name is
    matched case-insensitively against LOREM, BASE and TAIL. All other lines
    are appended to the section that is currently active.

    Args:
        path: Location of the template text file.

    Returns:
        A ``(lorem, base, tail)`` tuple of lists of strings, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    sections = {"LOREM": [], "BASE": [], "TAIL": []}
    current = None
    # "utf-8-sig" reads plain UTF-8 as before but also drops a leading BOM,
    # which would otherwise hide the first "[SECTION]" header of the file.
    with open(path, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            # Section header: switch the active section.
            if line.startswith("[") and line.endswith("]"):
                name = line[1:-1].strip().upper()
                # An unrecognised header deactivates collection, so its
                # lines are ignored instead of leaking into the previous
                # known section.
                current = name if name in sections else None
                continue

            # Content line: store it under the active section, if any.
            if current is not None:
                sections[current].append(line)
    return sections["LOREM"], sections["BASE"], sections["TAIL"]

# Read the three template lists from the plain-text template file:
# LOREM (body sentences), BASE_TEMPLATES and TAIL_TEMPLATES (title pieces).
# This runs when the cell executes, so the file must exist in the working directory.
LOREM, BASE_TEMPLATES, TAIL_TEMPLATES = load_templates("base_tail_templates.txt")


import os
import random
from pathlib import Path

# ========= CONFIG =========
DUMMY_DIR = "dummy_pages"      # output folder for the generated HTML files
IMAGE_DIR = "unsplash_images"  # folder whose image files the pages link to
NUM_PAGES = 25                 # how many pages the generation loop writes
KEYWORD = "Bahlil"             # keyword used in titles and output file names
# ==========================

# Make sure the output folder exists before any page is written.
os.makedirs(DUMMY_DIR, exist_ok=True)

# Collect the local image file names (jpg / jpeg / png, case-insensitive).
# isdir() instead of exists(): a regular file with this name would pass an
# exists() check and then make os.listdir() raise. The listing is sorted
# because os.listdir() returns entries in arbitrary order, which would make
# image selection differ between machines even with a fixed random seed.
if os.path.isdir(IMAGE_DIR):
    image_files = sorted(
        name
        for name in os.listdir(IMAGE_DIR)
        if name.lower().endswith((".jpg", ".jpeg", ".png"))
    )
else:
    image_files = []

# Medium-style mobile CSS, kept as one HTML snippet that is added to every
# generated page.
# - The <style> block caps the body at 420px (--max-width) and styles the
#   tagline, images, author line, headings, paragraphs, bottom bar and tags.
# - Fonts are pulled in through a Google Fonts @import URL.
# - Note that the snippet is not pure CSS: after </style> it also carries an
#   HTML comment and the tagline <div>.
MEDIUM_STYLE = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&family=Merriweather:wght@300;400;700&family=Poppins:wght@300;400;600;700&display=swap');

:root {
  --max-width: 420px; /* phone-friendly width */
}

body {
  font-family: 'Merriweather', Georgia, serif;
  max-width: var(--max-width);
  margin: auto;
  padding: 16px;
  line-height: 1.6;
  color: #222;
  background: #fff;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

/* NEW: Tagline */
.tagline {
  font-family: 'Inter', sans-serif;
  font-size: 26px;
  color: #888;
  text-transform: uppercase;
  letter-spacing: 1px;
  margin-top: 12px;
  margin-bottom: 12px;
  text-align: center;
}

/* hero image */
img.inline {
  width: 100%;
  max-height: 320px;
  border-radius: 6px;
  margin: 16px 0;
  object-fit: cover;
}

/* author meta */
.author {
  color: #6b6b6b;
  font-size: 13px;
  margin-bottom: 16px;
  font-family: 'Inter', system-ui, -apple-system, 'Segoe UI', Roboto, 'Helvetica Neue', Arial;
  display: flex;
  align-items: center;
  gap: 10px;
}
.author img {
  width: 36px;
  height: 36px;
  border-radius: 50%;
  object-fit: cover;
}

/* title */
h1 {
  font-family: 'Poppins', sans-serif;
  font-weight: 700;
  font-size: 40px;
  line-height: 1.2;
  padding: 50px 0 20px 0;
  margin: 8px 0 12px 0;
  color: #111;
}

/* subtitle & body */
.subtitle {
  color: #6d6d6d;
  font-size: 14px;
  margin-bottom: 16px;
  font-family: 'Inter', sans-serif;
}

p {
  font-size: 16px;
  color: #2b2b2b;
  margin: 14px 0;
  font-family: 'Merriweather', serif;
}

/* hero image */
img.hero {
  width: 100%;
  max-height: 320px;
  border-radius: 6px;
  margin: 16px 0;
  object-fit: cover;
}

/* headings inside article */
h2 {
  font-family: 'Poppins', sans-serif;
  font-size: 20px;
  margin-top: 24px;
  margin-bottom: 10px;
  font-weight: 600;
}

/* bottom bar */
.bottom-bar {
  display: flex;
  align-items: center;
  gap: 10px;
  border-top: 1px solid #eee;
  padding-top: 12px;
  margin-top: 24px;
  color: #666;
  font-family: 'Inter', sans-serif;
  font-size: 13px;
}

/* tags */
.tag {
  display: inline-block;
  background: #f3f4f6;
  padding: 5px 8px;
  border-radius: 14px;
  font-size: 12px;
  margin-left: 6px;
  font-family: 'Inter', sans-serif;
}
</style>

<!-- INSERTED TAGLINE -->
<div class="tagline">The Daily Post — your dummy-pages-generated news</div>
"""

def random_paragraph():
    """Build one paragraph from distinct, randomly chosen LOREM sentences.

    Two to four sentences are drawn without repetition and joined with single
    spaces. When LOREM holds fewer sentences than the drawn count, all
    available sentences are used instead of raising.

    Returns:
        The paragraph text; an empty string when LOREM is empty.
    """
    wanted = random.randint(2, 4)
    # random.sample raises ValueError if asked for more items than exist,
    # so cap the request at the number of sentences actually loaded.
    count = min(wanted, len(LOREM))
    return " ".join(random.sample(LOREM, k=count))


def pick_random_image():
    """Return a randomly chosen entry of ``image_files``, or None if it is empty."""
    return random.choice(image_files) if image_files else None

def generate_dynamic_title(keyword, i):
    """Compose a randomised page title that contains ``keyword``.

    A base template and a tail template are picked at random. A tail that
    contains ``#`` is formatted with ``i`` as its positional argument. The
    base template is formatted with ``keyword=keyword``. The keyword is then
    placed, at random, in front of, between, or after the two pieces.

    Args:
        keyword: Text substituted into the base template and added to the title.
        i: Number injected into tail templates that contain ``#``.

    Returns:
        The assembled title string.
    """
    base_template = random.choice(BASE_TEMPLATES)
    tail_text = random.choice(TAIL_TEMPLATES)

    # Only the "numbered" tails (marked by '#') receive the page number.
    if "#" in tail_text:
        tail_text = tail_text.format(i)

    placement = random.choice(["front", "middle", "end"])
    headline = base_template.format(keyword=keyword)

    if placement == "front":
        return f"{keyword}: {headline} — {tail_text}"
    if placement == "middle":
        return f"{headline} — {keyword} — {tail_text}"
    # placement == "end"
    return f"{headline} — {tail_text} — {keyword}"


def build_dummy_html(title, keyword, image_probability=1.0):
    """Assemble one complete dummy article page as an HTML string.

    The page consists of a head (charset, viewport and the shared style
    block), then the body: any markup that follows ``</style>`` in
    MEDIUM_STYLE, the title, an author line naming the keyword, an optional
    intro image and 4-7 random paragraphs.

    Args:
        title: Text placed inside the page's ``<h1>``.
        keyword: Keyword shown in bold in the author line.
        image_probability: Chance (0.0-1.0) that an intro image is attempted.
            The default of 1.0 always tries, matching the previous behaviour;
            an image is only emitted when pick_random_image() returns one.

    Returns:
        The full HTML document as a single string.
    """
    # MEDIUM_STYLE contains the <style> block followed by body-level markup.
    # Split it so the style stays in <head> and the rest goes into <body>;
    # if there is no "</style>" everything stays in <head> as before.
    style_block, style_end, body_intro = MEDIUM_STYLE.partition("</style>")

    parts = [
        "<!DOCTYPE html>",
        "<html><head>",
        # The page is saved as UTF-8 and contains non-ASCII characters, so
        # declare the encoding instead of relying on browser sniffing.
        '<meta charset="utf-8">',
        # Needed for the phone-width layout to render at device width.
        '<meta name="viewport" content="width=device-width, initial-scale=1">',
        style_block + style_end,
        "</head><body>",
        body_intro,
        # Title
        f"<h1>{title}</h1>",
        f"<div class='author'>Written by AI • Contains keyword: <b>{keyword}</b></div>",
    ]

    # Intro image, attempted with probability `image_probability`.
    if random.random() < image_probability:
        img_file = pick_random_image()
        if img_file:
            parts.append(
                f"""<img class="inline" src="../{IMAGE_DIR}/{img_file}" alt="inline image">"""
            )

    # Main paragraphs
    for _ in range(random.randint(4, 7)):
        parts.append(f"<p>{random_paragraph()}</p>")

    parts.append("</body></html>")
    return "".join(parts)


# ====== GENERATE NUM_PAGES PAGES ======
# Writes one UTF-8 HTML file per index 1..NUM_PAGES into DUMMY_DIR, each with
# its own randomly generated title and body, and reports every file written.
for i in range(1, NUM_PAGES + 1):
    file_path = Path(DUMMY_DIR) / f"dummy_{KEYWORD}_{i}.html"

    title = generate_dynamic_title(KEYWORD, i)
    html = build_dummy_html(title, KEYWORD)

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(html)

    print(f"Generated → {file_path}")

print("\nAll dummy Medium-style pages created successfully!")

Generated → dummy_pages\dummy_Bahlil_1.html
Generated → dummy_pages\dummy_Bahlil_2.html
Generated → dummy_pages\dummy_Bahlil_3.html
Generated → dummy_pages\dummy_Bahlil_4.html
Generated → dummy_pages\dummy_Bahlil_5.html
Generated → dummy_pages\dummy_Bahlil_6.html
Generated → dummy_pages\dummy_Bahlil_7.html
Generated → dummy_pages\dummy_Bahlil_8.html
Generated → dummy_pages\dummy_Bahlil_9.html
Generated → dummy_pages\dummy_Bahlil_10.html
Generated → dummy_pages\dummy_Bahlil_11.html
Generated → dummy_pages\dummy_Bahlil_12.html
Generated → dummy_pages\dummy_Bahlil_13.html
Generated → dummy_pages\dummy_Bahlil_14.html
Generated → dummy_pages\dummy_Bahlil_15.html
Generated → dummy_pages\dummy_Bahlil_16.html
Generated → dummy_pages\dummy_Bahlil_17.html
Generated → dummy_pages\dummy_Bahlil_18.html
Generated → dummy_pages\dummy_Bahlil_19.html
Generated → dummy_pages\dummy_Bahlil_20.html
Generated → dummy_pages\dummy_Bahlil_21.html
Generated → dummy_pages\dummy_Bahlil_22.html
Generated → dummy_p