In [7]:
import yaml
import requests
from bs4 import BeautifulSoup
import pandas as pd
import random
import os
import time
import json
import asyncio
import re

# --- Gemini API Configuration ---
# The key is read from the GEMINI_API_KEY environment variable so that no credential
# is stored in the notebook (notebooks get shared and committed). It falls back to an
# empty string when the variable is unset. Never paste a key literal here.
API_KEY = os.environ.get("GEMINI_API_KEY", "")
API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-05-20:generateContent"

# --- LLM Simulation Functions ---
async def llm_generate_keywords(brand_content, competitor_content, locations):
    """
    Generates seed keywords using the Gemini API based on website content.

    Args:
        brand_content: Text scraped from the brand site; only the first 1000
            characters are placed in the prompt.
        competitor_content: Text scraped from the competitor site; only the
            first 1000 characters are placed in the prompt.
        locations: Iterable of location strings, joined with ', ' in the prompt.

    Returns:
        A list of non-empty keyword strings obtained by splitting the model's
        reply on commas. If every attempt fails, a hardcoded fallback list is
        returned instead of raising.
    """
    prompt = f"""
    Analyze the following content from a brand's website and its competitor.
    Identify 10-15 highly relevant, high-intent seed keywords that a potential customer
    would use to search for these products/services. Include brand terms, competitor terms,
    and general category terms. Also, consider adding location-specific keywords for these areas: {', '.join(locations)}.
    Provide the keywords as a comma-separated list.

    Brand Content (from {brand_content[:100]}...):
    {brand_content[:1000]}

    Competitor Content (from {competitor_content[:100]}...):
    {competitor_content[:1000]}
    """

    payload = {"contents": [{"role": "user", "parts": [{"text": prompt}]}]}

    # The key travels in a header rather than the query string: requests echoes the
    # full URL in its error messages, and those messages are printed below.
    headers = {'Content-Type': 'application/json', 'x-goog-api-key': API_KEY}

    max_retries = 5
    request_timeout = 30  # seconds; without a timeout a stalled connection hangs the cell

    for attempt in range(max_retries):
        try:
            # requests is blocking, so run it in a worker thread to keep the event loop free.
            response = await asyncio.to_thread(
                requests.post, API_URL, headers=headers, json=payload, timeout=request_timeout
            )
            response.raise_for_status()
            result = response.json()

            candidates = result.get("candidates") or [{}]
            parts = (candidates[0].get("content") or {}).get("parts")
            if parts:
                text = parts[0]["text"]
                return [kw.strip() for kw in text.split(',') if kw.strip()]
            print(f"LLM response structure unexpected: {result}")
        except requests.exceptions.RequestException as e:
            print(f"API call failed (retry {attempt+1}/{max_retries}): {e}")
        except (KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
            # Malformed or unexpectedly shaped response body.
            print(f"An unexpected error occurred: {e}")

        # Exponential backoff between attempts; no point waiting after the last one.
        if attempt < max_retries - 1:
            await asyncio.sleep(2 ** attempt)

    print("Failed to generate keywords after multiple retries.")
    # Fallback to a hardcoded list if the API fails after all retries.
    return [
        "allbirds shoes", "rothys shoes", "sustainable sneakers",
        "wool runners", "tree dashers", "best comfortable travel shoes",
        "allbirds review", "rothys flats", "allbirds vs rothys"
    ]


def llm_group_keywords(keywords_data, brand_name, competitor_name):
    """
    Simulates an LLM grouping keywords into ad groups based on intent.

    Each keyword is assigned to exactly one group, checked in this order:
    competitor term, brand term, product/service category, location,
    and finally long-tail/informational. A keyword that matches both the
    brand and the competitor pattern is treated as a competitor term.

    Args:
        keywords_data: Iterable of dicts, each with at least a 'keyword' string.
        brand_name: Brand identifier added to the brand term list.
        competitor_name: Competitor identifier added to the competitor term list.

    Returns:
        Dict mapping ad group name to the list of items placed in it. The
        items are the same dict objects that were passed in; each one gains a
        'suggested_match_type' key ("Exact", "Phrase" or "Broad").
    """
    ad_groups = {
        "Brand Terms": [],
        "Product/Service Category": [],
        "Competitor Terms": [],
        "Long-Tail / Informational": [],
        "Location-Based Queries": []
    }
    match_types = {
        "Brand Terms": "Exact",
        "Competitor Terms": "Phrase",
        "Product/Service Category": "Phrase",
        "Location-Based Queries": "Phrase",
        "Long-Tail / Informational": "Broad",
    }

    def _compile_terms(terms):
        # Empty terms are dropped: an empty alternative would match at every word
        # boundary and classify every keyword as a hit. Terms are escaped so regex
        # metacharacters in a name cannot break or widen the pattern; a literal
        # dot is kept optional so "name.com" and "namecom" both match.
        escaped = [re.escape(term).replace(r'\.', r'\.?') for term in terms if term]
        return re.compile(r'\b(?:' + '|'.join(escaped) + r')\b', re.IGNORECASE)

    brand_pattern = _compile_terms(
        [brand_name, 'allbirds', 'all birds', 'wool runners', 'tree dashers']
    )
    competitor_pattern = _compile_terms(
        [competitor_name, 'rothys', 'rothys shoes', 'reputation.com']
    )

    category_terms = (
        "shoes", "sneakers", "runners", "flats",
        "marketing platform", "seo", "ads optimization", "reputation management",
    )
    location_terms = (
        "new york", "los angeles", "london", "berlin", "sydney",
        "san ramon", "chicago", "scottsdale", "lehi",
        "liverpool", "munich", "mannheim", "hyderabad",
    )

    for item in keywords_data:
        kw = item['keyword'].lower()

        if competitor_pattern.search(kw):
            group = "Competitor Terms"
        elif brand_pattern.search(kw):
            group = "Brand Terms"
        elif any(term in kw for term in category_terms):
            group = "Product/Service Category"
        elif any(term in kw for term in location_terms):
            group = "Location-Based Queries"
        else:
            group = "Long-Tail / Informational"

        item['suggested_match_type'] = match_types[group]
        ad_groups[group].append(item)

    return ad_groups


# --- Web Scraping Function ---
def get_website_content(url):
    """
    Fetches a URL and returns its text content with whitespace collapsed.

    Args:
        url: Address of the page to download.

    Returns:
        The page text as a single string with runs of whitespace replaced by
        one space, or an empty string if the request or parsing fails. Failures
        are printed rather than raised.
    """
    try:
        response = requests.get(url, timeout=10)
        # Treat 4xx/5xx as a failure so an error page is not returned as site content.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        return " ".join(soup.get_text().split())
    except Exception as e:
        # Deliberately best-effort: the caller continues with empty content.
        print(f"Error scraping {url}: {e}")
        return ""

# --- Keyword Planner Data Simulation ---
def simulate_keyword_planner_data(keywords, seed=None):
    """
    Simulates fetching data from a keyword planner API alternative.

    Keywords with fewer than three whitespace-separated words get higher
    search volumes and bids than longer ones. All metrics are random.

    Args:
        keywords: Iterable of keyword strings.
        seed: Optional seed. When given, a private generator seeded with it is
            used so repeated calls return identical data. When None, the
            module-level `random` generator is used.

    Returns:
        A DataFrame with one row per keyword and the columns 'keyword',
        'avg_monthly_searches', 'top_of_page_bid_low', 'top_of_page_bid_high'
        and 'competition'. The columns are present even when `keywords` is empty.
    """
    columns = [
        "keyword",
        "avg_monthly_searches",
        "top_of_page_bid_low",
        "top_of_page_bid_high",
        "competition",
    ]
    rng = random.Random(seed) if seed is not None else random

    rows = []
    for kw in keywords:
        if len(kw.split()) < 3:
            avg_monthly_searches = rng.randint(1000, 100000)
            low_bid = round(rng.uniform(0.5, 3.0), 2)
            high_bid = round(rng.uniform(3.5, 10.0), 2)
            competition = "High" if rng.random() > 0.5 else "Medium"
        else:
            avg_monthly_searches = rng.randint(50, 5000)
            low_bid = round(rng.uniform(0.2, 1.5), 2)
            high_bid = round(rng.uniform(1.8, 5.0), 2)
            competition = "Medium" if rng.random() > 0.3 else "Low"

        rows.append({
            "keyword": kw,
            "avg_monthly_searches": avg_monthly_searches,
            "top_of_page_bid_low": low_bid,
            "top_of_page_bid_high": high_bid,
            "competition": competition
        })

    # Passing the columns explicitly keeps the schema stable for an empty input,
    # so downstream column lookups do not raise KeyError.
    return pd.DataFrame(rows, columns=columns)

# --- Main Logic ---
def _site_name_from_url(url):
    """Return the first label of a URL's host name, ignoring the scheme and a leading 'www.'."""
    from urllib.parse import urlparse  # local import keeps this block self-contained

    # urlparse only fills in the host when a scheme or a leading '//' is present.
    host = urlparse(url if '://' in url else f'//{url}').hostname or ''
    if host.startswith('www.'):
        host = host[len('www.'):]
    return host.split('.')[0]


async def main():
    """
    Runs the keyword pipeline and writes Deliverable #1 to a text file.

    Reads 'brand_website', 'competitor_website' and 'service_locations' from
    config.yaml in the current working directory, scrapes both sites, asks the
    LLM for seed keywords, attaches simulated planner metrics, keeps keywords
    with at least 500 monthly searches, groups them into ad groups and writes
    the result to 'sem_deliverable_1_output.txt'. Progress is printed per step.
    """
    # Load inputs from config.yaml
    with open('config.yaml', 'r', encoding='utf-8') as file:
        config = yaml.safe_load(file)

    brand_url = config['brand_website']
    competitor_url = config['competitor_website']
    service_locations = config['service_locations']
    # Parse the host properly so http://, missing 'www.' and paths are all handled.
    brand_name = _site_name_from_url(brand_url)
    competitor_name = _site_name_from_url(competitor_url)

    print("--- Step 1: Collecting Inputs ---")
    print(f"Brand URL: {brand_url}")
    print(f"Competitor URL: {competitor_url}")
    print(f"Service Locations: {', '.join(service_locations)}\n")

    print("--- Step 2: Scraping Websites for Keyword Discovery ---")
    brand_content = get_website_content(brand_url)
    competitor_content = get_website_content(competitor_url)
    print("Scraping complete. Generating initial keyword ideas using Gemini API...\n")

    # Use LLM to generate initial keywords
    master_keyword_list = await llm_generate_keywords(brand_content, competitor_content, service_locations)

    print("--- Step 3: Simulating Keyword Planner Data ---")
    keyword_df = simulate_keyword_planner_data(master_keyword_list)
    print(f"Total keywords found (before filtering): {len(keyword_df)}\n")

    # The label states the comparison actually applied below (inclusive of 500).
    print("--- Step 4: Filtering Keywords (Search Volume >= 500) ---")
    filtered_df = keyword_df[keyword_df['avg_monthly_searches'] >= 500]
    print(f"Keywords after filtering: {len(filtered_df)}\n")

    print("--- Step 5: Grouping Keywords into Ad Groups ---")
    final_keywords_dict = llm_group_keywords(filtered_df.to_dict('records'), brand_name, competitor_name)

    output_filename = "sem_deliverable_1_output.txt"
    # Explicit UTF-8: keywords come from an LLM and may contain characters the
    # platform's default encoding cannot represent.
    with open(output_filename, 'w', encoding='utf-8') as f:
        f.write(f"## Deliverable #1: Keyword List Grouped by Ad Groups ({brand_name})\n\n")
        f.write("Based on brand website content, competitor insights, and simulated keyword data with specific location targeting.\n\n")

        for ad_group, keywords in final_keywords_dict.items():
            if not keywords:
                continue  # empty groups are omitted from the report
            f.write(f"### Ad Group: {ad_group}\n")
            f.write("--------------------------------\n")

            for kw_data in keywords:
                f.write(
                    f" - Keyword: {kw_data['keyword']}\n"
                    f"   - Suggested Match Type: {kw_data['suggested_match_type']}\n"
                    f"   - Suggested CPC Range: ${kw_data['top_of_page_bid_low']} - ${kw_data['top_of_page_bid_high']}\n"
                    f"   - Monthly Searches: {kw_data['avg_monthly_searches']}\n"
                    f"   - Competition: {kw_data['competition']}\n"
                    f"\n"
                )
            f.write("\n")

    print(f"Deliverable successfully generated and saved to '{output_filename}'")

# Run the pipeline. Top-level `await` works here because this is a notebook cell;
# a plain Python script would need asyncio.run(main()) instead.
await main()

--- Step 1: Collecting Inputs ---
Brand URL: https://www.allbirds.com
Competitor URL: https://www.rothys.com
Service Locations: New York, NY, Los Angeles, CA, London, UK, Berlin, Germany, Sydney, Australia

--- Step 2: Scraping Websites for Keyword Discovery ---
Scraping complete. Generating initial keyword ideas using Gemini API...

--- Step 3: Simulating Keyword Planner Data ---
Total keywords found (before filtering): 20

--- Step 4: Filtering Keywords (Search Volume > 500) ---
Keywords after filtering: 19

--- Step 5: Grouping Keywords into Ad Groups ---
Deliverable successfully generated and saved to 'sem_deliverable_1_output.txt'


In [9]:
import yaml
import requests
from bs4 import BeautifulSoup
import pandas as pd
import random
import os
import time
import json
import asyncio
import re

# --- Gemini API Configuration ---
# The key is read from the GEMINI_API_KEY environment variable so that no credential
# is stored in the notebook (notebooks get shared and committed). It falls back to an
# empty string when the variable is unset. Never paste a key literal here.
API_KEY = os.environ.get("GEMINI_API_KEY", "")
# To use a different model, change the model name in this URL.
API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-05-20:generateContent"

# --- LLM Simulation Functions ---
async def llm_generate_keywords(brand_content, competitor_content, locations):
    """
    Generates seed keywords using the Gemini API based on website content.

    Args:
        brand_content: Text scraped from the brand site; only the first 1000
            characters are placed in the prompt.
        competitor_content: Text scraped from the competitor site; only the
            first 1000 characters are placed in the prompt.
        locations: Iterable of location strings, joined with ', ' in the prompt.

    Returns:
        A list of non-empty keyword strings obtained by splitting the model's
        reply on commas. If every attempt fails, a hardcoded fallback list is
        returned instead of raising.
    """
    prompt = f"""
    Analyze the following content from a brand's website and its competitor.
    Identify 10-15 highly relevant, high-intent seed keywords that a potential customer
    would use to search for these products/services. Include brand terms, competitor terms,
    and general category terms. Also, consider adding location-specific keywords for these areas: {', '.join(locations)}.
    Provide the keywords as a comma-separated list.

    Brand Content (from {brand_content[:100]}...):
    {brand_content[:1000]}

    Competitor Content (from {competitor_content[:100]}...):
    {competitor_content[:1000]}
    """

    payload = {"contents": [{"role": "user", "parts": [{"text": prompt}]}]}

    # The key travels in a header rather than the query string: requests echoes the
    # full URL in its error messages, and those messages are printed below.
    headers = {'Content-Type': 'application/json', 'x-goog-api-key': API_KEY}

    max_retries = 5
    request_timeout = 30  # seconds; without a timeout a stalled connection hangs the cell

    for attempt in range(max_retries):
        try:
            # requests is blocking, so run it in a worker thread to keep the event loop free.
            response = await asyncio.to_thread(
                requests.post, API_URL, headers=headers, json=payload, timeout=request_timeout
            )
            response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
            result = response.json()

            candidates = result.get("candidates") or [{}]
            parts = (candidates[0].get("content") or {}).get("parts")
            if parts:
                text = parts[0]["text"]
                # Split the text by comma and clean up whitespace
                return [kw.strip() for kw in text.split(',') if kw.strip()]
            print(f"LLM response structure unexpected: {result}")
        except requests.exceptions.RequestException as e:
            print(f"API call failed (retry {attempt+1}/{max_retries}): {e}")
        except (KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
            # Malformed or unexpectedly shaped response body.
            print(f"An unexpected error occurred: {e}")

        # Exponential backoff between attempts; no point waiting after the last one.
        if attempt < max_retries - 1:
            await asyncio.sleep(2 ** attempt)

    print("Failed to generate keywords after multiple retries.")
    # Fallback to a hardcoded list if API fails after all retries
    return [
        "allbirds shoes", "rothys shoes", "sustainable sneakers",
        "wool runners", "tree dashers", "best comfortable travel shoes",
        "allbirds review", "rothys flats", "allbirds vs rothys"
    ]


def llm_group_keywords(keywords_data, brand_name, competitor_name):
    """
    Simulates an LLM grouping keywords into ad groups based on intent.

    Each keyword is assigned to exactly one group, checked in this order:
    competitor term, brand term, product/service category, location,
    and finally long-tail/informational. A keyword that matches both the
    brand and the competitor pattern is treated as a competitor term.

    Args:
        keywords_data: Iterable of dicts, each with at least a 'keyword' string.
        brand_name: Brand identifier added to the brand term list.
        competitor_name: Competitor identifier added to the competitor term list.

    Returns:
        Dict mapping ad group name to the list of items placed in it. The
        items are the same dict objects that were passed in; each one gains a
        'suggested_match_type' key ("Exact", "Phrase" or "Broad").
    """
    ad_groups = {
        "Brand Terms": [],
        "Product/Service Category": [],
        "Competitor Terms": [],
        "Long-Tail / Informational": [],
        "Location-Based Queries": []
    }
    # Match type suggested for every keyword that lands in a given ad group.
    match_types = {
        "Brand Terms": "Exact",
        "Competitor Terms": "Phrase",
        "Product/Service Category": "Phrase",
        "Location-Based Queries": "Phrase",
        "Long-Tail / Informational": "Broad",
    }

    def _compile_terms(terms):
        # Empty terms are dropped: an empty alternative would match at every word
        # boundary and classify every keyword as a hit. Terms are escaped so regex
        # metacharacters in a name cannot break or widen the pattern; a literal
        # dot is kept optional so "name.com" and "namecom" both match.
        escaped = [re.escape(term).replace(r'\.', r'\.?') for term in terms if term]
        return re.compile(r'\b(?:' + '|'.join(escaped) + r')\b', re.IGNORECASE)

    brand_pattern = _compile_terms(
        [brand_name, 'allbirds', 'all birds', 'wool runners', 'tree dashers', 'cubehq', 'cube ai']
    )
    competitor_pattern = _compile_terms(
        [competitor_name, 'rothys', 'rothys shoes', 'reputation.com', 'birdeye']
    )

    category_terms = (
        "shoes", "sneakers", "runners", "flats",
        "marketing platform", "seo", "ads optimization", "reputation management",
    )
    location_terms = (
        "new york", "los angeles", "london", "berlin", "sydney",
        "san ramon", "chicago", "scottsdale", "lehi",
        "liverpool", "munich", "mannheim", "hyderabad",
    )

    for item in keywords_data:
        kw = item['keyword'].lower()

        if competitor_pattern.search(kw):
            group = "Competitor Terms"
        elif brand_pattern.search(kw):
            group = "Brand Terms"
        elif any(term in kw for term in category_terms):
            group = "Product/Service Category"
        elif any(term in kw for term in location_terms):
            group = "Location-Based Queries"
        else:
            group = "Long-Tail / Informational"

        item['suggested_match_type'] = match_types[group]
        ad_groups[group].append(item)

    return ad_groups


# --- Web Scraping Function ---
def get_website_content(url):
    """
    Fetches a URL and returns its text content with whitespace collapsed.

    Args:
        url: Address of the page to download.

    Returns:
        The page text as a single string with runs of whitespace replaced by
        one space, or an empty string if the request or parsing fails. Failures
        are printed rather than raised.
    """
    try:
        response = requests.get(url, timeout=10)
        # Treat 4xx/5xx as a failure so an error page is not returned as site content.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        return " ".join(soup.get_text().split())
    except Exception as e:
        # Deliberately best-effort: the caller continues with empty content.
        print(f"Error scraping {url}: {e}")
        return ""

# --- Keyword Planner Data Simulation ---
def simulate_keyword_planner_data(keywords, seed=None):
    """
    Simulates fetching data from a keyword planner API alternative.

    Keywords with fewer than three whitespace-separated words get higher
    search volumes and bids than longer ones. All metrics are random.

    Args:
        keywords: Iterable of keyword strings.
        seed: Optional seed. When given, a private generator seeded with it is
            used so repeated calls return identical data. When None, the
            module-level `random` generator is used.

    Returns:
        A DataFrame with one row per keyword and the columns 'keyword',
        'avg_monthly_searches', 'top_of_page_bid_low', 'top_of_page_bid_high'
        and 'competition'. The columns are present even when `keywords` is empty.
    """
    columns = [
        "keyword",
        "avg_monthly_searches",
        "top_of_page_bid_low",
        "top_of_page_bid_high",
        "competition",
    ]
    rng = random.Random(seed) if seed is not None else random

    rows = []
    for kw in keywords:
        if len(kw.split()) < 3:
            avg_monthly_searches = rng.randint(1000, 100000)
            low_bid = round(rng.uniform(0.5, 3.0), 2)
            high_bid = round(rng.uniform(3.5, 10.0), 2)
            competition = "High" if rng.random() > 0.5 else "Medium"
        else:
            avg_monthly_searches = rng.randint(50, 5000)
            low_bid = round(rng.uniform(0.2, 1.5), 2)
            high_bid = round(rng.uniform(1.8, 5.0), 2)
            competition = "Medium" if rng.random() > 0.3 else "Low"

        rows.append({
            "keyword": kw,
            "avg_monthly_searches": avg_monthly_searches,
            "top_of_page_bid_low": low_bid,
            "top_of_page_bid_high": high_bid,
            "competition": competition
        })

    # Passing the columns explicitly keeps the schema stable for an empty input,
    # so downstream column lookups do not raise KeyError.
    return pd.DataFrame(rows, columns=columns)

# --- Functions for Deliverables #2 and #3 Output ---

def calculate_shopping_bids(shopping_budget, conversion_rate, keywords_data, is_product_based,
                            average_order_value=150, target_cpa_ratio=0.20, max_keywords=5):
    """
    Calculates and suggests CPC bids for a Manual Shopping Campaign.

    The target CPC is derived as AOV * target_cpa_ratio * (conversion_rate / 100).
    Only keywords containing 'shoes', 'sneakers', 'runners' or 'flats' are
    considered, and the `max_keywords` with the most monthly searches are listed.

    Args:
        shopping_budget: Shopping campaign budget; 0 yields the "Not Applicable" text.
        conversion_rate: Expected conversion rate as a percentage (2 means 2%).
        keywords_data: List of dicts with the keys 'keyword',
            'avg_monthly_searches', 'top_of_page_bid_low',
            'top_of_page_bid_high' and 'competition'.
        is_product_based: When False, the "Not Applicable" text is returned.
        average_order_value: Hypothetical average order value used for the CPA target.
        target_cpa_ratio: Fraction of the AOV allowed as cost per acquisition.
        max_keywords: Maximum number of keywords to include in the report.

    Returns:
        A Markdown-formatted string for Deliverable #3.
    """
    if not is_product_based or shopping_budget == 0:
        return "\n## Deliverable #3: Suggested CPC Bids for Manual Shopping Campaign\n\nNot Applicable for this service-based brand or zero budget.\n"

    target_cpa = average_order_value * target_cpa_ratio
    target_cpc = target_cpa * (conversion_rate / 100)  # Convert % to decimal

    bid_suggestions = f"""
## Deliverable #3: Suggested CPC Bids for Manual Shopping Campaign

### Methodology & Calculations:
- **Simulated Average Order Value (AOV):** ${average_order_value}
- **Target CPA ({target_cpa_ratio:.0%} of AOV):** ${target_cpa:.2f}
- **Target Conversion Rate:** {conversion_rate}%
- **Calculated Target CPC:** ${target_cpc:.2f} (This is our maximum profitable bid per click)

### Suggested CPC Bid Strategy for Manual Shopping Campaign:
"""

    # Filter for relevant shopping keywords (e.g., product-focused)
    product_terms = ('shoes', 'sneakers', 'runners', 'flats')
    shopping_keywords = [
        item for item in keywords_data
        if any(term in item['keyword'].lower() for term in product_terms)
    ]

    if not shopping_keywords:
        bid_suggestions += "\nNo relevant product keywords found for Shopping campaign based on current data."
        return bid_suggestions

    # Highest search volume first; sorted() is stable, so ties keep their input order.
    shopping_keywords = sorted(
        shopping_keywords, key=lambda item: item['avg_monthly_searches'], reverse=True
    )

    for kw_data in shopping_keywords[:max_keywords]:
        # Default: a conservative bid below the target CPC.
        suggested_bid = target_cpc * 0.8
        floor_above_target = kw_data['top_of_page_bid_low'] > target_cpc

        if kw_data['competition'] == "High" and floor_above_target:
            # Bid higher to compete when even the low top-of-page bid exceeds the target.
            suggested_bid = max(target_cpc * 1.2, kw_data['top_of_page_bid_low'] * 0.8)
        elif kw_data['competition'] == "Medium" and floor_above_target:
            suggested_bid = target_cpc * 1.1  # Slightly above target CPC

        suggested_bid = max(0.1, suggested_bid)  # Ensure bid is at least 0.10

        bid_suggestions += f"""
- **Product Keyword:** "{kw_data['keyword']}"
  - **Monthly Searches:** {kw_data['avg_monthly_searches']}
  - **Competition:** {kw_data['competition']}
  - **Top of Page Bid Range:** ${kw_data['top_of_page_bid_low']} - ${kw_data['top_of_page_bid_high']}
  - **Suggested Manual Bid:** ${suggested_bid:.2f}
"""
    bid_suggestions += "\n*Note: Bids are suggestions and should be continuously optimized based on live campaign performance and ROAS goals.*\n"
    return bid_suggestions

def _quote_examples(keywords, limit=2):
    """Return up to `limit` keywords, each wrapped in double quotes and joined by ', '."""
    return ", ".join(f'"{kw}"' for kw in keywords[:limit])


def generate_pmax_themes(ad_groups, brand_name, competitor_name):
    """
    Generates strategic themes for Performance Max Campaign.

    Args:
        ad_groups: Dict mapping ad group name to a list of dicts that each
            carry a 'keyword' string. The groups read are
            "Product/Service Category", "Long-Tail / Informational",
            "Competitor Terms" and "Location-Based Queries"; a missing or
            empty group produces a "No specific ..." line for that section.
        brand_name: Brand name shown in the heading and the comparison example.
        competitor_name: Competitor name shown in the comparison example.

    Returns:
        A Markdown-formatted string for Deliverable #2. Each populated section
        quotes at most the first two keywords of its group as examples.
    """
    def _keywords(group_name):
        return [item['keyword'] for item in ad_groups.get(group_name) or []]

    # The example lists are built outside the f-strings: backslash-escaped quotes
    # inside an f-string expression are a SyntaxError before Python 3.12.
    themes = f"""
## Deliverable #2: Search Themes for Performance Max Campaign ({brand_name})

These themes are derived from high-performing keyword categories and ad groups, guiding the creation of asset groups for optimal PMax campaign performance.

### Product/Service Category Themes:
"""

    product_keywords = _keywords("Product/Service Category")
    if product_keywords:
        product_examples = _quote_examples(product_keywords)
        themes += f"""
- **Core Offerings:** Focus on the primary products/services.
    - Examples: {product_examples}
- **Specific Product/Service Lines:** Break down into more granular offerings.
    - Examples: "Sustainable Sneakers", "AI Marketing Automation" (generic examples, adapt to actual keywords)
"""
    else:
        themes += "\n- No specific product/service category themes identified based on current keywords."

    themes += """
### Use-Case Based Themes:
"""
    informational_keywords = _keywords("Long-Tail / Informational")
    if informational_keywords:
        informational_examples = _quote_examples(informational_keywords)
        themes += f"""
- **Problem/Solution Focused:** Address specific customer needs.
    - Examples: {informational_examples}
- **Value Proposition:** Highlight key benefits.
    - Examples: "eco-friendly footwear", "AI-driven growth" (generic examples, adapt to actual keywords)
"""
    else:
        themes += "\n- No specific use-case based themes identified based on current keywords."

    themes += """
### Competitive Themes:
"""
    competitor_keywords = _keywords("Competitor Terms")
    if competitor_keywords:
        competitor_examples = _quote_examples(competitor_keywords)
        themes += f"""
- **Direct Competitor Targeting:** Capture users searching for rivals.
    - Examples: {competitor_examples}
- **Comparison Queries:** Engage users comparing brands.
    - Examples: "{brand_name} vs {competitor_name}"
"""
    else:
        themes += "\n- No specific competitor themes identified based on current keywords."

    themes += """
### Location-Based Themes:
"""
    location_keywords = _keywords("Location-Based Queries")
    if location_keywords:
        location_examples = _quote_examples(location_keywords)
        themes += f"""
- **Geographic Targeting:** Focus on specific service areas or store locations.
    - Examples: {location_examples}
"""
    else:
        themes += "\n- No specific location-based themes identified based on current keywords."

    return themes


# --- Main Logic ---
def _site_name_from_url(url):
    """Return the first label of a URL's host name, ignoring the scheme and a leading 'www.'."""
    from urllib.parse import urlparse  # local import keeps this block self-contained

    # urlparse only fills in the host when a scheme or a leading '//' is present.
    host = urlparse(url if '://' in url else f'//{url}').hostname or ''
    if host.startswith('www.'):
        host = host[len('www.'):]
    return host.split('.')[0]


async def main():
    """
    Runs the full SEM planning pipeline and writes all three deliverables to one file.

    Reads 'brand_website', 'competitor_website', 'service_locations' and the
    'ad_budgets' mapping ('shopping_ads', 'search_ads', 'pmax_ads') from
    config.yaml in the current working directory. It then scrapes both sites,
    asks the LLM for seed keywords, attaches simulated planner metrics, keeps
    keywords with at least 500 monthly searches, groups them into ad groups and
    writes the keyword list, the PMax themes and the shopping bid suggestions
    to 'sem_plan_full_output.txt'. Progress is printed per step.
    """
    # Load inputs from config.yaml
    with open('config.yaml', 'r', encoding='utf-8') as file:
        config = yaml.safe_load(file)

    brand_url = config['brand_website']
    competitor_url = config['competitor_website']
    service_locations = config['service_locations']
    ad_budgets = config['ad_budgets']
    shopping_budget = ad_budgets['shopping_ads']

    # Parse the host properly so http://, missing 'www.' and paths are all handled.
    brand_name = _site_name_from_url(brand_url)
    competitor_name = _site_name_from_url(competitor_url)

    # Simple heuristic: if shopping budget > 0, assume the brand is product-based.
    is_product_based = (shopping_budget > 0)
    # Assumed conversion rate, in percent, used for the shopping bid calculation.
    conversion_rate_pct = 2

    print("--- Step 1: Collecting Inputs ---")
    print(f"Brand URL: {brand_url}")
    print(f"Competitor URL: {competitor_url}")
    print(f"Service Locations: {', '.join(service_locations)}")
    print(f"Shopping Budget: ${shopping_budget}, Search Budget: ${ad_budgets['search_ads']}, PMax Budget: ${ad_budgets['pmax_ads']}\n")

    print("--- Step 2: Scraping Websites for Keyword Discovery ---")
    brand_content = get_website_content(brand_url)
    competitor_content = get_website_content(competitor_url)
    print("Scraping complete. Generating initial keyword ideas using Gemini API...\n")

    # Use LLM to generate initial keywords
    master_keyword_list = await llm_generate_keywords(brand_content, competitor_content, service_locations)

    print("--- Step 3: Simulating Keyword Planner Data ---")
    keyword_df = simulate_keyword_planner_data(master_keyword_list)
    # Convert DataFrame to list of dicts for easier manipulation in grouping/output functions
    keywords_with_data_list = keyword_df.to_dict('records')
    print(f"Total keywords found (before filtering): {len(keywords_with_data_list)}\n")

    # The label states the comparison actually applied below (inclusive of 500).
    print("--- Step 4: Filtering Keywords (Search Volume >= 500) ---")
    filtered_keywords_list = [kw for kw in keywords_with_data_list if kw['avg_monthly_searches'] >= 500]
    print(f"Keywords after filtering: {len(filtered_keywords_list)}\n")

    print("--- Step 5: Grouping Keywords into Ad Groups ---")
    final_ad_groups = llm_group_keywords(filtered_keywords_list, brand_name, competitor_name)

    # --- Generate Combined Output File ---
    output_filename = "sem_plan_full_output.txt"
    # Explicit UTF-8: keywords come from an LLM and may contain characters the
    # platform's default encoding cannot represent.
    with open(output_filename, 'w', encoding='utf-8') as f:
        # Deliverable #1 Content
        f.write(f"## Deliverable #1: Keyword List Grouped by Ad Groups ({brand_name})\n\n")
        f.write("Based on brand website content, competitor insights, and simulated keyword data with specific location targeting.\n\n")

        for ad_group_name, keywords_in_group in final_ad_groups.items():
            if not keywords_in_group:
                continue  # empty groups are omitted from the report
            f.write(f"### Ad Group: {ad_group_name}\n")
            f.write("--------------------------------\n")

            for kw_data in keywords_in_group:
                f.write(
                    f" - Keyword: {kw_data['keyword']}\n"
                    f"   - Suggested Match Type: {kw_data['suggested_match_type']}\n"
                    f"   - Suggested CPC Range: ${kw_data['top_of_page_bid_low']} - ${kw_data['top_of_page_bid_high']}\n"
                    f"   - Monthly Searches: {kw_data['avg_monthly_searches']}\n"
                    f"   - Competition: {kw_data['competition']}\n"
                    f"\n"
                )
            f.write("\n")

        # Deliverable #2 Content
        pmax_output = generate_pmax_themes(final_ad_groups, brand_name, competitor_name)
        f.write(pmax_output)
        f.write("\n\n")  # Add extra newlines for separation

        # Deliverable #3 Content
        shopping_bids_output = calculate_shopping_bids(
            shopping_budget, conversion_rate_pct, filtered_keywords_list, is_product_based
        )
        f.write(shopping_bids_output)
        f.write("\n")  # Add final newline

    print(f"All deliverables successfully generated and saved to '{output_filename}'")

# Run the pipeline. Top-level `await` works here because this is a Jupyter notebook cell;
# a plain Python script would need asyncio.run(main()) instead.
await main()


--- Step 1: Collecting Inputs ---
Brand URL: https://www.allbirds.com
Competitor URL: https://www.rothys.com
Service Locations: New York, NY, Los Angeles, CA, London, UK, Berlin, Germany, Sydney, Australia
Shopping Budget: $4000, Search Budget: $5000, PMax Budget: $2500

--- Step 2: Scraping Websites for Keyword Discovery ---
Scraping complete. Generating initial keyword ideas using Gemini API...

--- Step 3: Simulating Keyword Planner Data ---
Total keywords found (before filtering): 17

--- Step 4: Filtering Keywords (Search Volume > 500) ---
Keywords after filtering: 17

--- Step 5: Grouping Keywords into Ad Groups ---
All deliverables successfully generated and saved to 'sem_plan_full_output.txt'


In [1]:
pip show streamlit

Note: you may need to restart the kernel to use updated packages.




In [2]:
!pip install streamlit requests beautifulsoup4 pandas pyyaml

Collecting streamlit
  Downloading streamlit-1.48.1-py3-none-any.whl.metadata (9.5 kB)
Collecting altair!=5.4.0,!=5.4.1,<6,>=4.0 (from streamlit)
  Downloading altair-5.5.0-py3-none-any.whl.metadata (11 kB)
Collecting blinker<2,>=1.5.0 (from streamlit)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<7,>=4.0 (from streamlit)
  Downloading cachetools-6.1.0-py3-none-any.whl.metadata (5.4 kB)
Collecting click<9,>=7.0 (from streamlit)
  Downloading click-8.2.1-py3-none-any.whl.metadata (2.5 kB)
Collecting pillow<12,>=7.1.0 (from streamlit)
  Downloading pillow-11.3.0-cp313-cp313-win_amd64.whl.metadata (9.2 kB)
Collecting protobuf<7,>=3.20 (from streamlit)
  Downloading protobuf-6.32.0-cp310-abi3-win_amd64.whl.metadata (593 bytes)
Collecting pyarrow>=7.0 (from streamlit)
  Downloading pyarrow-21.0.0-cp313-cp313-win_amd64.whl.metadata (3.4 kB)
Collecting tenacity<10,>=8.1.0 (from streamlit)
  Downloading tenacity-9.1.2-py3-none-any.whl.metadata (1.2 kB)
Co

In [3]:
import streamlit
import requests
import bs4 # BeautifulSoup is part of the bs4 package
import pandas
import yaml # PyYAML is imported as yaml

# Report the installed version of each dependency, one line per package.
for label, module in (
    ("Streamlit", streamlit),
    ("Requests", requests),
    ("BeautifulSoup4 (bs4)", bs4),
    ("Pandas", pandas),
    ("PyYAML (yaml)", yaml),
):
    print(f"{label} version: {module.__version__}")

Streamlit version: 1.48.1
Requests version: 2.32.3
BeautifulSoup4 (bs4) version: 4.13.4
Pandas version: 2.3.1
PyYAML (yaml) version: 6.0.2


In [1]:
import os
# Print the kernel's current working directory. Relative paths such as
# 'config.yaml' and the output files are resolved against it.
print(os.getcwd())

C:\Users\omen\sem assigment
