# SEO (Search Engine Optimization) Brochure Generator (LLM)

In [1]:
import os
import requests
import json
import qrcode
import langdetect
import re
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

# Load API Key
# override=True lets values from a .env file replace variables already present in the environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')  # None when the variable is not set
openai = OpenAI(api_key=api_key)  # shared client used for the brochure and translation requests
MODEL = 'gpt-4o-mini'  # model name passed to every chat completion call in this file

# Headers for web scraping
# Passed to every requests.get call in this file (page scrape and YouTube search).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """Scrape a web page and expose its title, visible text and outgoing links.

    Attributes set by the constructor:
        url: the URL that was requested.
        body: raw bytes of the HTTP response.
        title: text of the <title> tag, or "No title found".
        base_url: "scheme://host" of ``url``.
        text: visible body text with script/style/img/input elements removed
            ("" when the page has no <body>).
        links: unique absolute http(s) URLs found in <a href> tags, in
            document order.
    """

    def __init__(self, url, timeout=10):
        """Fetch ``url`` and parse it.

        Args:
            url: page to download.
            timeout: seconds to wait for the server; requests has no default
                timeout, so without one a stalled server blocks forever.

        Raises:
            requests.RequestException: if the page cannot be downloaded.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # soup.title.string is None when <title> is empty or contains nested
        # tags; fall back instead of rendering the literal text "None".
        raw_title = soup.title.string if soup.title else None
        cleaned_title = raw_title.strip() if raw_title else ""
        self.title = cleaned_title or "No title found"
        self.base_url = "{0.scheme}://{0.netloc}".format(requests.utils.urlparse(url))

        # Remove irrelevant elements
        if soup.body:
            for element in soup.body(["script", "style", "img", "input"]):
                element.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Extract hyperlinks and convert to unique absolute URLs
        self.links = self._resolve_links(
            url, (anchor["href"] for anchor in soup.find_all('a', href=True))
        )

    @staticmethod
    def _resolve_links(page_url, hrefs):
        """Return unique absolute http(s) URLs for ``hrefs`` found on ``page_url``.

        Relative, root-relative and protocol-relative hrefs are resolved
        against ``page_url``. Empty hrefs, pure fragments and non-web schemes
        (mailto:, tel:, javascript:, ...) are dropped. First-seen order is
        kept so the generated prompt is deterministic.
        """
        from urllib.parse import urljoin, urlparse

        resolved = {}
        for href in hrefs:
            href = href.strip()
            if not href or href.startswith("#"):
                continue
            try:
                absolute = urljoin(page_url, href)
                scheme = urlparse(absolute).scheme
            except ValueError:
                # Malformed href (e.g. a broken IPv6 literal); skip it.
                continue
            if scheme in ("http", "https"):
                resolved.setdefault(absolute, None)
        return list(resolved)

    def get_contents(self):
        """Return the title, the first 4000 characters of text, and the links as HTML anchors."""
        content = f"## {self.title}\n\n{self.text[:4000]}"
        if self.links:
            anchors = "".join(
                f'- <a href="{link}" target="_blank">{link}</a>\n' for link in self.links
            )
            content += "\n\n### Related Links\n" + anchors
        return content




def detect_language(text):
    """Detect the most probable language of the input text.

    Args:
        text: text to analyse.

    Returns:
        The language code reported by ``langdetect.detect``, or "unknown"
        when the text is empty/whitespace or langdetect cannot detect a
        language (it raises in that case instead of returning a value).
    """
    # A page without a <body> yields "" as text; answer without calling
    # langdetect, which raises on input that has no usable features.
    if not text or not text.strip():
        return "unknown"
    try:
        return langdetect.detect(text)
    except langdetect.lang_detect_exception.LangDetectException:
        return "unknown"

def generate_qr_code(url, filename="brochure_qr.png"):
    """Generate a QR code image for ``url`` and save it to disk.

    Args:
        url: the data to encode in the QR code.
        filename: path of the image file to write; defaults to
            "brochure_qr.png" so existing callers behave as before.
    """
    qr = qrcode.make(url)
    qr.save(filename)
    print(f"QR Code saved as {filename}")

# System prompt sent with every brochure request: sets the model's role and output format.
system_prompt = (
    "You are an expert content creator who writes concise, SEO-optimized brochures.\n"
    "Ensure the content is structured in Markdown format, visually appealing, and industry-specific."
)

# system_prompt = """You are an expert content creator who writes concise, SEO-optimized brochures.
# Your task is to create engaging, persuasive content in Markdown format that is visually appealing and industry-specific.
# Incorporate details such as available offers, best deals, exclusive promotions, discounts, and key value propositions.
# Highlight any special deals, limited-time offers, or unique selling points that set the company apart.
# Ensure that the brochure is both informative and appealing to potential customers.
# Ensure the content is structured in Markdown format, visually appealing, and industry-specific."""

_LINK_TOKEN_RE = re.compile(
    r'(?P<skip>'
    r'<a\b[^>]*>.*?</a>'           # an HTML anchor that already exists
    r'|</?[A-Za-z][^>]*>'          # any other HTML tag or a <https://...> autolink
    r'|!?\[[^\]]*\]\([^)]*\)'      # a Markdown link or image: [text](target)
    r')'
    r'|(?P<url>(?<![(\[])https?://[^\s<>"\')\]]+)'
    r'|(?P<email>[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
    re.DOTALL,
)


def format_links(content):
    """Turn bare URLs and e-mail addresses in ``content`` into HTML anchors.

    Bare ``http(s)://`` URLs become ``<a href="..." target="_blank">`` links
    and e-mail addresses become ``mailto:`` links. Trailing sentence
    punctuation (``.,!?;:``) is kept outside the link. Text that is already a
    link (existing HTML tags, Markdown ``[text](url)`` links, autolinks) is
    left untouched, so calling the function on its own output is a no-op.
    As before, a URL directly preceded by ``(`` or ``[`` is not wrapped.

    Args:
        content: Markdown/HTML text.

    Returns:
        The text with bare URLs and e-mail addresses wrapped in anchors.
    """

    def _linkify(match):
        # Already-linked constructs are consumed by the "skip" group so the
        # URL/e-mail alternatives never see the text inside them.
        if match.group("skip") is not None:
            return match.group(0)

        email = match.group("email")
        if email is not None:
            return f'<a href="mailto:{email}">{email}</a>'

        raw = match.group("url")
        # Dots are legal inside a URL; only strip punctuation at the very end.
        url = raw.rstrip(".,!?;:")
        trailing = raw[len(url):]
        return f'<a href="{url}" target="_blank">{url}</a>{trailing}'

    return _LINK_TOKEN_RE.sub(_linkify, content)

def get_brochure_user_prompt(company_name, url):
    """Scrape ``url`` and build the user prompt for the brochure request.

    Prints the detected language of the page text as a side effect.

    Returns:
        A ``(user_prompt, detected_lang)`` tuple.
    """
    site = Website(url)
    language = detect_language(site.text)
    print(f"Detected website language: {language}")

    prompt_parts = [
        f"Generate an SEO-optimized, engaging brochure for {company_name}.\n\n",
        "Ensure proper Markdown formatting with headings, bullet points, and readability enhancements.\n\n",
        site.get_contents(),  # page text is already truncated inside get_contents()
    ]

    # Alternative, offer-focused prompt kept for reference:
    # user_prompt = f"Generate an SEO-optimized, engaging brochure for {company_name}.\n\n"
    # user_prompt += "Ensure proper Markdown formatting with headings, bullet points, and readability enhancements.\n"
    # user_prompt += ("Focus on highlighting any special offers, best deals, exclusive promotions, or discounts provided by the company. "
    #                 "Also, emphasize the unique selling points and value propositions evident from the website content.\n\n")
    # user_prompt += "Ensure proper Markdown formatting with headings, bullet points, and readability enhancements.\n\n"
    # user_prompt += website.get_contents()  # Limit the text for LLM processing

    return "".join(prompt_parts), language

def create_brochure(company_name, url):
    """Generate, link-format and display a Markdown brochure for a company.

    Returns:
        A ``(brochure, detected_lang)`` tuple: the formatted brochure text and
        the language detected on the scraped page.
    """
    user_prompt, detected_lang = get_brochure_user_prompt(company_name, url)

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)

    # Turn bare URLs/e-mails into anchors before rendering.
    brochure = format_links(completion.choices[0].message.content)

    display(Markdown(brochure))
    return brochure, detected_lang

def get_related_video(company_name, timeout=10):
    """Return the URL of the first YouTube search result for ``company_name``.

    The YouTube results page is downloaded and scanned for the first
    ``watch?v=<id>`` occurrence.

    Args:
        company_name: search terms.
        timeout: seconds to wait for YouTube before giving up.

    Returns:
        A ``https://www.youtube.com/watch?v=...`` URL, or None when the
        request fails, the status is not 200, or no video id is found.
    """
    from urllib.parse import quote_plus

    # quote_plus turns spaces into "+" (as before) and also escapes characters
    # such as "&" or "#" that would otherwise corrupt the query string.
    search_url = f"https://www.youtube.com/results?search_query={quote_plus(company_name)}"

    try:
        response = requests.get(search_url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # The video is an optional extra; a network failure must not abort the run.
        return None
    if response.status_code != 200:
        return None

    # Only accept the 11 characters YouTube uses in ids (letters, digits, "_", "-").
    match = re.search(r"watch\?v=([\w-]{11})", response.text)
    if match:
        return f"https://www.youtube.com/watch?v={match.group(1)}"  # First video
    return None  # No video found

def translate_brochure(content, target_language, company_name):
    """Translate a brochure, append a related-video box when one is found, and display it.

    Args:
        content: brochure text to translate.
        target_language: language name inserted into the translation prompt.
        company_name: used to search for a related video and in the box caption.

    Returns:
        The translated, link-formatted text (including the video box if any).
    """
    instructions = (
        f"Translate this content into {target_language} with industry-appropriate tone. "
        "Ensure proper Markdown formatting, including clickable links.\n\n"
    )
    chat_messages = [
        {"role": "system", "content": "You are a professional translator and formatter."},
        {"role": "user", "content": instructions + content},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=chat_messages)

    # Turn bare URLs/e-mails into anchors before rendering.
    translated = format_links(completion.choices[0].message.content)

    # Look up a video for the company; the box is only added when one exists.
    video_url = get_related_video(company_name)
    if video_url:
        embed_url = video_url.replace("watch?v=", "embed/")
        box_parts = [
            "\n\n---\n",
            "**Advertisement:**\n",
            f"Check out this featured video about {company_name}:\n\n",
            '<iframe width="560" height="315" ',
            f'src="{embed_url}" ',
            'title="Related Video" frameborder="0" allow="accelerometer; autoplay; clipboard-write; ',
            'encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>',
        ]
        translated += "".join(box_parts)

    display(Markdown(translated))
    return translated

def _read_menu_number(prompt):
    """Read an integer from the user; return None when the input is not a number."""
    try:
        return int(input(prompt))
    except ValueError:
        return None


def _company_name_from_url(url):
    """Derive a display name from the URL's host, e.g. 'https://www.stc.com.sa/x' -> 'Stc'.

    Heuristic: drop a leading "www." and capitalize the first host label.
    Returns "" when the URL has no usable host.
    """
    from urllib.parse import urlparse

    try:
        host = urlparse(url).hostname or ""
    except ValueError:
        return ""
    if host.startswith("www."):
        host = host[len("www."):]
    return host.split(".")[0].capitalize()


def main():
    """Run the interactive brochure workflow.

    Asks for a website URL or a company name, generates and displays the
    brochure, asks for a target language, displays the translation, and
    saves a QR code for the website URL.
    """
    print("Would you like to input a website URL or search by name?")
    print("1. Enter a Website URL")
    print("2. Search by Company Name")
    choice = _read_menu_number("Enter your choice: ")

    if choice == 1:
        url = input("Enter the website URL: ").strip()
        # Accept "www.example.com" style input; requests needs an explicit scheme.
        if "://" not in url:
            url = "https://" + url
        company_name = _company_name_from_url(url)
        if not company_name:
            print("Invalid URL. Exiting.")
            return
    elif choice == 2:
        company_name = input("Enter the company name: ").strip()
        # Best-effort guess at the domain: keep only characters valid in a host label.
        domain_label = re.sub(r"[^a-z0-9-]", "", company_name.lower())
        if not domain_label:
            print("Invalid company name. Exiting.")
            return
        url = f"https://www.{domain_label}.com"
    else:
        print("Invalid choice. Exiting.")
        return

    # Generate the original brochure
    brochure, detected_lang = create_brochure(company_name, url)

    print("Choose a language for translation:")
    language_options = ["English", "Spanish", "French", "German", "Chinese", "Japanese", "Arabic"]

    for i, lang in enumerate(language_options, 1):
        print(f"{i}. {lang}")

    # Re-prompt instead of crashing: the brochure has already been generated,
    # and 0 or a negative number would otherwise index from the end of the list.
    while True:
        choice = _read_menu_number("Enter the number for the target language: ")
        if choice is not None and 1 <= choice <= len(language_options):
            break
        print(f"Please enter a number between 1 and {len(language_options)}.")

    target_language = language_options[choice - 1]
    print(f"Translating brochure to: {target_language}")

    # Generate translated brochure (displayed by translate_brochure itself)
    translate_brochure(brochure, target_language, company_name)

    # Generate QR Code
    generate_qr_code(url)

    print("Brochure created and translated successfully!")

# Start the interactive workflow only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


Would you like to input a website URL or search by name?
1. Enter a Website URL
2. Search by Company Name


Enter your choice:  1
Enter the website URL:  https://www.stc.com.sa/content/stc/sa/en/personal/home.html


Detected website language: en


# Welcome to STC KSA

**Your Trusted Partner for Connectivity and Technology Solutions**

---

### Our Services

From personal plans to business solutions, STC KSA is your go-to provider for a wide range of telecom services. Explore our offerings tailored to your needs!

#### Personal
- **Mobile Plans**: Postpaid, Prepaid, Roaming Packages
- **Home Internet**: Baity Packages, Fiber Solutions
- **Devices**: Smartphones, Tablets, Renewed Devices, Smart Devices
- **Lifestyle Services**: STC Play, STC TV, STC Pay

#### Business
- **Office Solutions**: Quicknet, Maktabi Packages, Business Postpaid
- **Connectivity**: Dedicated Internet Access, Fiber Services, Mobile Data
- **Security Solutions**: DDoS Protection, Cloud Security, Managed SOC

#### Carrier & Wholesale
- **Voice & Roaming**: International Call Services, Roaming Packages
- **Connectivity Services**: National and International VPN, Data Services

---

### Why Choose STC?

- **Comprehensive Coverage**: Our network spans across the Kingdom, ensuring you stay connected wherever you are.
- **Innovative Solutions**: We offer cutting-edge technology tailored for individual users and businesses alike.
- **Customer-Centric**: Our customer support team is always ready to assist you with inquiries and solutions.

---

### Stay Connected with Us

**Discover More**: Explore our website for details on our packages and services.

- [Personal Services](https://www.stc.com.sa/content/stc/sa/en/personal.html)
- [Business Solutions](https://www.stc.com.sa/content/stc/sa/en/business.html)
- [Devices & Gadgets](https://www.stc.com.sa/content/stc/sa/en/personal/devices.html)
- [Support & FAQs](https://www.stc.com.sa/content/stc/sa/en/personal/support/contact-us/customer-care.html)

### Join the STC Family Today!

Get started with STC for seamless connectivity and innovative solutions tailored to your needs. 

**Follow Us**:  
- [Twitter](https://twitter.com/stc_ksa) 
- [Instagram](https://www.instagram.com/stc_ksa) 
- [Facebook](https://www.facebook.com/stc.ksa) 

---

**Contact Us**: For any inquiries or support, visit our website or reach out through our customer service hotline.

### *Your Connectivity, Our Commitment!*

---

For more information, visit **[STC KSA](https://www.stc.com.sa/)**.

Choose a language for translation:
1. English
2. Spanish
3. French
4. German
5. Chinese
6. Japanese
7. Arabic


Enter the number for the target language:  7


Translating brochure to: Arabic


# مرحبًا بكم في STC KSA

**شريككم الموثوق في حلول الاتصال والتكنولوجيا**

---

### خدماتنا

من الخطط الشخصية إلى الحلول التجارية، تُعتبر STC KSA مزودكم الرئيسي لمجموعة واسعة من خدمات الاتصالات. استكشف عروضنا المصممة وفقًا لاحتياجاتك!

#### شخصية
- **خطط الهاتف المحمول**: خطط مفوترة، مسبقة الدفع، باقات التجوال
- **الإنترنت المنزلي**: باقات بايتي، حلول الألياف الضوئية
- **الأجهزة**: الهواتف الذكية، الأجهزة اللوحية، الأجهزة المجددة، الأجهزة الذكية
- **خدمات نمط الحياة**: STC Play، STC TV، STC Pay

#### الأعمال
- **حلول المكاتب**: Quicknet، باقات مكتبي، هاتف مفوتر للأعمال
- **الاتصال**: الوصول إلى الإنترنت المخصص، خدمات الألياف الضوئية، بيانات الهاتف المحمول
- **حلول الأمان**: حماية من هجمات DDoS، أمان السحابة، SOC مُدارة

#### الجملة والناقل
- **الصوت والتجوال**: خدمات المكالمات الدولية، باقات التجوال
- **خدمات الاتصال**: VPN وطنية ودولية، خدمات البيانات

---

### لماذا تختار STC؟

- **تغطية شاملة**: تمتد شبكتنا عبر المملكة، مما يضمن لك الاتصال في كل مكان.
- **حلول مبتكرة**: نقدم تكنولوجيا متطورة مصممة للمستخدمين الأفراد والشركات على حد سواء.
- **تركيز على العميل**: فريق دعم العملاء لدينا جاهز دائمًا لمساعدتك في الاستفسارات والحلول.

---

### تبقَ على اتصال معنا

**اكتشف المزيد**: استعرض موقعنا الإلكتروني لمزيد من التفاصيل حول باقاتنا وخدماتنا.

- [الخدمات الشخصية](https://www.stc.com.sa/content/stc/sa/en/personal.html)
- [حلول الأعمال](https://www.stc.com.sa/content/stc/sa/en/business.html)
- [الأجهزة والأدوات](https://www.stc.com.sa/content/stc/sa/en/personal/devices.html)
- [الدعم والأسئلة الشائعة](https://www.stc.com.sa/content/stc/sa/en/personal/support/contact-us/customer-care.html)

### انضم إلى عائلة STC اليوم!

ابدأ رحلتك مع STC لتجربة اتصال سلسة وحلول مبتكرة تناسب احتياجاتك.

**تابعنا**:  
- [تويتر](https://twitter.com/stc_ksa)  
- [إنستغرام](https://www.instagram.com/stc_ksa)  
- [فيسبوك](https://www.facebook.com/stc.ksa)  

---

**تواصل معنا**: لأي استفسارات أو دعم، تفضل بزيارة موقعنا الإلكتروني أو اتصل بخط خدمة العملاء.

### *اتصالك، التزامنا!*

---

للمزيد من المعلومات، قم بزيارة **[STC KSA](https://www.stc.com.sa/)**.

---
**Advertisement:**
Check out this featured video about Stc:

<iframe width="560" height="315" src="https://www.youtube.com/embed/bdj4DZNTd3I" title="Related Video" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

QR Code saved as brochure_qr.png
Brochure created and translated successfully!
