In [1]:
# Auto-install required packages
import importlib
import sys
import subprocess

# Maps the importable module name -> the PyPI distribution that provides it.
# The two differ for some packages (e.g. module `bs4` ships in `beautifulsoup4`).
# The module imported below as `ollama` is published on PyPI as `ollama`.
packages = {
    'fpdf': 'fpdf',
    'bs4': 'beautifulsoup4',
    'ollama': 'ollama',
    'requests': 'requests'
}

for module, package in packages.items():
    try:
        __import__(module)
    except ImportError:
        print(f"üì¶ Installing {package}...")
        # Use the kernel's own interpreter so the package lands in this environment
        subprocess.check_call([sys.executable, "-m", "pip", "install", package, "-q"])
        # Let the import system notice packages installed while the kernel is running
        importlib.invalidate_caches()

print("‚úÖ All packages installed!\n")

# Now import everything
import requests
from bs4 import BeautifulSoup
import ollama
from fpdf import FPDF
import re
from urllib.parse import urljoin, urlparse
import time

# The scraping, AI analysis, and PDF helper functions are defined below.


# STEP 3: Function to Get Website Content
# This is like a robot that visits a website and brings back the text
def get_website_content(url):
    """
    Fetch one web page and return its visible text as a single string.

    Simple explanation:
    - Download the page while identifying as a regular browser
    - Throw away script, style, nav and footer elements
    - Squeeze the remaining text into one space-separated string

    Returns the cleaned text, or None if anything goes wrong
    (the error is printed, not raised).
    """
    # Browser-like User-Agent (some sites block robots)
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    try:
        print(f"üåê Visiting website: {url}")

        # Download the page; HTTP error statuses are turned into exceptions
        page = requests.get(url, headers=browser_headers, timeout=10)
        page.raise_for_status()

        # Parse the HTML and drop elements that carry no readable content
        soup = BeautifulSoup(page.content, 'html.parser')
        for unwanted in soup(['script', 'style', 'nav', 'footer']):
            unwanted.decompose()

        # Normalise whitespace: trim every line, break on double spaces,
        # and keep only the non-empty pieces
        pieces = []
        for raw_line in soup.get_text().splitlines():
            for fragment in raw_line.strip().split("  "):
                fragment = fragment.strip()
                if fragment:
                    pieces.append(fragment)
        text = ' '.join(pieces)

        print(f"‚úÖ Successfully extracted {len(text)} characters from website")
        return text

    except Exception as e:
        print(f"‚ùå Error getting website: {e}")
        return None


# STEP 4: Function to Get Multiple Pages
# Some companies have important info on different pages
def get_multiple_pages(base_url, max_pages=3):
    """
    Get content from the main page and a few important sub-pages

    Simple explanation:
    - Start at the main page
    - Find links to other important pages (About, Services, etc.)
    - Visit those pages too (same site only, each page at most once)
    - Combine all the information

    Args:
        base_url: URL of the site's main page.
        max_pages: Maximum number of pages to collect, counting the main page.

    Returns:
        str: The collected page texts joined by blank lines. Empty string if
        no page could be read.
    """
    all_content = []
    visited_urls = set()

    def _dedupe_key(url):
        # A #fragment or a trailing slash does not make a different page,
        # so ignore both when deciding whether a URL was already tried.
        return urlparse(url)._replace(fragment='').geturl().rstrip('/')

    # Get main page content
    main_content = get_website_content(base_url)
    if main_content:
        all_content.append(f"MAIN PAGE:\n{main_content}")
        visited_urls.add(base_url)

    # Try to find and visit important pages
    try:
        # Same browser-like User-Agent the content fetch uses, so sites that
        # block robots do not reject the link-discovery request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(base_url, headers=headers, timeout=10)
        response.raise_for_status()  # do not harvest links from an error page
        soup = BeautifulSoup(response.content, 'html.parser')

        # Look for important pages (about, services, products, team)
        important_keywords = ['about', 'services', 'products', 'team', 'company']
        links = soup.find_all('a', href=True)

        base_netloc = urlparse(base_url).netloc
        # Every URL we have already tried, successful or not
        attempted = {_dedupe_key(base_url)}

        page_count = 1  # the main page counts towards max_pages
        for link in links:
            if page_count >= max_pages:
                break

            href = link['href']
            # Resolve relative links and drop any #fragment before fetching
            target = urlparse(urljoin(base_url, href))._replace(fragment='')
            full_url = target.geturl()

            # Stay on the same site (this also skips mailto:, javascript:, ...)
            if target.netloc != base_netloc:
                continue

            # Match keywords against path and query only, so a domain such as
            # "mycompany.com" does not make every absolute link look important
            searchable = f"{target.path}?{target.query}".lower()
            if not any(keyword in searchable for keyword in important_keywords):
                continue

            key = _dedupe_key(full_url)
            if key in attempted:
                continue
            attempted.add(key)

            print(f"üìÑ Found important page: {full_url}")
            content = get_website_content(full_url)
            if content:
                all_content.append(f"\n\nADDITIONAL PAGE ({href}):\n{content}")
                visited_urls.add(full_url)
                page_count += 1
                time.sleep(1)  # Be polite, wait 1 second between requests

    except Exception as e:
        print(f"‚ö†Ô∏è Could not fetch additional pages: {e}")

    # Combine all content
    combined = "\n\n".join(all_content)
    print(f"‚úÖ Total content collected: {len(combined)} characters from {len(visited_urls)} pages")

    return combined


# STEP 5: Function to Ask Ollama to Analyze the Website
# This is where the AI reads the website and understands what the company does
def analyze_company_with_ollama(company_name, website_content, model="llama3.2"):
    """
    Ask an Ollama model for a structured, seven-part analysis of a company.

    Simple explanation:
    - Trim the website text to a manageable size
    - Put it into a prompt with a fixed list of questions
    - Return whatever the model answers

    Returns the model's reply text, or None if the Ollama call fails
    (the error is printed, not raised).
    """
    print(f"ü§ñ Asking Ollama to analyze {company_name}...")

    # Keep the prompt small: only the first `char_limit` characters are sent
    char_limit = 8000
    if len(website_content) > char_limit:
        website_content = website_content[:char_limit]
        print(f"‚ö†Ô∏è Content truncated to {char_limit} characters")

    prompt = f"""
You are analyzing a company website to create a professional brochure.

Company Name: {company_name}

Website Content:
{website_content}

Please provide a structured analysis in the following format:

1. COMPANY OVERVIEW (2-3 sentences about what the company does)

2. MISSION & VISION (What is their purpose and goal?)

3. KEY PRODUCTS/SERVICES (List 3-5 main offerings)

4. TARGET AUDIENCE (Who are their customers? Clients? Investors? Recruits?)

5. COMPETITIVE ADVANTAGES (What makes them special or different?)

6. COMPANY VALUES (What principles do they operate by?)

7. KEY ACHIEVEMENTS (Any notable accomplishments, awards, or milestones?)

Be concise but informative. If information is not available, write "Not specified on website."
"""

    # Single-turn conversation: the whole request is one user message
    conversation = [{'role': 'user', 'content': prompt}]

    try:
        reply = ollama.chat(model=model, messages=conversation)
        analysis = reply['message']['content']
        print("‚úÖ Analysis complete!")
        return analysis

    except Exception as e:
        print(f"‚ùå Error with Ollama: {e}")
        return None


# STEP 6: Function to Generate Brochure Content
# Now we ask the AI to write the actual brochure text
def generate_brochure_content(company_name, analysis, model="llama3.2"):
    """
    Turn a company analysis into marketing-style brochure text via Ollama.

    Simple explanation:
    - Embed the analysis in a prompt that lists the brochure sections we want
    - Send it to the model as a single user message
    - Return the model's reply as the brochure text

    Returns the generated text, or None if the Ollama call fails
    (the error is printed, not raised).
    """
    print("üìù Generating brochure content...")

    prompt = f"""
Based on this company analysis, create professional brochure content for {company_name}.

Analysis:
{analysis}

Write a compelling brochure with these sections:

1. HEADLINE (One powerful sentence that captures what they do)

2. ABOUT US (2-3 engaging paragraphs)

3. WHAT WE OFFER (Bullet points of services/products)

4. WHY CHOOSE US (3-5 compelling reasons)

5. OUR VALUES (Short paragraph)

6. CALL TO ACTION (Encouraging closing statement)

Write in a professional, engaging tone suitable for investors, clients, and potential recruits.
Use persuasive language but remain factual.
"""

    # Single-turn conversation: the whole request is one user message
    conversation = [{'role': 'user', 'content': prompt}]

    try:
        reply = ollama.chat(model=model, messages=conversation)
        brochure_content = reply['message']['content']
        print("‚úÖ Brochure content generated!")
        return brochure_content

    except Exception as e:
        print(f"‚ùå Error generating brochure: {e}")
        return None


# STEP 7: Function to Create PDF Brochure
# This takes our text and makes it into a pretty PDF file
class PDF(FPDF):
    """
    Custom PDF class with header and footer

    All text written through this class is first made Latin-1 safe, because
    the 'Arial' font used here is one of FPDF's built-in fonts.
    NOTE(review): built-in FPDF fonts are understood to accept Latin-1 text
    only -- confirm against the installed fpdf version.
    """

    def __init__(self, company_name):
        """Create the document and remember the company name shown in the header."""
        super().__init__()
        self.company_name = company_name

    @staticmethod
    def _latin1_safe(text):
        """Return `text` with every character outside Latin-1 replaced by '?'."""
        return str(text).encode('latin-1', 'replace').decode('latin-1')

    def header(self):
        """Header appears at the top of each page"""
        self.set_font('Arial', 'B', 15)
        # The company name is user input and may contain non-Latin-1 characters
        self.cell(0, 10, self._latin1_safe(self.company_name), 0, 1, 'C')
        self.ln(5)

    def footer(self):
        """Footer appears at the bottom of each page"""
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

    def chapter_title(self, title):
        """Format for section titles (bold text on a light-blue filled bar)"""
        self.set_font('Arial', 'B', 14)
        self.set_fill_color(200, 220, 255)
        # Titles come from generated text, so sanitise them like the body
        self.cell(0, 10, self._latin1_safe(title), 0, 1, 'L', 1)
        self.ln(4)

    def chapter_body(self, body):
        """Format for section content (regular text, wrapped across lines)"""
        self.set_font('Arial', '', 11)
        # Handle special characters
        self.multi_cell(0, 6, self._latin1_safe(body))
        self.ln()


def create_pdf_brochure(company_name, brochure_content, website_url, filename="company_brochure.pdf"):
    """
    Create a beautiful PDF brochure

    Simple explanation:
    - Create a new PDF document
    - Add the company name at the top
    - Add all our brochure content with nice formatting
    - Add the website link
    - Save it as a PDF file

    Args:
        company_name: Name passed to the PDF class for the page header.
        brochure_content: Brochure text; sections are separated by blank lines.
        website_url: URL printed at the end of the document.
        filename: Path the PDF is written to.

    Returns:
        The filename on success, or None if anything fails (the error is
        printed, not raised).
    """
    print(f"üìÑ Creating PDF brochure: {filename}")

    try:
        pdf = PDF(company_name)
        pdf.add_page()

        # Add brochure content
        pdf.set_font('Arial', '', 11)

        # Split content into sections and add to PDF
        for section in brochure_content.split('\n\n'):
            # Strip first, so leading whitespace cannot hide a heading
            section = section.strip()
            if not section:
                continue

            # Only the first line can be a heading; anything after it in the
            # same section is body text (the title bar is a single line).
            first_line, _, remainder = section.partition('\n')
            heading = first_line.strip()
            remainder = remainder.strip()

            # Check if this is a title (usually all caps or starts with number)
            if heading.isupper() or heading[0].isdigit():
                pdf.chapter_title(heading)
                if remainder:
                    pdf.chapter_body(remainder)
            else:
                pdf.chapter_body(section)

        # Add website at the end
        pdf.ln(10)
        pdf.set_font('Arial', 'I', 10)
        pdf.cell(0, 10, f'Visit us: {website_url}', 0, 1, 'C')

        # Save PDF
        pdf.output(filename)
        print(f"‚úÖ PDF brochure created successfully: {filename}")
        return filename

    except Exception as e:
        print(f"‚ùå Error creating PDF: {e}")
        return None


# STEP 8: Main Function - Puts Everything Together!
# This is the master function that runs all the steps
def create_company_brochure(company_name, website_url, output_filename=None):
    """
    Main function that creates the entire brochure

    Simple explanation:
    - Step 1: Visit the website and collect information
    - Step 2: Analyze the information with AI
    - Step 3: Generate brochure content with AI
    - Step 4: Create a beautiful PDF
    - Step 5: Done! üéâ

    Args:
        company_name: Company name used in the prompts, the PDF header and
            the default output filename.
        website_url: Address of the company's main page.
        output_filename: Where to save the PDF. Defaults to
            "<company_name>_brochure.pdf", with spaces and characters that
            are unsafe in filenames replaced by underscores.

    Returns:
        The PDF filename on success, or None if any step fails.
    """
    print("\n" + "="*60)
    print(f"üöÄ CREATING BROCHURE FOR: {company_name}")
    print("="*60 + "\n")

    # Set default filename if not provided
    if output_filename is None:
        # Path separators and other reserved characters in the name would
        # produce an invalid or unintended path, so swap them for underscores.
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', company_name.replace(' ', '_'))
        output_filename = f"{safe_name}_brochure.pdf"

    # STEP 1: Get website content
    print("STEP 1: Collecting website content...")
    website_content = get_multiple_pages(website_url, max_pages=3)

    if not website_content:
        print("‚ùå Failed to get website content. Aborting.")
        return None

    # STEP 2: Analyze with Ollama
    print("\nSTEP 2: Analyzing company with AI...")
    analysis = analyze_company_with_ollama(company_name, website_content)

    if not analysis:
        print("‚ùå Failed to analyze company. Aborting.")
        return None

    print("\n--- ANALYSIS ---")
    print(analysis)
    print("--- END ANALYSIS ---\n")

    # STEP 3: Generate brochure content
    print("STEP 3: Generating brochure content...")
    brochure_content = generate_brochure_content(company_name, analysis)

    if not brochure_content:
        print("‚ùå Failed to generate brochure content. Aborting.")
        return None

    print("\n--- BROCHURE CONTENT ---")
    print(brochure_content)
    print("--- END BROCHURE CONTENT ---\n")

    # STEP 4: Create PDF
    print("STEP 4: Creating PDF brochure...")
    pdf_file = create_pdf_brochure(company_name, brochure_content, website_url, output_filename)

    if pdf_file:
        print("\n" + "="*60)
        print("üéâ SUCCESS! Brochure created successfully!")
        print(f"üìÅ File saved as: {pdf_file}")
        print("="*60 + "\n")
        return pdf_file
    else:
        print("‚ùå Failed to create PDF. Please check errors above.")
        return None


# STEP 9: USAGE EXAMPLE
# This is how you actually use the brochure generator!

# Print the usage instructions as this cell's output. Nothing is generated
# here: the example call further down is commented out, so a brochure is only
# created when create_company_brochure(...) is called explicitly.
# NOTE(review): the banner's closing border line is identical to its opening
# line -- confirm whether a distinct bottom border was intended.
print("""
‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë          COMPANY BROCHURE GENERATOR - READY TO USE!          ‚ïë
‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó

HOW TO USE:
-----------
1. Make sure Ollama is running on your computer
2. Run this cell to create a brochure:

   create_company_brochure(
       company_name="Your Company Name",
       website_url="https://www.company-website.com"
   )

EXAMPLE:
--------
create_company_brochure(
    company_name="OpenAI",
    website_url="https://www.openai.com"
)

The brochure PDF will be saved in your current folder!
""")

# Uncomment the line below and add your company details:
# create_company_brochure("OpenAI", "https://www.openai.com")

üì¶ Installing fpdf...
‚úÖ All packages installed!


‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë          COMPANY BROCHURE GENERATOR - READY TO USE!          ‚ïë
‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó

HOW TO USE:
-----------
1. Make sure Ollama is running on your computer
2. Run this cell to create a brochure:

   create_company_brochure(
       company_name="Your Company Name",
       website_url="https://www.company-website.com"
   )

EXAMPLE:
--------
create_company_brochure(
    company_name="OpenAI",
    website_url="https://www.openai.com"
)

The brochure PDF will be saved in your current folder!



In [2]:
# Build the Tesla brochure; the returned PDF filename is shown as the cell result.
create_company_brochure("Tesla", "https://www.tesla.com")


üöÄ CREATING BROCHURE FOR: Tesla

STEP 1: Collecting website content...
üåê Visiting website: https://www.tesla.com
‚úÖ Successfully extracted 202 characters from website
‚úÖ Total content collected: 213 characters from 1 pages

STEP 2: Analyzing company with AI...
ü§ñ Asking Ollama to analyze Tesla...
‚úÖ Analysis complete!

--- ANALYSIS ---
Here's the analysis of Tesla's company website:

1. COMPANY OVERVIEW:
Tesla, Inc. is an American multinational corporation that specializes in electric vehicles, clean energy solutions, and renewable energy systems.

2. MISSION & VISION:
The mission of Tesla is to accelerate the world's transition to sustainable energy through the production of electric vehicles, solar power systems, and energy storage products. The vision is to be a leading player in the adoption of electric and renewable energy technologies globally.

3. KEY PRODUCTS/SERVICES:
* Electric Cars (Model S, Model X, Model 3, Model Y)
* Solar Panels
* Solar Roofing Systems
* Power

'Tesla_brochure.pdf'

In [3]:
# Build the Google brochure; the returned PDF filename is shown as the cell result.
create_company_brochure(
    company_name="Google",
    website_url="https://www.google.com",
)


üöÄ CREATING BROCHURE FOR: Google

STEP 1: Collecting website content...
üåê Visiting website: https://www.google.com
‚úÖ Successfully extracted 120 characters from website
üìÑ Found important page: https://www.google.com/intl/ne/about.html
üåê Visiting website: https://www.google.com/intl/ne/about.html
‚úÖ Successfully extracted 1053 characters from website
‚úÖ Total content collected: 1227 characters from 2 pages

STEP 2: Analyzing company with AI...
ü§ñ Asking Ollama to analyze Google...
‚úÖ Analysis complete!

--- ANALYSIS ---
Here's the structured analysis of Google:

1. COMPANY OVERVIEW:
Google is a multinational technology company that specializes in Internet-related services and products. They offer a wide range of products and services that aim to organize and make information more easily accessible.

2. MISSION & VISION:
Not specified on website, but based on general knowledge, Google's mission appears to be "to organize the world's information and make it universally a

'Google_brochure.pdf'