In [7]:
# ## 📦 Step 1: Import Libraries
import os
from openai import OpenAI
import PyPDF2
from dotenv import load_dotenv

print("✅ Libraries imported successfully!")


✅ Libraries imported successfully!


In [8]:
# ## 🔑 Step 2: Load API Key

# Load environment variables from .env file
load_dotenv()

# Get API key
api_key = os.getenv("OPENAI_API_KEY")

if not api_key:
    print("❌ Error: OPENAI_API_KEY not found!")
    print("Please create a .env file with: OPENAI_API_KEY=sk-your-key-here")
else:
    print(f"✅ API Key loaded: {api_key[:10]}...{api_key[-4:]}")

# Initialize OpenAI client
client = OpenAI(api_key=api_key)
print("✅ OpenAI client initialized!")


✅ API Key loaded: sk-proj-Ld...WEEA
✅ OpenAI client initialized!


In [9]:
import os
import PyPDF2

# ## 📖 Step 3: Define PDF Text Extraction Function

def extract_text_from_pdf(pdf_path="test.pdf"):
    """
    Extract text from a PDF file located in the current directory.
    
    Args:
        pdf_path (str): Path to the PDF file (default: 'test.pdf')
        
    Returns:
        str: Extracted text from the PDF
    """
    # Check if file exists in current directory
    if not os.path.isfile(pdf_path):
        print(f"❌ Error: File '{pdf_path}' not found in current directory: {os.getcwd()}")
        return None
    
    print(f"📄 Reading PDF: {pdf_path}")
    text = ""
    
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            num_pages = len(pdf_reader.pages)
            print(f"📖 Found {num_pages} pages")
            
            for i, page in enumerate(pdf_reader.pages):
                page_text = page.extract_text() or ""
                text += page_text
                print(f"✓ Extracted page {i+1}/{num_pages}")
                
    except Exception as e:
        print(f"❌ Error extracting PDF text: {str(e)}")
        return None
    
    if not text.strip():
        print("⚠️  Warning: No text could be extracted from the PDF")
        return None
        
    print(f"✅ Successfully extracted {len(text)} characters\n")
    return text


# Example call
print("✅ Function defined: extract_text_from_pdf()")

# Run the extraction automatically for test.pdf
pdf_text = extract_text_from_pdf()

if pdf_text:
    print("\n✅ Text extraction complete!")
    print("First 500 characters:\n")
    print(pdf_text[:500] + ("..." if len(pdf_text) > 500 else ""))
else:
    print("❌ Failed to extract text from test.pdf")


✅ Function defined: extract_text_from_pdf()
📄 Reading PDF: test.pdf
📖 Found 2 pages
✓ Extracted page 1/2
✓ Extracted page 2/2
✅ Successfully extracted 5620 characters


✅ Text extraction complete!
First 500 characters:

 
CONFIDENTIAL  20.08.2025  
PROJECT NAME: Motorrad Accessory Bundles  
 
IDEA DESCRIPTION  
Briefly describe your idea to give a broad 
overview of the concept.  This project encompasses a comprehensive range of sales bundles for motorcycle accessories, covering everything from 
communication to customers and dealers. The bundles are designed based on customer use cases. Multiple parts (part numbers) a re 
combined into a bundle. These are displayed in the NBC (NewBikeConfigurator), with discou...


In [None]:
# ## 🤖 Step 4: Define OpenAI Processing Function

def process_with_openai(text, model="gpt-4o-mini", max_chars=8000):
    """
    Send extracted text to OpenAI for BMW Market Potential Hackathon context.
    
    Args:
        text (str): The extracted text to process
        model (str): OpenAI model to use
        max_chars (int): Maximum characters to send to API
        
    Returns:
        str: AI-generated business case analysis for Market Potential
    """
    print(f"🤖 Sending to OpenAI ({model})...")

    if len(text) > max_chars:
        print(f"⚠️  Text truncated from {len(text)} to {max_chars} characters")
        text = text[:max_chars]

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an AI business analyst for BMW's corporate innovation team, "
                        "participating in the Market Potential Hackathon. Your goal is to evaluate "
                        "innovation 1-pagers and generate the missing 'Value | Market Potential' section. "
                        "You have deep understanding of business models, TAM/SAM/SOM frameworks, "
                        "and can perform quick 'back-of-the-envelope' revenue estimations. "
                        "Be data-grounded, logical, and transparent about assumptions."
                    )
                },
                {
                    "role": "user",
                    "content": (
                        "Analyze the following project document and produce a structured 'Market Potential Summary'. "
                        "Follow this structure:\n\n"
                        "1. **Business Model Overview** — What is the core idea, who are the customers, and what is sold?\n"
                        "2. **Market Definition** — Describe which market this belongs to (e.g., motorcycle accessories, circular economy, etc.)\n"
                        "3. **Market Size Estimation** — Identify relevant TAM, SAM, SOM values with reasoning or assumptions.\n"
                        "4. **Revenue Model** — Explain expected revenue logic (per unit, subscription, partner licensing, etc.).\n"
                        "5. **Monetization Variables Table (example):**\n"
                        "   - Variable | Description | Assumed Value | Source or Rationale\n"
                        "   - Motorcycle units | Annual BMW Motorrad sales | 210,000 | BMW 2024 Annual Report\n"
                        "   - Bundle price | Average sale per accessory bundle | €350 | industry benchmark\n"
                        "   - Take rate | % of buyers purchasing bundles | 10–30% | typical retail take rates\n"
                        "6. **Example Calculation:**\n"
                        "   Revenue = Motorcycle units × Take rate × Bundle price × Market coverage × Royalty fee\n"
                        "7. **Conclusion:**\n"
                        "   - Projected 10-year potential or confidence range\n"
                        "   - Summary in one sentence (plain business English)\n\n\n"
                        f"Document to analyze:\n{text}\n\n"
                        "Provide your full reasoning, avoid fluff, and clearly label each section. "
                        "Keep your final output concise, professional, and formatted for inclusion directly into an innovation 1-pager."
                    )
                }
            ],
            max_tokens=900,
            temperature=0.6
        )

        summary = response.choices[0].message.content
        print("✅ Market Potential analysis generated successfully!\n")
        return summary

    except Exception as e:
        print(f"❌ Error processing with OpenAI: {str(e)}")
        return None


print("✅ Function updated: process_with_openai() for Hackathon Context")


✅ Function defined: process_with_openai()


In [12]:
# ## 🚀 Step 5: Process Your PDF

# ⚠️ CHANGE THIS to your PDF file path
pdf_file = "test.pdf"  # <-- Edit this line!

print("="*60)
print("📄 PDF AI PROCESSOR")
print("="*60 + "\n")

# Extract text from PDF
extracted_text = extract_text_from_pdf(pdf_file)

if extracted_text:
    print("✅ Text extraction successful! Proceeding to AI processing...\n")
else:
    print("❌ Failed to extract text. Please check your PDF file.")


📄 PDF AI PROCESSOR

📄 Reading PDF: test.pdf
📖 Found 2 pages
✓ Extracted page 1/2
✓ Extracted page 2/2
✅ Successfully extracted 5620 characters

✅ Text extraction successful! Proceeding to AI processing...



In [13]:
# ## 🤖 Step 6: Get AI Summary

if extracted_text:
    ai_summary = process_with_openai(extracted_text)
    
    if ai_summary:
        print("="*60)
        print("🤖 AI SUMMARY")
        print("="*60)
        print(ai_summary)
        print("\n" + "="*60)
    else:
        print("❌ Failed to get AI summary")
else:
    print("⚠️ No text available to process")


🤖 Sending to OpenAI (gpt-4o-mini)...
✅ AI processing complete

🤖 AI SUMMARY
**Summary of the Document: Motorrad Accessory Bundles**

**Project Overview:**
The project aims to create comprehensive sales bundles for motorcycle accessories, which include various parts tailored to customer use cases. These bundles will be displayed in the NewBikeConfigurator (NBC) with visible discounts compared to individual prices. Dealers can order bundles with a single click, facilitating ease of access to the products.

**Customer Segments:**
1. **End Customers:** Seek suitable motorcycle accessories and benefit from customized bundles.
2. **Dealers:** Aim for a simplified ordering process for BMW-tailored accessory bundles.

**Customer Problems:**
- **End Customers:** Currently face difficulties in navigating a wide range of products, leading to time consumption and lack of price transparency.
- **Dealers:** Experience complications in procuring accessories due to numerous part numbers and unclear pr

In [14]:
# ## 📋 Step 7: View Extracted Text Preview

if extracted_text:
    print("="*60)
    print("📋 EXTRACTED TEXT PREVIEW (first 1000 characters)")
    print("="*60)
    print(extracted_text[:1000] + ("..." if len(extracted_text) > 1000 else ""))
    print("\n" + "="*60)
    print(f"Total characters extracted: {len(extracted_text)}")
    print("="*60)
else:
    print("⚠️ No text available to display")


📋 EXTRACTED TEXT PREVIEW (first 1000 characters)
 
CONFIDENTIAL  20.08.2025  
PROJECT NAME: Motorrad Accessory Bundles  
 
IDEA DESCRIPTION  
Briefly describe your idea to give a broad 
overview of the concept.  This project encompasses a comprehensive range of sales bundles for motorcycle accessories, covering everything from 
communication to customers and dealers. The bundles are designed based on customer use cases. Multiple parts (part numbers) a re 
combined into a bundle. These are displayed in the NBC (NewBikeConfigurator), with discounts compared to individ ual prices where 
applicable. Dealers can order with a single click and it is easy to see whether all parts are available in the required quant ities. In 
addition, the final price is clearly visible (with price advantage vs. individual prices). Bundles must be able to be created and 
maintained by headquarters and NSC (National Sales Company).  
 
Note: This project is merely an extension and is not intended to replace any

In [15]:
# ## 💾 Optional: Save Results to File

if extracted_text and ai_summary:
    output_file = "ai_summary.txt"
    
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("="*60 + "\n")
        f.write("AI SUMMARY\n")
        f.write("="*60 + "\n\n")
        f.write(ai_summary)
        f.write("\n\n" + "="*60 + "\n")
        f.write("EXTRACTED TEXT\n")
        f.write("="*60 + "\n\n")
        f.write(extracted_text)
    
    print(f"✅ Results saved to: {output_file}")
else:
    print("⚠️ No results to save")


✅ Results saved to: ai_summary.txt


In [16]:
# ## ⚙️ Advanced Example (Optional Custom Analysis)

if extracted_text:
    custom_prompt = "Extract the main findings and conclusions from this document:"
    
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # Change to "gpt-4-turbo" for better quality
        messages=[
            {"role": "system", "content": "You are an expert research analyst."},
            {"role": "user", "content": f"{custom_prompt}\n\n{extracted_text[:8000]}"}
        ],
        max_tokens=800,
        temperature=0.5
    )
    
    print("🔍 Custom Analysis:")
    print(response.choices[0].message.content)


🔍 Custom Analysis:
### Main Findings and Conclusions

**Project Overview:**
The "Motorrad Accessory Bundles" project aims to create a comprehensive range of sales bundles for motorcycle accessories, enhancing the purchasing experience for both end customers and dealers. Bundles will be tailored to customer use cases and displayed in the NewBikeConfigurator (NBC) with clear pricing and availability.

**Customer Segments:**
1. **End Customers:** They require a simplified purchasing process for motorcycle accessories that meet their specific needs.
2. **Dealers:** They seek an efficient way to view and order tailored accessory bundles with minimal effort.

**Customer Problems:**
- **End Customers:** Currently face difficulties navigating a vast array of products, leading to confusion regarding pricing and availability. This can result in receiving suboptimal products due to the dealer's procurement preferences.
- **Retailers:** Encounter challenges in sourcing accessory products, as the e