In [None]:
!pip install python-pptx

Collecting python-pptx
  Downloading python_pptx-1.0.2-py3-none-any.whl.metadata (2.5 kB)
Collecting XlsxWriter>=0.5.7 (from python-pptx)
  Downloading XlsxWriter-3.2.3-py3-none-any.whl.metadata (2.7 kB)
Downloading python_pptx-1.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.8/472.8 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading XlsxWriter-3.2.3-py3-none-any.whl (169 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.4/169.4 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: XlsxWriter, python-pptx
Successfully installed XlsxWriter-3.2.3 python-pptx-1.0.2


In [None]:
import re
from pptx import Presentation
from sentence_transformers import SentenceTransformer, util


# Checkpoint name handed to SentenceTransformer for both slide and question embeddings.
MODEL_NAME = "all-mpnet-base-v2"

# Passed as `normalize_embeddings` to every encode() call; switched on only when the
# checkpoint name begins with the BAAI/bge prefix.
NORMALIZE_EMBEDDINGS = MODEL_NAME.startswith("BAAI/bge")


# Typographic punctuation mapped to ASCII so it survives the whitelist filter in
# clean_text instead of being deleted.
_TYPOGRAPHIC_TO_ASCII = str.maketrans({
    "\u2022": "-",   # bullet
    "\u2013": "-",   # en dash
    "\u2014": "-",   # em dash
    "\u2018": "'",   # left single quote
    "\u2019": "'",   # right single quote / apostrophe
    "\u201c": '"',   # left double quote
    "\u201d": '"',   # right double quote
})


def clean_text(text):
    """Normalize raw slide or question text before it is embedded.

    Steps, in order:
      1. Map bullets, dashes and curly quotes to their ASCII equivalents.
      2. Drop every character outside a whitelist of letters, digits, whitespace
         and common punctuation. The whitelist keeps ``@ $ % & / + _`` so that
         e-mail addresses, amounts and percentages stay readable.
      3. Collapse runs of whitespace (including newlines) into single spaces.
      4. Strip the ends and lowercase.

    Args:
        text: Raw string. ``None`` or an empty string is accepted.

    Returns:
        The cleaned, lowercased string; ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""
    text = text.translate(_TYPOGRAPHIC_TO_ASCII)
    text = re.sub(r'[^a-zA-Z0-9.,:\-()\'"@$%&/+_\s]', '', text)
    # Collapse whitespace after filtering: removing a symbol that sat between two
    # spaces would otherwise leave a double space behind.
    text = re.sub(r'\s+', ' ', text)
    return text.strip().lower()


# Load the deck; the path is resolved relative to the current working directory.
prs = Presentation("EnerNova_Pitch_Deck.pptx")

# One cleaned text entry per slide that yields any text. Slides with no text are
# skipped, so an index into this list is not a slide number.
slide_knowledge = []
for slide in prs.slides:
    title_shape = slide.shapes.title
    title = title_shape.text if title_shape is not None else ""
    # Identify the title placeholder by its shape id instead of by its text, so a
    # body shape that happens to repeat the title wording is not dropped.
    title_id = title_shape.shape_id if title_shape is not None else None

    body_parts = []
    for shape in slide.shapes:
        if not shape.has_text_frame or shape.shape_id == title_id:
            continue
        body = shape.text.strip()
        if body:
            body_parts.append(body)

    content = "\n".join(body_parts)
    combined_text = f"{title.strip()} {content}"
    cleaned_text = clean_text(combined_text)
    if cleaned_text:
        slide_knowledge.append(cleaned_text)


# Stop here with a clear message when the deck produced no text; an empty corpus
# would otherwise only fail later, during the similarity search, with a less
# obvious error.
if not slide_knowledge:
    raise ValueError("No text was extracted from the presentation; nothing to embed.")

print(f"Loading model: {MODEL_NAME}")
model = SentenceTransformer(MODEL_NAME)

# Embed every slide once up front; answer_question compares each query against
# these stored embeddings.
slide_embeddings = model.encode(
    slide_knowledge,
    convert_to_tensor=True,
    normalize_embeddings=NORMALIZE_EMBEDDINGS
)


def answer_question(question, min_similarity=None):
    """Return the slide text most similar to ``question``.

    The question is cleaned with ``clean_text``, embedded with the module-level
    ``model``, and compared by cosine similarity against ``slide_embeddings``.

    Args:
        question: Free-text question or form-field label.
        min_similarity: Optional cut-off. When given and the best score is below
            it, ``"answer"`` is ``None`` rather than a weak match. The default
            ``None`` applies no cut-off and always returns the top-scoring slide.

    Returns:
        dict with keys:
            ``"question"``: the original, uncleaned question string.
            ``"answer"``: the matched entry of ``slide_knowledge``, or ``None``
                when the match is rejected by ``min_similarity``.
            ``"similarity_score"``: cosine similarity of the best match (float).
    """
    cleaned_q = clean_text(question)
    question_embedding = model.encode(
        cleaned_q,
        convert_to_tensor=True,
        normalize_embeddings=NORMALIZE_EMBEDDINGS
    )
    # A single query gives a one-row similarity matrix; work on that row.
    scores = util.cos_sim(question_embedding, slide_embeddings)[0]
    best_idx = int(scores.argmax())
    best_score = float(scores[best_idx])
    accepted = min_similarity is None or best_score >= min_similarity
    return {
        "question": question,
        "answer": slide_knowledge[best_idx] if accepted else None,
        "similarity_score": best_score
    }


# Prompts to match against the slide texts, one lookup per entry.
questions = [
    "Project Name",
    "Primary Contact Name",
    "Primary Contact Email Address",
    "Project Summary (Max 150 words)",
    "Describe how the project advances clean energy, climate action, or sustainability.",
    "How does the project address equity and/or benefit frontline communities?",
    "How will this funding be used (e.g., equipment, labor, travel)?",
    "What are the expected outcomes of this project?",
    "How will you measure success?",
    "Is this project new or existing?",
    "What is the project timeline?",
    "Total funding request",
    "Are you applying as an individual, team, or organization?",
    "Upload any supporting materials (optional)"
]

# === PRINT MATCHED ANSWERS ===
for q in questions:
    result = answer_question(q)
    # Assemble the four output lines first, then emit them with one print call;
    # the bytes written are the same as printing each line separately.
    report_lines = [
        "\n" + "="*60,
        f"Q: {result['question']}",
        f"A: {result['answer']}",
        f"Similarity Score: {result['similarity_score']:.2f}",
    ]
    print("\n".join(report_lines))


Loading model: all-mpnet-base-v2

Q: Project Name
A: thank you lets power the future, together. contact us at: helloenernova.io
Similarity Score: 0.31

Q: Primary Contact Name
A: business model subscription-based model for energy usage partnerships with local governments and ngos carbon credit monetization
Similarity Score: 0.09

Q: Primary Contact Email Address
A: business model subscription-based model for energy usage partnerships with local governments and ngos carbon credit monetization
Similarity Score: 0.08

Q: Project Summary (Max 150 words)
A: our vision to become a global leader in decentralized and affordable renewable energy technology.
Similarity Score: 0.38

Q: Describe how the project advances clean energy, climate action, or sustainability.
A: our vision to become a global leader in decentralized and affordable renewable energy technology.
Similarity Score: 0.56

Q: How does the project address equity and/or benefit frontline communities?
A: traction - 3 successful pilo