In [None]:
# # %%
# # Install required packages (if you haven't)
# import sys
# # NOTE: the PyPI package for `import fitz` is `pymupdf`, not `fitz`.
# !{sys.executable} -m pip install -q openai tiktoken python-dotenv tqdm pandas pinecone pymupdf

In [None]:
# import sys
# !{sys.executable} -m pip install pymupdf

Collecting pymupdf
  Downloading pymupdf-1.26.3-cp39-abi3-macosx_11_0_arm64.whl.metadata (3.4 kB)
Downloading pymupdf-1.26.3-cp39-abi3-macosx_11_0_arm64.whl (22.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.4/22.4 MB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pymupdf
Successfully installed pymupdf-1.26.3


In [None]:
# Sanity check: confirm PyMuPDF is importable under its `fitz` module name
# and print the module docstring (it reports the installed version).
import fitz
print(fitz.__doc__)

PyMuPDF 1.26.3: Python bindings for the MuPDF 1.26.3 library (rebased implementation).
Python 3.12 running on darwin (64-bit).



In [10]:
# %%
import os

import fitz  # PyMuPDF
import pandas as pd
from dotenv import load_dotenv
from IPython.display import display
from openai import OpenAI
from pinecone import Pinecone, PodSpec
from tqdm import tqdm

In [11]:
# %%
# Load environment variables
load_dotenv()

# Read the three required settings from the process environment in one pass.
OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_ENV = map(
    os.getenv,
    ("OPENAI_API_KEY", "PINECONE_API_KEY", "PINECONE_ENV"),
)

# Fail fast, naming the first setting that is unset or empty.
assert OPENAI_API_KEY, "Missing OPENAI_API_KEY"
assert PINECONE_API_KEY, "Missing PINECONE_API_KEY"
assert PINECONE_ENV, "Missing PINECONE_ENV"

In [12]:
# Create a Pinecone client instance
pc = Pinecone(api_key=PINECONE_API_KEY)

# Name of the index queried by the rest of the notebook
index_name = "funding-search"

# (Optional) Create the index if it doesn't exist.
# `Pinecone.create_index` has no `environment` keyword; the deployment target
# is passed through the required `spec` argument. PodSpec is used because
# PINECONE_ENV is treated as a pod environment name.
# NOTE(review): switch to ServerlessSpec(cloud=..., region=...) if this
# project uses serverless indexes.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # must match the length of the embeddings stored/queried
        metric="cosine",  # match how you built your index
        spec=PodSpec(environment=PINECONE_ENV),
    )

# Connect to the index
index = pc.Index(index_name)

In [13]:
# %%
# (Optional) Extract PDF profile text function, if using PDF profile to generate queries
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page in a PDF, joined with newlines.

    Args:
        pdf_path: Path of the PDF file to open with ``fitz.open``.

    Returns:
        The result of ``page.get_text()`` for each page, in page order,
        separated by a single newline character.
    """
    # Use a context manager so the document handle is released even if
    # text extraction raises part-way through the pages.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text() for page in doc)

# Example usage:
# pdf_path = "/path/to/sample_user_profile.pdf"
# pdf_text = extract_text_from_pdf(pdf_path)
# print("PDF Text Sample:", pdf_text[:500])


In [14]:
# %%
# Free-text description of the company; this string is what gets embedded and
# searched against the index. It is set by hand here — replace it with text
# generated from a user profile (e.g. extracted PDF text) if you wish.
query = "We are an AI company focused on AI for robotics. We are focusing on research right now."

In [15]:
# %%
client = OpenAI(api_key=OPENAI_API_KEY)

# Embed the query text; a single input is sent, so the vector is taken from
# the first (only) entry of the response's `data` list.
query_embedding = (
    client.embeddings
    .create(model="text-embedding-3-small", input=[query])
    .data[0]
    .embedding
)

In [16]:
# %%
# Search settings: the namespace the embeddings were uploaded under
# (assumed here to be "openai-v3") and the number of hits to retrieve.
namespace = "openai-v3"
top_k = 5

# Nearest-neighbour lookup for the query vector; metadata is requested so
# each hit carries its stored fields.
semantic_matches = index.query(
    namespace=namespace,
    vector=query_embedding,
    top_k=top_k,
    include_metadata=True,
)

In [17]:
# %%
# Collect each hit's metadata into a list of records for tabular display
matches = [match["metadata"] for match in semantic_matches["matches"]]

# Column order for a clearer display
desired_order = [
    "name", "description", "domain", "eligibility", "amount",
    "deadline", "url", "id"
]

# reindex (rather than df[desired_order]) so that a field missing from every
# returned record — or an empty result set — produces an empty/NaN column
# instead of raising KeyError.
funding_results_df = pd.DataFrame(matches).reindex(columns=desired_order)
display(funding_results_df)

Unnamed: 0,name,description,domain,eligibility,amount,deadline,url,id
0,AIRISE Open Call,AIRISE supports SMEs and mid-caps in the imple...,AI in production,SMEs and mid-caps,"Up to 60,000 euros","July 16, 2025",https://airise.eu/calls,0965271e1a6f6f68548db796c1e7c80d
1,"Funding for projects on the topic of ""Applicat...",Guideline for the funding of projects on the t...,"Research & Innovation (topic-specific), Health...","University, research institution, company, ass...",amount information not found,deadline information not found,https://www.foerderdatenbank.de/FDB/Content/DE...,cd21459049ca19b22a72fffcda329c79
2,FORTIS 1st Open Call,FORTIS supports projects that develop innovati...,"Human-robot interaction (HRI), multimodal comm...","Consortia of 2-3 organizations: Start-ups, SME...","Up to 250,000 euros","June 04, 2025",https://fortis-project.eu/open-call-1/,bd4c4d243afbba1223855b5ab41181a9
3,Promotion of interdisciplinary pilot projects ...,Guideline for the funding of interdisciplinary...,Research & Innovation (topic-specific),"Research institution, university, company",amount information not found,deadline information not found,https://www.foerderdatenbank.de/FDB/Content/DE...,7f11351b7489d381f3c2e474b06d25eb
4,MASTER 2nd Open Call,MASTER supports projects that develop and vali...,VR/XR,"Universities, research institutions, education...","Up to 100,000 euros","June 12, 2025",https://www.master-xr.eu/open-calls/open-call-2/,b0e5c199bfa519c5a1d8c452edd92901


In [20]:
# %%
# Function: Generate multi-line formatted semantic search result text blocks for LLM or display
def _has_value(value) -> bool:
    """Tell whether a metadata value is set and is not a "... not found" placeholder."""
    return bool(value) and "not found" not in str(value).lower()


def generate_structured_funding_blocks(matches, user_query: str) -> str:
    """Render search matches as numbered markdown blocks, one per program.

    Args:
        matches: Iterable of match records. Each record is indexed with
            ``["metadata"]`` and the result is read with ``.get``; a ``None``
            metadata value is treated as an empty mapping.
        user_query: The user's query text. It is scanned (case-insensitively,
            by substring) for aliases of Amount, Deadline, Eligibility,
            Procedure and Contact to decide which absent fields are worth
            calling out.

    Returns:
        The blocks joined by a blank line. Each block has a bold numbered
        title, one bullet per field that has a real value, an optional
        "Couldn't trace information about ..." bullet for fields the query
        mentioned but the record lacks, and an optional URL bullet. Returns
        an empty string when ``matches`` is empty.
    """
    field_aliases = {
        "Amount": ["amount", "how much", "funding", "money"],
        "Deadline": ["deadline", "last date", "until", "submission date"],
        "Eligibility": ["eligible", "eligibility", "who can apply"],
        "Procedure": ["procedure", "how to apply", "application", "steps", "process"],
        "Contact": ["contact", "email", "person", "support"],
    }
    # Order in which populated fields are listed inside a block.
    listed_fields = [
        "Description", "Domain", "Eligibility", "Amount",
        "Deadline", "Procedure", "Contact", "Source",
    ]

    # The query does not change between matches, so resolve the set of
    # fields it mentions once instead of once per field per match.
    query_lower = user_query.lower()
    requested = {
        label
        for label, aliases in field_aliases.items()
        if any(alias in query_lower for alias in aliases)
    }

    formatted_blocks = []
    for idx, match in enumerate(matches, start=1):
        meta = match["metadata"] or {}
        name = meta.get("name", "Unnamed")

        fields = {
            "Description": meta.get("description"),
            "Domain": meta.get("domain"),
            "Eligibility": meta.get("eligibility"),
            "Amount": meta.get("amount"),
            "Deadline": meta.get("deadline"),
            "Procedure": meta.get("procedure"),
            "Source": meta.get("source"),
            "Contact": meta.get("contact"),
            "URL": meta.get("url"),
        }

        # Fields the user asked about that this record cannot answer
        # (kept in the insertion order of `fields`).
        missing_fields = [
            label
            for label, value in fields.items()
            if label in requested and not _has_value(value)
        ]

        lines = [f"**{idx}. {name}**\n"]
        lines.extend(
            f"   - **{key}**: {fields[key]}\n"
            for key in listed_fields
            if _has_value(fields[key])
        )

        if missing_fields:
            lines.append(
                f"   - *Couldn't trace information about {', '.join(missing_fields)}.*\n"
            )

        # Apply the same placeholder filter to the URL as to every other
        # field, so a "... not found" URL is not advertised as a link.
        if _has_value(fields["URL"]):
            lines.append(f"   - **For more information visit**: {fields['URL']}\n")

        formatted_blocks.append("".join(lines))

    return "\n".join(formatted_blocks)

In [21]:
# %%
# Format the raw matches into markdown blocks, using the query to decide which
# missing fields to mention. The resulting text is printed here and can also
# serve as input to a GPT prompt.
semantic_output = generate_structured_funding_blocks(semantic_matches["matches"], query)
print(semantic_output)

**1. AIRISE Open Call**
   - **Description**: AIRISE supports SMEs and mid-caps in the implementation and scaling of AI solutions in manufacturing. The Open Calls are aimed at companies that are either conducting their first AI experiments or want to bring existing applications to a higher level of maturity (TRL). Funding is available for projects in areas such as design & engineering, process monitoring, manufacturing operations, supply chain, cyber security and training Further information can be found here
   - **Domain**: AI in production
   - **Eligibility**: SMEs and mid-caps
   - **Amount**: Up to 60,000 euros
   - **Deadline**: July 16, 2025
   - **Source**: nrweuropa
   - **For more information visit**: https://airise.eu/calls

**2. Funding for projects on the topic of "Application of artificial intelligence (AI) in drug discovery"**
   - **Description**: Guideline for the funding of projects on the topic of "Application of artificial intelligence (AI) in drug discovery" dated

In [None]:
# %%
# Prepare prompt for GPT recommendation generation (you can customize further)

# Number of programs actually listed in `semantic_output`. Used in the prompt
# wording instead of a hard-coded "5" so the text stays accurate if `top_k`
# changes or the index returns fewer hits.
num_programs = len(semantic_matches["matches"])

# The doubled braces around "url" are f-string escapes: the model receives a
# literal "{url}" placeholder.
llm_prompt = f"""
The company described itself as:

"{query}"

Here are the top {num_programs} most relevant public funding programs in Germany, based on a semantic search match to their needs:

{semantic_output}

Now:

Please write a concise and professional recommendation containing **only the top 2–3 most relevant funding programs** in this format:

Only select the top programs that most directly match the company’s domain, maturity stage (e.g., early-stage research), or funding needs. Ignore entries that are vague or poorly aligned.

For each recommendation, follow this format exactly:

1. <Program Name>  
Why it fits: <1–2 lines explaining relevance to the company’s domain (or) industry (or) field of work>  
**Description**: <1–3 sentence summary of what the program funds and its focus>  
**Eligibility**: <Eligibility>  
**Amount**: <Amount>  
**Deadline**: <Deadline>  
**Contact**: <Contact person, email, or organization>  
**Next Steps**:  
- Step 1: [Visit the official page:]({{url}})  
- Step 2: <One key action the company must take>  
- Step 3: <Another action (e.g., submit proposal, form consortium)>  

If any field like **Amount**, **Deadline**, **Eligibility**, **Procedure**, or **Contact** is missing, either omit the line or say “Not specified”.

Use simple bullet points under **Next Steps**. Only list the top 2 or 3 programs — not all {num_programs}.
"""


In [26]:
# %%
# Send the assembled prompt to the chat model: a short system message sets
# the role, and the prompt itself goes in as the single user turn.
response = client.chat.completions.create(
    messages=[
        dict(role="system", content="You are an expert in funding opportunities."),
        dict(role="user", content=llm_prompt),
    ],
    model="gpt-4-turbo",
)

In [27]:
# Print GPT recommendation output: a heading first, then the message text of
# the first choice in the chat completion response.
print("\n🧾 GPT Recommendation:\n")
print(response.choices[0].message.content)


🧾 GPT Recommendation:

1. **FORTIS 1st Open Call**  
Why it fits: Bridges AI and robotics with a direct aim to enhance human-robot interaction, which aligns with the company's focus on AI for robotics and research.  
**Description**: This program supports innovations in safe and efficient human-robot interaction involving multimodal communication and digital twins in industrial settings.   
**Eligibility**: Consortia of 2-3 organizations: Start-ups, SMEs, mid-caps, research institutions, universities  
**Amount**: Up to 250,000 euros  
**Deadline**: June 04, 2025  
**Contact**: Not specified  
**Next Steps**:  
- Visit the official call page: https://fortis-project.eu/open-call-1/  
- Review the application procedures and guidelines  

2. **MASTER 2nd Open Call**  
Why it fits: Focuses on integrating advanced technologies like XR in robotics training, aligning with AI applications in robotic contexts.  
**Description**: The call aims to develop innovative XR-based educational content 