# # 🧠 Mistral + Pinecone: Funding Search Engine with Local LLM

In [1]:
# %%
import os

import pandas as pd
from dotenv import load_dotenv
from IPython.display import display
from ollama import Client  # <-- Ollama to use local mistral
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer

In [2]:
# %%
# Load environment variables (python-dotenv picks up a .env file when one is found)
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# Fail fast with an explicit exception rather than `assert`: assert statements
# are removed when Python runs with -O, which would let a missing key slip
# through to the Pinecone client and fail later with a less obvious error.
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is missing; set it in the environment or in a .env file"
    )

In [3]:
# %%
# Initialize the Pinecone client and open a handle to the index queried below
PINECONE_INDEX_NAME = "funding-search-bge"  # Update with your actual index name
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(PINECONE_INDEX_NAME)

In [4]:
# %%
# Initialize the Ollama client that is later used to chat with the local model
OLLAMA_HOST = "http://localhost:11434"  # Default Ollama host
ollama_client = Client(host=OLLAMA_HOST)

In [5]:
# %%
# Example query: a free-text company profile
# (can also be generated from PDF like before)
query = (
    "We are an AI company focused on AI for robotics. "
    "We are focusing on research right now."
)

In [6]:
# %%
# Embed the query locally with sentence-transformers.
# The SentenceTransformer import lives in the notebook's top import cell so that
# all dependencies are visible (and fail) up front on Restart & Run All.
# NOTE(review): the query vector is only comparable to the stored vectors if the
# index was built with this same model — presumably BAAI/bge-small-en; confirm.
model = SentenceTransformer("BAAI/bge-small-en")
# .tolist() turns the encoder output into a plain Python list for the Pinecone client
query_embedding = model.encode(query).tolist()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [7]:
# %%
# Query Pinecone for the nearest neighbours of the query embedding
TOP_K = 5
PINECONE_NAMESPACE = "open-source-v1"  # your new namespace
semantic_matches = index.query(
    namespace=PINECONE_NAMESPACE,
    vector=query_embedding,
    top_k=TOP_K,
    include_metadata=True,  # the metadata is what the following cells display
)

In [8]:
# %%
# Collect the metadata of every returned match into one table
matches = [hit["metadata"] for hit in semantic_matches["matches"]]
funding_results_df = pd.DataFrame(matches)

# Reorder columns if needed: keep the known columns that are actually present,
# in the order listed here; any other metadata column is dropped from the view.
desired_order = [
    "name",
    "description",
    "domain",
    "eligibility",
    "amount",
    "deadline",
    "url",
    "id",
]
available_columns = set(funding_results_df.columns)
ordered_columns = [col for col in desired_order if col in available_columns]
funding_results_df = funding_results_df[ordered_columns]
display(funding_results_df)

Unnamed: 0,name,description,domain,eligibility,amount,deadline,url,id
0,MASTER 2nd Open Call,MASTER supports projects that develop and vali...,VR/XR,"Universities, research institutions, education...","Up to 100,000 euros","June 12, 2025",https://www.master-xr.eu/open-calls/open-call-2/,b0e5c199bfa519c5a1d8c452edd92901
1,Promotion of interdisciplinary pilot projects ...,Guideline for the funding of interdisciplinary...,Research & Innovation (topic-specific),"Research institution, university, company",amount information not found,deadline information not found,https://www.foerderdatenbank.de/FDB/Content/DE...,7f11351b7489d381f3c2e474b06d25eb
2,"Funding for projects on the topic of ""Applicat...",Guideline for the funding of projects on the t...,"Research & Innovation (topic-specific), Health...","University, research institution, company, ass...",amount information not found,deadline information not found,https://www.foerderdatenbank.de/FDB/Content/DE...,cd21459049ca19b22a72fffcda329c79
3,FORTIS 1st Open Call,FORTIS supports projects that develop innovati...,"Human-robot interaction (HRI), multimodal comm...","Consortia of 2-3 organizations: Start-ups, SME...","Up to 250,000 euros","June 04, 2025",https://fortis-project.eu/open-call-1/,bd4c4d243afbba1223855b5ab41181a9
4,Funding of collaborative research projects in ...,Directive on the funding of collaborative rese...,Research & Innovation (topic-specific),"Research institution, university, company",amount information not found,deadline information not found,https://www.foerderdatenbank.de/FDB/Content/DE...,24418f42df5e76ad3641741782229d11


In [9]:
# %%
# Format funding blocks (same logic)
def generate_structured_funding_blocks(matches, user_query: str) -> str:
    """Render search matches as numbered, markdown-style funding blocks.

    Every match becomes one block: a bold, numbered title followed by one
    bullet per metadata field that holds a usable value. A value is unusable
    when it is empty/None or contains the placeholder text "not found"
    (case-insensitive). If the user's query mentions a field (matched by
    case-insensitive substring against a small alias list) and that field is
    unusable for a match, the block says so explicitly. A usable URL is
    appended as the last line of the block.

    Args:
        matches: Iterable of match records. Each record is indexed with
            ``match["metadata"]``, which must yield a mapping with ``.get``
            (or ``None``, treated as "no metadata").
        user_query: The user's free-text query.

    Returns:
        All blocks joined with a newline (each block already ends in a
        newline, so blocks are separated by an empty line). Returns an empty
        string when ``matches`` is empty.
    """
    field_aliases = {
        "Amount": ["amount", "how much", "funding", "money"],
        "Deadline": ["deadline", "last date", "until", "submission date"],
        "Eligibility": ["eligible", "eligibility", "who can apply"],
        "Procedure": ["procedure", "how to apply", "application", "steps", "process"],
        "Contact": ["contact", "email", "person", "support"],
    }
    # Order in which the bullet lines appear inside each block.
    display_order = [
        "Description", "Domain", "Eligibility", "Amount",
        "Deadline", "Procedure", "Contact",
    ]

    def _has_value(value) -> bool:
        """Return True unless the value is empty or a '... not found' placeholder."""
        return bool(value) and "not found" not in str(value).lower()

    # Which fields the user asked about depends only on the query, so it is
    # computed once here instead of re-scanning the query for every match.
    # Iterating display_order keeps the order in which missing fields are reported.
    query_lower = user_query.lower()
    requested_fields = [
        key
        for key in display_order
        if key in field_aliases
        and any(alias in query_lower for alias in field_aliases[key])
    ]

    formatted_blocks = []
    for idx, match in enumerate(matches, start=1):
        # A match whose metadata is None is rendered as a title-only block
        # instead of crashing on None.get(...).
        meta = match["metadata"] or {}
        # `or` also covers a name that is present but None/empty.
        name = meta.get("name") or "Unnamed"

        fields = {
            "Description": meta.get("description"),
            "Domain": meta.get("domain"),
            "Eligibility": meta.get("eligibility"),
            "Amount": meta.get("amount"),
            "Deadline": meta.get("deadline"),
            "Procedure": meta.get("procedure"),
            "Contact": meta.get("contact"),
            "URL": meta.get("url"),
        }

        block = f"**{idx}. {name}**\n"
        for key in display_order:
            if _has_value(fields[key]):
                block += f"   - **{key}**: {fields[key]}\n"

        # Only fields the user explicitly asked about are reported as missing.
        missing_fields = [key for key in requested_fields if not _has_value(fields[key])]
        if missing_fields:
            block += f"   - *Couldn't trace information about {', '.join(missing_fields)}.*\n"

        # The URL goes through the same placeholder check as every other field,
        # so a "... not found" placeholder is never presented as a link.
        if _has_value(fields["URL"]):
            block += f"   - **For more information visit**: {fields['URL']}\n"

        formatted_blocks.append(block)

    return "\n".join(formatted_blocks)

In [10]:
# %%
# Create the formatted funding output from the Pinecone matches and preview it
semantic_output = generate_structured_funding_blocks(
    matches=semantic_matches["matches"],
    user_query=query,
)
print(semantic_output)

**1. MASTER 2nd Open Call**
   - **Description**: MASTER supports projects that develop and validate innovative XR-based educational content for robotics training. The aim is to create practical training scenarios that promote the use of XR technologies in industrial robotics and give students and specialists access to modern learning tools.  Further information can be found here
   - **Domain**: VR/XR
   - **Eligibility**: Universities, research institutions, educational institutions, SMEs and large companies
   - **Amount**: Up to 100,000 euros
   - **Deadline**: June 12, 2025
   - **For more information visit**: https://www.master-xr.eu/open-calls/open-call-2/

**2. Promotion of interdisciplinary pilot projects on the topic of "Neurobiologically inspired artificial intelligence"**
   - **Description**: Guideline for the funding of interdisciplinary pilot projects on the topic of "Neurobiologically inspired artificial intelligence" dated: 25.11.2024 Federal Ministry of Education and 

In [26]:

# %%
# Prepare prompt for Mistral.
# The number of candidate programs is taken from the actual search result rather
# than hard-coded, so the prompt stays truthful if top_k changes or Pinecone
# returns fewer hits. With 5 matches the prompt text is unchanged.
num_candidates = len(semantic_matches["matches"])
llm_prompt = f"""
The company described itself as:

"{query}"

Here are the top {num_candidates} most relevant public funding programs in Germany, based on a semantic search match to their needs:

{semantic_output}

Now:

Please write a concise and professional recommendation containing **only the top 2–3 most relevant funding programs** in this format:

Only select the top programs that most directly match the company’s domain, maturity stage (e.g., early-stage research), or funding needs. Ignore entries that are vague or poorly aligned.

For each recommendation, follow this format exactly:

1. <Program Name>  
**Why it fits**: <Brief explanation why this program is suitable>  
**Domain**: <Domain>  
**Description**: <1–3 sentence summary of what the program funds and its focus>  
**Eligibility**: <Eligibility>  
**Amount**: <Amount>  
**Deadline**: <Deadline>  
**Contact**: <Contact> 
**Next Steps**:  
- <Step 1> Visit the official call page: <Program URL>  
- <Step 2> Provide one or two additional helpful steps based on available info (e.g., forming a consortium, preparing documents, contacting support, etc.)   
- <Step 3> (Optional) Include any additional steps if mentioned in the procedure  

If any field like **Amount**, **Deadline**, **Eligibility**, **Procedure**, or **Contact** is missing, either omit the line or say “Not specified”.

Use simple bullet points under **Next Steps**. Only list the top 2 or 3 programs — not all {num_candidates}.
"""


In [27]:
# Optional: save the LLM prompt to a .txt file so it can be used by Ollama (uncomment the two lines below)
# with open("llm_prompt_mistral.txt", "w") as f:
#     f.write(llm_prompt)

In [28]:
# %%
# Run Mistral via Ollama: one system message to set the role, one user message
# carrying the prompt built above
system_message = {"role": "system", "content": "You are an expert in funding opportunities."}
user_message = {"role": "user", "content": llm_prompt}
response = ollama_client.chat(
    model="mistral",  # or llama3.2 if you prefer
    messages=[system_message, user_message],
)

In [29]:
# %%
# Show LLM recommendation: the reply text is read from the chat response's message content
recommendation_text = response["message"]["content"]
print("\n🧾 Mistral Recommendation:\n")
print(recommendation_text)


🧾 Mistral Recommendation:

 1. **MASTER 2nd Open Call**
   - **Why it fits**: This program focuses on developing XR-based educational content for robotics training, which aligns with your company's focus on AI for robotics and research.
   - **Domain**: VR/XR
   - **Description**: Funding for innovative XR-based educational content in industrial robotics.
   - **Eligibility**: Universities, research institutions, educational institutions, SMEs and large companies
   - **Amount**: Up to 100,000 euros
   - **Deadline**: June 12, 2025
   - **Contact**: Not specified
   - **Next Steps:**
     - Visit the official call page: [MASTER 2nd Open Call](https://www.master-xr.eu/open-calls/open-call-2/)
     - Prepare a compelling proposal that highlights how your research can contribute to practical learning tools in industrial robotics

2. **FORTIS 1st Open Call**
   - **Why it fits**: This program focuses on developing innovative solutions for safe, trustworthy and efficient human-robot intera