In [1]:
# %%
# Install required packages (run only if not already installed)
!pip install -q sentence-transformers pinecone-client python-dotenv pandas ollama


In [2]:
# %%
# Import required libraries
import os
from dotenv import load_dotenv
import pandas as pd
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone
import ollama


In [3]:

# %%
# Load API keys and environment variables from the .env file (if present)
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# PINECONE_ENV is still read so existing references keep working, but it is no
# longer required: nothing visible in this notebook uses it, and the Pinecone
# client is constructed from the API key alone.
PINECONE_ENV = os.getenv("PINECONE_ENV")
# Explicit check instead of `assert`, so validation is not skipped under `python -O`
if not PINECONE_API_KEY:
    raise RuntimeError("PINECONE_API_KEY not found in the environment or .env file!")


In [4]:
# %%
# Name of the Pinecone index queried by this notebook
index_name = "funding-search"
# Create the Pinecone client from the API key, then open a handle to that index
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(index_name)


In [5]:
# %%
# Instantiate the sentence-embedding model a single time; the search helpers
# defined later take it as their default `model` argument.
# NOTE(review): presumably this must be the same model that was used to embed
# the documents stored in the index — confirm against the indexing code.
embedding_model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")


In [6]:
# %%
# Fetch the best-matching funding programs for a query, formatted as LLM context
def retrieve_docs(query, top_k=5, model=embedding_model):
    """
    Look up the `top_k` closest funding programs in Pinecone for `query`.

    The query is embedded with `model`, the module-level `index` is queried
    with metadata included, and each match's metadata is rendered as one
    multi-line "Label: value" text block. Metadata fields that are absent
    render as an empty value.

    Args:
        query: Text to search for.
        top_k: Maximum number of matches to request from the index.
        model: Embedding model exposing `.encode(text)`; defaults to the
            notebook-wide `embedding_model`.

    Returns:
        A list of formatted strings, one per match, in the order the index
        returned them.
    """
    # (label shown in the context, metadata key), in output order
    field_layout = (
        ("Name", "name"),
        ("Description", "description"),
        ("Eligibility", "eligibility"),
        ("Amount", "amount"),
        ("Domain", "domain"),
        ("Location", "location"),
        ("Procedure", "procedure"),
        ("URL", "url"),
        ("Source", "source"),
    )

    embedded_query = model.encode(query).tolist()
    response = index.query(vector=embedded_query, top_k=top_k, include_metadata=True)

    formatted = []
    for hit in response.get("matches", []):
        # A match may carry no metadata (missing or None); treat both as empty
        fields = hit.get("metadata", {}) or {}
        lines = [f"{label}: {fields.get(key, '')}" for label, key in field_layout]
        formatted.append("\n".join(lines))
    return formatted


In [7]:
# %%
# Answer a question with retrieval-augmented generation (Pinecone + Ollama chat)
def rag_answer(query, top_k=5, model_name="mistral"):
    """
    Answer `query` from retrieved funding-program data using an Ollama chat model.

    The `top_k` documents returned by `retrieve_docs` are joined into a single
    context, embedded in an instruction prompt, and sent as one user message
    to `ollama.chat`.

    Args:
        query: The user's question.
        top_k: Number of funding programs to retrieve as context.
        model_name: Name of the Ollama model to chat with.

    Returns:
        The content of the message in the chat response.
    """
    # Retrieved program descriptions, separated by blank lines
    context = "\n\n".join(retrieve_docs(query, top_k=top_k))

    # Fixed instruction preamble that restricts the model to the supplied data
    instructions = (
        "You are an expert on public funding for businesses in Germany. "
        "Based only on the following funding program data, answer the user's question concisely and factually. "
        "If the answer is not present in the data, say so.\n\n"
    )
    prompt = f"{instructions}Funding programs:\n{context}\n\nUser question: {query}\nAnswer:"

    # The whole prompt travels as a single user-role message
    messages = [{'role': 'user', 'content': prompt}]
    reply = ollama.chat(model=model_name, messages=messages)
    return reply['message']['content']


In [9]:
# %%
# Example usage: run one question through the RAG pipeline and print the exchange
query = "How many funding programs are available in Germany for robotics, and what are their names?"
answer = rag_answer(query, top_k=5, model_name="mistral")  # Use "mistral" or "mixtral" as installed
print(f"Q: {query}")
print(f"A: {answer}")


Q: How many funding programs are available in Germany for robotics, and what are their names?
A:  There is one funding program named "Research and development (InnoTop)" available in Rhineland-Palatinate, Germany that supports projects related to robotics. However, it's important to note that other regions within Germany might offer different funding programs for the same domain. For more comprehensive information about available funding options across all of Germany, I would recommend visiting the official German Federal Ministry of Education and Research (BMBF) website or a database like the one provided by the "Förderdatenbank" linked in your question.


In [None]:
# %%
# Optional: Table-based semantic search for UI or analysis
def semantic_search(query, top_k=5, model=embedding_model):
    """
    Return the top-k funding programs matching `query` as a DataFrame.

    The query is embedded with `model` and the module-level Pinecone `index`
    is queried with metadata included. Each match becomes one row.

    Args:
        query: Text to search for.
        top_k: Maximum number of matches to request from the index.
        model: Embedding model exposing `.encode(text)`; defaults to the
            notebook-wide `embedding_model`.

    Returns:
        pandas.DataFrame with the columns name, description, eligibility,
        amount, domain, location, procedure, url, source and score (the
        match's `score` value, or None if absent). Missing metadata fields
        are empty strings. The columns are present even when the query
        yields no matches, so callers can select columns unconditionally.
    """
    # Metadata keys copied into the frame, in column order
    metadata_fields = (
        "name",
        "description",
        "eligibility",
        "amount",
        "domain",
        "location",
        "procedure",
        "url",
        "source",
    )

    query_vector = model.encode(query).tolist()
    search_results = index.query(vector=query_vector, top_k=top_k, include_metadata=True)

    rows = []
    for match in search_results.get("matches", []):
        # A match may carry no metadata (missing or None); treat both as empty
        meta = match.get("metadata", {}) or {}
        row = {field: meta.get(field, "") for field in metadata_fields}
        row["score"] = match.get("score", None)
        rows.append(row)

    # Explicit columns keep the schema stable for an empty result; without
    # them pd.DataFrame([]) has no columns and downstream lookups raise KeyError.
    return pd.DataFrame(rows, columns=[*metadata_fields, "score"])



In [None]:
# %%
# Example: run a semantic search and render the matches as a rich table
results_df = semantic_search(query="AI funding support for tech startups in Germany", top_k=5)
display(results_df)
