In [4]:
# Remove the currently installed pinecone-client from the interpreter that runs
# this kernel (sys.executable), so the following cell can reinstall a pinned version.
import sys
!{sys.executable} -m pip uninstall -y pinecone-client

Found existing installation: pinecone-client 2.2.4
Uninstalling pinecone-client-2.2.4:
  Successfully uninstalled pinecone-client-2.2.4


In [5]:
# Reinstall pinecone-client constrained to the pre-3.0 API line (resolved to 2.2.4
# in the recorded output).
!{sys.executable} -m pip install "pinecone-client<3.0.0"

Collecting pinecone-client<3.0.0
  Using cached pinecone_client-2.2.4-py3-none-any.whl.metadata (7.8 kB)
Using cached pinecone_client-2.2.4-py3-none-any.whl (179 kB)
Installing collected packages: pinecone-client
Successfully installed pinecone-client-2.2.4


In [6]:
# Print the installed pinecone-client version and location for the kernel's interpreter.
!{sys.executable} -m pip show pinecone-client

Name: pinecone-client
Version: 2.2.4
Summary: Pinecone client and SDK
Home-page: https://www.pinecone.io/
Author: Pinecone Systems, Inc.
Author-email: support@pinecone.io
License: Proprietary License
Location: /Users/kiranmulawad/AI-Funding/.venv/lib/python3.12/site-packages
Requires: dnspython, loguru, numpy, python-dateutil, pyyaml, requests, tqdm, typing-extensions, urllib3
Required-by: 


In [7]:
# Downgrade protobuf below 4.0.0.
# NOTE(review): the recorded output reports a resolver conflict -- protoc-gen-openapiv2
# 0.0.1 requires protobuf>=4.21.0 -- so confirm this pin is still wanted.
!{sys.executable} -m pip install 'protobuf<4.0.0'


Collecting protobuf<4.0.0
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Downloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 5.29.5
    Uninstalling protobuf-5.29.5:
      Successfully uninstalled protobuf-5.29.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
protoc-gen-openapiv2 0.0.1 requires protobuf>=4.21.0, but you have protobuf 3.20.3 which is incompatible.[0m[31m
[0mSuccessfully installed protobuf-3.20.3


In [None]:
# Install the LangChain packages into the kernel's interpreter (versions are not pinned).
# NOTE(review): the recorded output below shows pinecone-client 6.0.0 and pinecone 7.3.0
# being installed, which contradicts the earlier "pinecone-client<3.0.0" pin; the output
# also mentions pinecone-client[grpc], which this command does not request, so it may be
# stale. Confirm which Pinecone SDK version the query code is meant to run against.
import sys
!{sys.executable} -m pip install langchain langchain-community langchain-pinecone

Collecting langchain-community
  Using cached langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting pinecone-client[grpc]
  Downloading pinecone_client-6.0.0-py3-none-any.whl.metadata (3.4 kB)
Collecting pinecone<8.0.0,>=6.0.0 (from pinecone[asyncio]<8.0.0,>=6.0.0->langchain-pinecone)
  Using cached pinecone-7.3.0-py3-none-any.whl.metadata (9.5 kB)
Downloading pinecone_client-6.0.0-py3-none-any.whl (6.7 kB)
Using cached langchain_community-0.3.27-py3-none-any.whl (2.5 MB)
Using cached pinecone-7.3.0-py3-none-any.whl (587 kB)
Installing collected packages: pinecone-client, pinecone, langchain-community
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [langchain-community]ngchain-community]
[1A[2KSuccessfully installed langchain-community-0.3.27 pinecone-7.3.0 pinecone-client-6.0.0


In [None]:
# %% LangChain version of your query engine
import os
import re
import pandas as pd
from dotenv import load_dotenv
from datetime import datetime
from dateutil import parser

from langchain.vectorstores import Pinecone as LangPinecone
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import Ollama
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from pinecone import Pinecone

# %% Load .env
# Read credentials from the environment (optionally populated from a local .env file).
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    # Fail fast with an actionable message instead of an opaque client error later on.
    raise RuntimeError(
        "PINECONE_API_KEY is not set; add it to the environment or to the .env file."
    )
# NOTE(review): PINECONE_ENV is read but not used anywhere in the visible code.
PINECONE_ENV = os.getenv("PINECONE_ENV", "gcp-starter")  # your pinecone environment
PINECONE_INDEX = "funding-search-bge"

# %% Setup embedding model
# The query is embedded with this model; it presumably has to match the model that
# was used to populate the index -- verify against the ingestion code.
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")

# %% Connect to Pinecone (both LangChain + native)
pc = Pinecone(api_key=PINECONE_API_KEY)
index = LangPinecone.from_existing_index(index_name=PINECONE_INDEX, embedding=embedding_model)

# %% Query
# User profile; these module-level values are also read by the scoring code below.
user_query = "We are an AI company focused on AI for robotics. We are focusing on research right now."
user_location = "Rhineland-Palatinate"
funding_need = "200000"
target_domain = "AI"

# %% Get documents (metadata + vector matched)
retriever = index.as_retriever(search_kwargs={"k": 10})
# `invoke` is the Runnable entry point (already used for the LLM call in this notebook);
# `get_relevant_documents` is deprecated in current LangChain releases.
docs = retriever.invoke(user_query)

# %% Optional: custom scoring
def safe_parse_deadline(deadline_str):
    """Parse a free-text deadline into a ``datetime``, or return ``None``.

    Parsing is delegated to ``dateutil.parser.parse`` with ``dayfirst=True``
    (so ``01.02.2025`` is read as 1 February) and ``fuzzy=True`` (so
    surrounding words are ignored).

    Args:
        deadline_str: The raw deadline text. ``None``, empty strings and other
            falsy values are treated as "no deadline".

    Returns:
        The parsed ``datetime``, or ``None`` when the input is empty or cannot
        be interpreted as a date.
    """
    if not deadline_str:
        return None
    try:
        return parser.parse(deadline_str, dayfirst=True, fuzzy=True)
    except (ValueError, OverflowError, TypeError):
        # ValueError covers dateutil's ParserError; OverflowError is raised for
        # out-of-range dates; TypeError for unsupported input types. Anything
        # else (e.g. KeyboardInterrupt) is deliberately allowed to propagate.
        return None

def compute_relevance_score(metadata, *, domain=None, funding=None, query=None, location=None):
    """Score one funding program's metadata against the user's profile.

    The score is a weighted sum of five independent heuristics, returned as a
    rounded percentage:

    * domain substring match ............ 0.4
    * offered amount >= needed amount ... 0.3
    * deadline text mentions "month" or "2025" ... 0.2
    * any query word occurs in the description ... 0.1
    * location substring match .......... 0.1

    The weights sum to 1.1, so the result can reach 110.

    Args:
        metadata: Mapping with optional keys ``domain``, ``amount``,
            ``deadline``, ``description`` and ``location``. Values are
            stringified before matching.
        domain: Target domain; defaults to the module-level ``target_domain``.
        funding: Required amount (anything ``int()`` accepts); defaults to the
            module-level ``funding_need``.
        query: Free-text company description; defaults to the module-level
            ``user_query``.
        location: User location; defaults to the module-level ``user_location``.

    Returns:
        int: The score as a whole-number percentage.
    """
    # Fall back to the notebook-level profile only for arguments not supplied.
    domain = target_domain if domain is None else domain
    funding = funding_need if funding is None else funding
    query = user_query if query is None else query
    location = user_location if location is None else location

    def text(key, default=""):
        return str(metadata.get(key, default))

    score = 0
    # Domain match
    if domain.lower() in text("domain").lower():
        score += 0.4
    # Amount match
    try:
        # NOTE(review): all digits are concatenated, so a range such as
        # "50,000 - 200,000" is read as one very large number.
        amount_val = int(re.sub(r"[^\d]", "", text("amount", "0")))
        if amount_val >= int(funding):
            score += 0.3
    except (ValueError, TypeError):
        # No digits in the amount text, or an unusable funding value: this
        # criterion simply contributes nothing.
        pass
    # Deadline
    deadline_text = text("deadline")
    # NOTE(review): the year "2025" is hard-coded.
    if "month" in deadline_text.lower() or "2025" in deadline_text:
        score += 0.2
    # Keyword match
    description = text("description").lower()
    if any(word.lower() in description for word in query.split()):
        score += 0.1
    # Location
    if location.lower() in text("location").lower():
        score += 0.1
    return round(score * 100)

# %% Score and sort
# Annotate every retrieved document's metadata (in place) with a heuristic
# relevance score and the number of days until its deadline, then keep the
# five best-scoring records.
scoring_time = datetime.now()
scored_docs = []
for doc in docs:
    meta = doc.metadata
    meta["relevance_score"] = compute_relevance_score(meta)

    # The deadline may be absent or unparseable; in both cases there is no
    # meaningful day count, so store None instead of raising on `None - datetime`.
    deadline_dt = safe_parse_deadline(meta.get("deadline", "")) if meta.get("deadline") else None
    if deadline_dt is None:
        meta["days_left"] = None
    else:
        if deadline_dt.tzinfo is not None:
            # A tz-aware value cannot be subtracted from the naive `now`;
            # drop the offset, which is precise enough at day granularity.
            deadline_dt = deadline_dt.replace(tzinfo=None)
        meta["days_left"] = (deadline_dt - scoring_time).days
    scored_docs.append(meta)

scored_df = pd.DataFrame(scored_docs)
if not scored_df.empty:
    # With no documents there is no "relevance_score" column to sort on.
    scored_df = scored_df.sort_values("relevance_score", ascending=False)

# The DataFrame round-trip fills absent cells with float NaN (which is truthy);
# map those back to None so downstream `if value:` checks behave as intended.
top_matches = [
    {
        key: (None if isinstance(value, float) and value != value else value)
        for key, value in record.items()
    }
    for record in scored_df.head(5).to_dict("records")
]

# %% Format into GPT-readable blocks
def _is_missing(value):
    """Return True for None and for float NaN (pandas' filler for absent cells)."""
    return value is None or (isinstance(value, float) and value != value)


def format_for_prompt(matches):
    """Render funding-program records as Markdown bullet blocks for the LLM prompt.

    Each record becomes a numbered heading followed by one bullet per populated
    field, in a fixed order, plus a "More info" bullet when a URL is present.
    The deadline bullet is suffixed with the remaining days when ``days_left``
    holds a number (including 0).

    Args:
        matches: Iterable of dict-like records. Recognised keys: ``name``,
            ``description``, ``domain``, ``eligibility``, ``amount``,
            ``deadline``, ``procedure``, ``contact``, ``location``, ``source``,
            ``url`` and ``days_left``. Values that are ``None``, NaN or
            otherwise falsy are skipped.

    Returns:
        str: The blocks joined by a blank line; an empty string for no matches.
    """
    fields = [
        "description", "domain", "eligibility", "amount", "deadline",
        "procedure", "contact", "location", "source",
    ]
    blocks = []
    for i, meta in enumerate(matches, 1):
        name = meta.get("name", "Unnamed")
        if _is_missing(name):
            name = "Unnamed"
        block = f"**{i}. {name}**\n"

        days_left = meta.get("days_left")
        for field in fields:
            val = meta.get(field)
            if _is_missing(val) or not val:
                continue
            # str() so numeric metadata cannot break the concatenation below.
            text = str(val)
            # Explicit missing-check: a NaN day count must not reach int(), and
            # a legitimate 0 ("due today") must not be dropped as falsy.
            if field == "deadline" and not _is_missing(days_left):
                text += f" (🕒 {int(days_left)} days left)"
            block += f"- **{field.capitalize()}**: {text}\n"

        url = meta.get("url")
        if not _is_missing(url) and url:
            block += f"- **More info**: {url}\n"
        blocks.append(block)
    return "\n".join(blocks)

# Render the selected records into Markdown bullet blocks for inclusion in the prompt.
semantic_output = format_for_prompt(top_matches)

# %% Prepare LLM prompt
# The prompt embeds the user's self-description and the formatted programs, then
# asks for a recommendation of the top 2-3 programs in a fixed layout.
# NOTE(review): the text always says "top 5" even when fewer records were retrieved.
# NOTE(review): several template lines end in two spaces, which look like Markdown
# hard line breaks -- keep them when editing.
llm_prompt = f"""
The company described itself as:

"{user_query}"

Here are the top 5 most relevant public funding programs in Germany:

{semantic_output}

Now, write a concise and professional recommendation for the **top 2–3 most relevant** programs in this format:

1. <Program Name>  
Why it fits: <Reason>  
**Description**: <...>  
**Domain**: <...>  
**Eligibility**: <...>  
**Amount**: <...>  
**Deadline**: <...>  
**Location**: <...>  
**Contact**: <...>  
**Next Steps**:  
- Step 1: <...>  
- Step 2: <...>
"""

# %% LLM call (LLaMA via Ollama)
# Send the prompt to the "llama3.2" model through LangChain's Ollama wrapper.
# NOTE(review): presumably requires a running Ollama server with that model
# available -- confirm for the target environment.
llm = Ollama(model="llama3.2")
response = llm.invoke(llm_prompt)

# %% Final Output
# Print the model's recommendation text under a heading.
print("\n🧾 Recommendation:\n")
print(response)
