In [None]:
import os

from pinecone import Pinecone

# Take the Pinecone API key from the PINECONE_API_KEY environment variable so
# the real secret never has to be saved inside the notebook. The literal is
# only a placeholder fallback: replace it (or set the variable) before running.
api_key = os.environ.get("PINECONE_API_KEY", "api-key-here")

# Index that stores the document embeddings. Nothing in this cell creates it,
# so it is expected to exist already.
index_name = "deepseek-rag"

pc = Pinecone(api_key=api_key)

# Handle to the index; the upload and retrieval cells below use it.
index = pc.Index(index_name)

In [None]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model shared by the upload and retrieval cells, so that
# documents and queries are encoded by the same model.
# NOTE(review): the Pinecone index dimension has to match this model's output
# size (384 according to the all-MiniLM-L6-v2 model card) - confirm.
EMBEDDING_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBEDDING_MODEL_ID)

In [None]:
import uuid

def upload_docs_to_pinecone(docs, batch_size=100):
    """Embed ``docs`` and upsert them into the module-level Pinecone ``index``.

    Each document is encoded with the module-level ``embedder`` and stored as
    a record ``{"id", "values", "metadata": {"text": doc}}``.

    Record IDs are derived from the document text (UUIDv5), not generated at
    random. Re-running the upload cell therefore overwrites the existing
    records instead of adding duplicates that would crowd the top-k results.

    Args:
        docs: An iterable of strings. A single string is treated as one
            document rather than being iterated character by character.
        batch_size: Maximum number of records sent per ``index.upsert`` call
            (default 100). Keeps each request small for large uploads.

    Returns:
        None. A summary line is printed.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    if isinstance(docs, str):
        docs = [docs]

    # Identical texts map to the same ID, so keep only the first occurrence
    # (order preserved) rather than sending the same ID twice in one request.
    unique_docs = list(dict.fromkeys(docs))
    if not unique_docs:
        # Nothing to embed; avoid sending an empty upsert request.
        print("No documents to upload.")
        return

    vectors = embedder.encode(unique_docs).tolist()

    # The namespace is arbitrary but must stay fixed so IDs are reproducible.
    ids = [str(uuid.uuid5(uuid.NAMESPACE_URL, doc)) for doc in unique_docs]

    pinecone_records = [
        {"id": id_, "values": vec, "metadata": {"text": doc}}
        for id_, vec, doc in zip(ids, vectors, unique_docs)
    ]

    for start in range(0, len(pinecone_records), batch_size):
        index.upsert(vectors=pinecone_records[start:start + batch_size])

    print(f"Uploaded {len(unique_docs)} documents to Pinecone.")

In [None]:
# Seed the index with a few sample passages for the retrieval demo below.
sample_docs = [
    "Retrieval-Augmented Generation (RAG) improves LLM output by injecting relevant context.",
    "DeepSeek Coder can be used locally for fast, context-aware code generation.",
    "Pinecone enables fast vector search for dense embeddings.",
]
upload_docs_to_pinecone(sample_docs)

In [None]:
def retrieve_relevant_docs(query, top_k=3):
    """Return the stored text of the index entries closest to ``query``.

    The query is encoded with the module-level ``embedder`` and the resulting
    vector is sent to the module-level Pinecone ``index`` with metadata
    included. Every match is expected to carry a ``"text"`` metadata field.

    Args:
        query: Text to embed and search for.
        top_k: Number of matches to request from the index (default 3).

    Returns:
        list: The ``"text"`` metadata of each match, in the order the index
        returned them.
    """
    embedding = embedder.encode(query).tolist()
    response = index.query(
        include_metadata=True,
        top_k=top_k,
        vector=embedding,
    )

    texts = []
    for hit in response["matches"]:
        texts.append(hit["metadata"]["text"])
    return texts

In [None]:
from llama_cpp import Llama

# Local GGUF checkpoint of DeepSeek Coder 6.7B Instruct (Q4_K_M quantization,
# going by the file name).
deepseek_path = r"C:\GGUF\godolike\deepseek-coder-6.7b-instruct-Q4_K_M-GGUF\deepseek-coder-6.7b-instruct-q4_k_m.gguf"

# Load the model once; the generation cell calls `DeepSeekCode(prompt, ...)`.
# Option notes follow the llama-cpp-python `Llama` API reference.
DeepSeekCode = Llama(
    model_path=deepseek_path,
    # Context and prompt processing.
    n_ctx=2048,        # context window in tokens (prompt + completion must fit)
    n_batch=256,       # maximum prompt-processing batch size
    # Hardware placement.
    n_gpu_layers=20,   # number of layers offloaded to the GPU
    n_threads=6,       # CPU threads used for generation
    use_mlock=True,    # ask the OS to keep the model resident in RAM
    # Diagnostics.
    verbose=True,
)

In [None]:
def rag_generate(query, top_k=3, max_tokens=512):
    """Answer ``query`` with the local model, grounded in retrieved context.

    The documents returned by ``retrieve_relevant_docs`` are joined with
    blank lines and inserted into a fixed prompt template together with the
    question. The prompt is then completed by the module-level
    ``DeepSeekCode`` model, stopping at ``"</s>"``.

    Args:
        query: The user's question. It is used both for retrieval and in the
            prompt.
        top_k: Number of context documents to retrieve (default 3, the
            default of ``retrieve_relevant_docs`` that was used implicitly
            before).
        max_tokens: Upper bound on generated tokens (default 512, the
            previously hardcoded value).

    Returns:
        str: The text of the first completion choice, stripped of
        surrounding whitespace.
    """
    retrieved_context = "\n\n".join(retrieve_relevant_docs(query, top_k=top_k))
    prompt = f"""You are a helpful coding assistant.

Use the following context to answer the question.

Context:
{retrieved_context}

Question:
{query}

Answer:"""

    output = DeepSeekCode(prompt, max_tokens=max_tokens, stop=["</s>"])
    return output["choices"][0]["text"].strip()