In [16]:
import pandas as pd
import numpy as np
import json
import re
import os
from paths import DATA_FOLDER, PROCESSED_DATA_FOLDER, MODELS_FOLDER, ML_FOLDER

from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.utils import cached_file, WEIGHTS_NAME
from transformers.utils import TRANSFORMERS_CACHE
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import torch

In [10]:
# DATA_FOLDER, PROCESSED_DATA_FOLDER, MODELS_FOLDER and ML_FOLDER are already
# bound by the `from paths import ...` line in the imports cell, so they are
# not re-assigned to themselves here (those assignments were no-ops).

# Hugging Face model id; not referenced by the other cells visible here.
MODEL_ID = "BAAI/bge-m3"

In [11]:
# Hugging Face model id of the causal LM that is loaded with
# AutoModelForCausalLM.from_pretrained in the following cell.
CODE_MODEL_ID = "Qwen/Qwen3-0.6B"

In [12]:
# Load the causal-LM identified by CODE_MODEL_ID. The recorded output shows
# config.json, model.safetensors and generation_config.json being fetched.
model = AutoModelForCausalLM.from_pretrained(CODE_MODEL_ID)

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.50G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [21]:
# Persist the loaded model under ML_FOLDER. The sub-folder name is derived from
# CODE_MODEL_ID (the part after the last "/") so the saved copy cannot drift
# from the model that was actually loaded, and the path is joined with
# os.path.join instead of a hand-written separator.
# NOTE(review): only the model is saved; no tokenizer is loaded or saved in the
# cells visible here — confirm whether that is intended.
model.save_pretrained(os.path.join(ML_FOLDER, CODE_MODEL_ID.rsplit("/", 1)[-1]))

In [14]:
# NOTE(review): `__defaults__` holds the default values of a callable's
# positional parameters, not a cache location; the recorded output is `None`,
# so the "Default cache dir" label is misleading. The cache path is printed
# from TRANSFORMERS_CACHE in the next cell.
print(f"Default cache dir: {AutoModelForCausalLM.from_pretrained.__defaults__}")

Default cache dir: None


In [15]:
# Print the TRANSFORMERS_CACHE constant imported from transformers.utils.
print(f"Transformers is using cache: {TRANSFORMERS_CACHE}")

Transformers is using cache: D:\ML\HFcache\hub


In [17]:
# Chat prompt used for retrieval-augmented C# code generation.
# - system message: role, instructions and the required response format
# - human message: template with the {context} and {question} variables
basic_rag_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a senior C# programmer that writes code based on the provided context.

    Instructions:
    - Read the provided context carefully
    - If multiple documents are provided, synthesize information from all relevant sources
    - Use only the information provided in the context
    - If the context doesn't contain enough information, say so and then try to write what you can
    - Be accurate and cite specific parts of the context when possible
    - Follow C# best practices and naming conventions
    - Include necessary using statements
    - Add brief comments for complex logic

    Response format:
    - Write at the start (AS A COMMENT) your confidence level (High/Medium/Low)
    - Plain code which can be straightforward pasted to the code editor
    - No markdown formatting, just plain C# code
    """),
    ("human", """Context: {context}

    Request: {question}

    Please provide a comprehensive answer following the instructions above.

    Answer:""")
])

In [20]:
def create_rag_chain(retriever, llm, prompt_template):
    """Create a RAG chain with retriever, LLM, and prompt.

    Args:
        retriever: A LangChain Runnable, an object exposing ``invoke(query)``
            (such as ``CustomChromaRetriever``), or a plain callable. Its result
            must be a dict whose 'documents', 'metadatas', 'distances' and
            'ids' entries are lists holding one inner list per query; only the
            first inner list is used.
        llm: Runnable placed after the prompt in the chain.
        prompt_template: Prompt that receives the ``context`` and ``question``
            values.

    Returns:
        The composed chain: retriever -> format_docs (context) plus the
        passthrough question -> prompt_template -> llm -> StrOutputParser.
    """
    # Local import: these two names are not part of the notebook's import cell.
    from langchain_core.runnables import Runnable, RunnableLambda

    def format_docs(result):
        """Format retrieved documents for the prompt.

        Returns an empty string when the result holds no documents.
        """
        documents = result['documents'][0]
        metadatas = result['metadatas'][0]
        distances = result['distances'][0]
        ids = result['ids'][0]

        # Build one text section per hit, in a single pass. The text is only
        # assembled after the loop, so an empty result yields "" instead of
        # hitting an unbound variable.
        sections = []
        for rank, (doc, metadata, distance, doc_id) in enumerate(
            zip(documents, metadatas, distances, ids), start=1
        ):
            # NOTE(review): the value printed as "Similarity Score" is taken
            # from result['distances'] — confirm the label matches the metric.
            sections.append(
                f"""
                --- Document {rank} (ID: {doc_id}) ---
                Category: {metadata['category']}
                Similarity Score: {distance:.4f}
                Content:
                {doc}

                """
            )

        return "".join(sections).strip()

    # The `|` operator is only defined on Runnables. A plain object such as
    # CustomChromaRetriever would raise TypeError in `retriever | format_docs`,
    # so wrap its `invoke` method (or the callable itself) in a RunnableLambda.
    if isinstance(retriever, Runnable):
        retrieval_step = retriever
    else:
        retrieval_step = RunnableLambda(getattr(retriever, "invoke", retriever))

    # Create the chain
    rag_chain = (
        {"context": retrieval_step | format_docs, "question": RunnablePassthrough()}
        | prompt_template
        | llm
        | StrOutputParser()
    )

    return rag_chain




In [None]:
class CustomChromaRetriever:
    """Small retriever wrapper around a ChromaDB collection.

    Stores the collection and the number of results to request, and forwards
    each query to the collection's ``query`` method.
    """

    def __init__(self, collection, k=10):
        # k is passed to the collection as `n_results` on every query.
        self.k = k
        self.collection = collection

    def invoke(self, query):
        """Query the collection with a single text and return its raw result."""
        return self.collection.query(n_results=self.k, query_texts=[query])

    def get_relevant_documents(self, query):
        """Compatibility alias that delegates to `invoke`."""
        hits = self.invoke(query)
        return hits

In [None]:

# Example usage (left commented out: `collection` is not defined in the cells shown here):
# retriever = CustomChromaRetriever(collection, k=3)