In [None]:
import difflib
import getpass
import os

import tiktoken
from IPython.display import display, Markdown
from langchain.document_loaders import TextLoader
from langchain.prompts import ChatPromptTemplate
from langchain.schema import Document
from langchain.schema.output_parser import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

VENDOR_DOCS_FILE = "/Users/apoorvah/config-diff-analyzer/data/vendor_docs/arista_user_manual.txt"

# SECURITY: never store an API key in the notebook source. The key is taken
# from the OPENAI_API_KEY environment variable; if it is not set, it is asked
# for interactively without echoing. Any key that was previously committed in
# this cell must be treated as leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Set up RAG (simple local embedding store)
# Read the whole vendor manual into memory; the encoding is explicit so the
# result does not depend on the platform default.
with open(VENDOR_DOCS_FILE, encoding="utf-8") as f:
    vendor_text = f.read()

# Split the manual into overlapping chunks that are embedded individually later.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100
)
vendor_docs = splitter.create_documents([vendor_text])

# Optional diagnostic: token counting with the tokenizer tiktoken maps to "gpt-4".
encoding = tiktoken.encoding_for_model("gpt-4")


def count_tokens(text: str) -> int:
    """Return the number of tokens `encoding` produces for `text`."""
    token_ids = encoding.encode(text)
    return len(token_ids)


# Quick sanity report on the splitter output.
print(f"Total vendor chunks: {len(vendor_docs)}")
print(f"Tokens in first chunk: {count_tokens(vendor_docs[0].page_content)}")

# Embed every vendor chunk with OpenAI embeddings and index them in Chroma
# (no persist_directory is passed here), then expose a retriever with k=3.
vectorstore = Chroma.from_documents(
    documents=vendor_docs,
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))

# Prompt Template
# Two placeholders are filled at invoke time:
#   {diff}    - the unified diff between the two configs
#   {context} - the vendor-documentation excerpts returned by the retriever
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a senior network engineer helping review changes in router configurations."),
    ("user", "Here is the diff between two configs:\n\n{diff}\n\nBased on the retrieved documentation:\n{context}\n\n1. Summarize what has changed.\n2. Is this safe to push live?\n3. Does this require draining?\n4. If unsure, what should be validated first?\n5. What should be monitored after the change?")
])

# Create Chain
# prompt -> chat model -> plain string. StrOutputParser is imported with the
# other dependencies at the top of the notebook; without that import this
# line raises NameError on a fresh kernel. It turns the model's chat message
# into a str so the result can be handed to Markdown() directly.
llm = ChatOpenAI(model_name="gpt-4", temperature=0.3)
chain = prompt | llm | StrOutputParser()


Total vendor chunks: 11509
Tokens in first chunk: 3


In [None]:
# Load config files
CONFIG_PATH = "/Users/apoorvah/config-diff-analyzer/data/examples/"
device_config_path = "arista_device_config.txt"
desired_config_path = "arista_desired_config.txt"

# Text of the "device" config file.
with open(os.path.join(CONFIG_PATH, device_config_path)) as device_file:
    device = device_file.read()

# Text of the "desired" config file.
with open(os.path.join(CONFIG_PATH, desired_config_path)) as desired_file:
    desired = desired_file.read()

# Generate Diff
# fromfile/tofile are only the labels shown on the "---" / "+++" header lines,
# so they must be the file names. Passing the file *contents* (as before)
# embedded both complete configs in the header, bloating the prompt and the
# retrieval query.
diff_lines = list(difflib.unified_diff(
    device.splitlines(), desired.splitlines(),
    fromfile=device_config_path, tofile=desired_config_path, lineterm=""
))
# lineterm="" yields lines without trailing newlines, so join them explicitly.
diff_text = "\n".join(diff_lines)

print(diff_text)  # Show exactly the diff text that is sent to the model

# Retrieve context and run the analysis.
# `retriever` and `chain` come from the setup cell; the retriever is not
# rebuilt here because that would only duplicate the earlier definition.
if diff_text:
    # Retriever.invoke() is the current entry point; get_relevant_documents()
    # is deprecated in recent LangChain releases.
    retrieved_docs = retriever.invoke(diff_text)
    context = "\n---\n".join(doc.page_content for doc in retrieved_docs)

    # Run Analysis
    response = chain.invoke({"diff": diff_text, "context": context})
    display(Markdown(response))
else:
    # unified_diff yields nothing for identical inputs; skip the embedding
    # lookup and the LLM call instead of sending an empty query.
    print("No differences between the device and desired configs; nothing to analyze.")
