In [1]:
# 1. uninstall the 2.x copy that pip installed
!pip install opencv-python==4.11.0.86
!pip uninstall -y numpy
# 2. put a 1.x copy back (1.26 is the last 1.x release)
!pip install "numpy>=1.24,<2" --upgrade
# 3. reinstall ChromaDB and transformers to be safe
!pip install --upgrade chromadb transformers
!pip install hf_xet

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Collecting numpy<2,>=1.24
  Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl (15.5 MB)
Installing collected packages: numpy
Successfully installed numpy-1.26.4


In [2]:
# streamlit_app.py - Lightweight deployment version
import streamlit as st
import pickle
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sentence_transformers import SentenceTransformer
import os
import chromadb

In [None]:
@st.cache_resource
def load_precomputed_data():
    """Unpickle ``precomputed_docs.pkl`` and hand back its contents.

    The path is relative to the current working directory. The function is
    wrapped in Streamlit's resource cache, so repeated calls reuse the cached
    object instead of re-reading the file.

    Returns:
        Whatever object was pickled into the file (presumably the pre-computed
        documents and embeddings -- the structure is not visible here).

    NOTE(review): ``pickle.load`` can execute arbitrary code contained in the
    file; only ship and load a pickle produced by a trusted build step.
    """
    with open('precomputed_docs.pkl', 'rb') as pkl_file:
        return pickle.load(pkl_file)

In [21]:
@st.cache_resource
def load_generator():
    """Build the Phi-1.5 text-generation pipeline.

    Wrapped in Streamlit's resource cache so the model is loaded once and the
    same pipeline object is reused on later calls.

    Returns:
        A transformers ``"text-generation"`` pipeline for the Phi-1.5 model.
    """
    # The model is published on the Hugging Face Hub as "microsoft/phi-1_5"
    # (underscore, not a dot).
    model_id = "microsoft/phi-1_5"
    # trust_remote_code allows code shipped in the model repo to run locally;
    # kept as in the original call.
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    # Half precision is only a safe default on a CUDA device. When
    # device_map="auto" falls back to CPU, float16 is slow and some ops are not
    # implemented for it, so use float32 there.
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    text_gen = pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        torch_dtype=dtype,
        device_map="auto",
        trust_remote_code=True,
    )
    return text_gen

def query_rag(user_query, k=3):
    """Answer ``user_query`` using retrieved context plus text generation.

    Queries the module-level ``collection`` for the ``k`` closest documents,
    joins them into an "Instruct / Context / Question / Output" prompt and
    passes that prompt to the module-level ``generator``.

    NOTE(review): ``collection`` and ``generator`` are globals this function
    does not create; they must be defined in the kernel before it is called.

    Args:
        user_query: The question text; used both for retrieval and in the prompt.
        k: Number of documents to request from the collection.

    Returns:
        The generated continuation (prompt excluded), stripped of surrounding
        whitespace.
    """
    results = collection.query(query_texts=[user_query], n_results=k)

    # Tolerate a missing/empty "documents" field and entries stored without
    # text (None); either would make str.join raise.
    retrieved = results.get("documents") or [[]]
    passages = [doc for doc in (retrieved[0] or []) if doc is not None]
    context_str = "\n".join(passages)

    manual_prompt = f"Instruct: Use the context to answer the question.\nContext: {context_str}\nQuestion: {user_query}\nOutput:"

    response = generator(
        manual_prompt,
        max_new_tokens=200,
        # The original passed temperature=0.1 without do_sample=True; in
        # transformers, temperature only applies to sampling, so it was ignored
        # (with a warning). Greedy decoding is now requested explicitly, which
        # keeps answers deterministic.
        do_sample=False,
        return_full_text=False,
    )
    return response[0]['generated_text'].strip()


In [22]:
# Smoke test: send one sample question through query_rag and show the answer.
print("Testing RAG chatbot...")
response = query_rag("What is an acquisition?")
print(f"Response: {response}")

Testing RAG chatbot...
Response: Acquisition means the acquiring by contract with appropriated funds of supplies or services (including construction)
by and for the use of the Federal Government through purchase or lease, whether the supplies or services are already in
existence or must be created, developed, demonstrated, and evaluated. Acquisition begins at the point when agency needs are
established and includes the description of requirements to satisfy agency needs, solicitation and selection of sources, award
of contracts, contract financing, contract performance, contract administration, and those technical and management functions
directly related to the process of fulfilling agency needs by contract.

7.1-4
7.105 FEDERAL ACQUISITION REGULATION
includes a cross-reference to the other definitions and that part, subpart, or section applies to the word or term when used in
that part, subpart, or section.
Acquisition means the acquiring by contract with appropriated funds of suppli

In [27]:
# Persist the pipeline's model and tokenizer files to disk.
# NOTE(review): `text_generator` is not defined in any cell visible here
# (load_generator returns `text_gen`, query_rag uses `generator`); this cell
# presumably relies on a variable left in the kernel -- confirm before Run All.
# "./" is the current working directory, so the files are written there.
save_directory = "./"
text_generator.model.save_pretrained(save_directory)
# Last expression of the cell: its return value (the tuple of written tokenizer
# file paths) is what the notebook displays.
text_generator.tokenizer.save_pretrained(save_directory)



Saving checkpoint shards:   0%|          | 0/1 [00:00<?, ?it/s]

('./tokenizer_config.json',
 './special_tokens_map.json',
 './vocab.json',
 './merges.txt',
 './added_tokens.json',
 './tokenizer.json')