In [71]:
!pip install -q langchain==0.1.16 langchain-community==0.0.34 langchain-openai==0.1.2 pinecone-client==3.0.0 python-dotenv==1.0.1 tiktoken==0.6.0

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/199.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/199.9 kB[0m [31m2.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.9/199.9 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [61]:
import importlib.util
import os
import subprocess
import sys
import time
from getpass import getpass

import pinecone
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Pinecone as PineconeVectorStore
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# Credentials are never written into the notebook. A key that is already set in
# the environment is kept as-is; a missing one is requested interactively with
# getpass so the value is not echoed into the cell output.
for _key_name in ("PINECONE_API_KEY", "OPENAI_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"Enter {_key_name}: ")

In [63]:
# Name of the Pinecone index used throughout the notebook.
index_name = "yardstick-free-tier"

# Pinecone client authenticated with the key taken from the environment.
pc = pinecone.Pinecone(
    api_key=os.environ["PINECONE_API_KEY"],
)

In [72]:
# Create the serverless index on first use, then wait until Pinecone reports it
# as ready instead of sleeping for a fixed minute (which is wasted time when the
# index is ready sooner and not enough when provisioning is slow).
INDEX_READY_TIMEOUT_S = 120   # give up after this many seconds
INDEX_READY_POLL_S = 2        # delay between readiness checks

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # must match the embedding size written to the index
        metric="cosine",
        spec=pinecone.ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )
    print(f"Created index '{index_name}'. Waiting for it to become ready...")

    deadline = time.monotonic() + INDEX_READY_TIMEOUT_S
    while not pc.describe_index(index_name).status["ready"]:
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Index '{index_name}' was not ready after {INDEX_READY_TIMEOUT_S} seconds"
            )
        time.sleep(INDEX_READY_POLL_S)
else:
    print(f"Using existing index '{index_name}'")

# Handle used for direct operations against the index.
index = pc.Index(index_name)

Using existing index 'yardstick-free-tier'


In [65]:
# Loader for the archived Yardstick "about" page and the splitter that cuts the
# loaded documents into overlapping chunks (size 500, overlap 50).
loader = WebBaseLoader(
    "https://web.archive.org/web/20231021031443/https://www.yardstickassessment.com/about"
)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

# Fetch the page, then split it; `chunks` is what gets embedded later.
documents = loader.load()
chunks = text_splitter.split_documents(documents)

In [66]:
class SafeOpenAI:
    """Client-side quota guard for OpenAI embedding requests.

    Tracks how many requests were made (`daily_usage`) and when the last one
    was sent (`last_call`, a `time.time()` timestamp), and spaces requests so
    that at most `RPM_LIMIT` are sent per minute.

    Args:
        rpm_limit: Maximum requests per minute; must be positive.
        daily_limit: Maximum number of requests before `embed` refuses to run;
            must not be negative.
        embedding_model: Model name passed to `OpenAIEmbeddings`. `None` keeps
            the library's default model.

    Raises:
        ValueError: If `rpm_limit` is not positive or `daily_limit` is negative.
    """

    def __init__(self, rpm_limit=3, daily_limit=200, embedding_model=None):
        if rpm_limit <= 0:
            raise ValueError("rpm_limit must be positive")
        if daily_limit < 0:
            raise ValueError("daily_limit must not be negative")

        self.last_call = 0
        self.daily_usage = 0
        self.RPM_LIMIT = rpm_limit
        self.DAILY_LIMIT = daily_limit
        self._embedding_model = embedding_model
        self._client = None  # created on first use, then reused

    def _wait_for_slot(self):
        """Sleep until at least 60 / RPM_LIMIT seconds have passed since `last_call`."""
        min_gap = 60 / self.RPM_LIMIT
        elapsed = time.time() - self.last_call
        if elapsed < min_gap:
            time.sleep(min_gap - elapsed)

    def _get_client(self):
        """Return the embeddings client, building it once instead of on every call."""
        if self._client is None:
            options = {}
            if self._embedding_model is not None:
                options["model"] = self._embedding_model
            self._client = OpenAIEmbeddings(**options)
        return self._client

    def embed(self, texts):
        """Embed `texts` while respecting the per-minute and daily limits.

        Args:
            texts: Iterable of strings to embed.

        Returns:
            The result of `OpenAIEmbeddings.embed_documents` for `texts`, or an
            empty list when `texts` is empty (no request is sent and no quota
            is consumed in that case).

        Raises:
            RuntimeError: If the daily limit has already been reached.
        """
        if self.daily_usage >= self.DAILY_LIMIT:
            raise RuntimeError("Daily limit reached")

        texts = list(texts)
        if not texts:
            return []

        self._wait_for_slot()

        # The attempt is recorded before the request is sent, so a failed
        # request still counts against the quota.
        self.last_call = time.time()
        self.daily_usage += 1
        return self._get_client().embed_documents(texts)

safe_openai = SafeOpenAI()

In [67]:
# `documents` and `chunks` were already produced by the identical load-and-split
# cell earlier in this notebook, so the page is not downloaded and split a second
# time here. This cell only verifies that there is something to embed.
if not chunks:
    raise ValueError(
        "No text chunks were produced from the source page; re-run the load/split cell."
    )

In [73]:
# Build `vector_db`, preferring the hosted Pinecone index and degrading to a
# local store so the rest of the notebook can still run:
#   1. Pinecone with OpenAI embeddings
#   2. FAISS with a local HuggingFace embedding model
#   3. Chroma (in memory) with the same local model
LOCAL_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"


def _make_local_embeddings():
    """Return the HuggingFace embedding model shared by both local fallbacks."""
    # Imported lazily: only needed when the Pinecone path fails.
    from langchain_community.embeddings import HuggingFaceEmbeddings

    return HuggingFaceEmbeddings(model_name=LOCAL_EMBEDDING_MODEL)


def _ensure_installed(module_name, pip_name):
    """Install `pip_name` into the running interpreter's environment if `module_name` is missing."""
    if importlib.util.find_spec(module_name) is None:
        # sys.executable guarantees the package lands in the kernel's environment.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", pip_name])
        importlib.invalidate_caches()


print(f"Embedding {len(chunks)} chunks (Daily usage: {safe_openai.daily_usage}/{safe_openai.DAILY_LIMIT})")

try:
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    vector_db = PineconeVectorStore.from_documents(
        documents=chunks,
        embedding=embeddings,
        index_name=index_name
    )

except Exception as pinecone_error:
    # This branch also catches failures of the OpenAI embedding call itself,
    # since embedding happens inside from_documents.
    print(f"Pinecone failed: {pinecone_error}")
    print("Attempting local fallback...")

    # Built once and shared by both local stores.
    local_embeddings = _make_local_embeddings()

    try:
        _ensure_installed("faiss", "faiss-cpu")

        from langchain_community.vectorstores import FAISS

        vector_db = FAISS.from_documents(
            documents=chunks,
            embedding=local_embeddings
        )
        print("Successfully created local FAISS vector store")

    except Exception as faiss_error:
        print(f"FAISS failed: {faiss_error}")
        print("Falling back to in-memory Chroma...")

        from langchain_community.vectorstores import Chroma

        # No persist_directory: the store stays in memory, as the message above
        # says, and re-running the cell does not pile up duplicate chunks on disk.
        vector_db = Chroma.from_documents(
            documents=chunks,
            embedding=local_embeddings
        )
        print("Successfully created Chroma vector store")

Embedding 15 chunks (Daily usage: 3/200)
Pinecone failed: module 'pinecone' has no attribute 'Index'
Attempting local fallback...
Successfully created local FAISS vector store


In [74]:
# gpt-3.5-turbo is a chat model, so it has to be driven through the chat wrapper
# (ChatOpenAI); the completions wrapper `OpenAI` targets the legacy completions
# endpoint, which does not serve chat models.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,    # no sampling randomness
    max_tokens=150    # upper bound on the length of each answer
)

def ask_safely(question):
    """Answer `question` from the vector store while respecting the usage limits.

    Builds a retriever over `vector_db` (top 2 matches) and a "stuff"
    RetrievalQA chain around `llm`, waits until at least
    60 / `safe_openai.RPM_LIMIT` seconds have passed since the last recorded
    call, records the attempt on `safe_openai`, and runs the chain.

    Args:
        question: The question to ask; must be a non-blank string.

    Returns:
        The chain's answer ("No answer found" if the chain returns no
        "result" key), or a human-readable message when the daily limit is
        reached, the question is blank, or any step raises. This function
        does not raise; failures are reported in the returned string.
    """
    try:
        daily_limit = safe_openai.DAILY_LIMIT
        if safe_openai.daily_usage >= daily_limit:
            return f"⚠️ Daily free limit reached ({daily_limit} requests)"

        # Reject blank input before spending quota or calling the API.
        if not isinstance(question, str) or not question.strip():
            return "Error: question must be a non-empty string"

        retriever = vector_db.as_retriever(search_kwargs={"k": 2})

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever
        )

        # Space requests so that at most RPM_LIMIT are sent per minute.
        min_gap = 60 / safe_openai.RPM_LIMIT
        elapsed = time.time() - safe_openai.last_call
        if elapsed < min_gap:
            time.sleep(min_gap - elapsed)

        # Count the attempt before sending it, so failed requests are counted too.
        safe_openai.daily_usage += 1
        safe_openai.last_call = time.time()

        result = qa_chain.invoke({"query": question})
        return result.get("result", "No answer found")

    except Exception as e:
        return f"Error: {e} (Usage: {safe_openai.daily_usage}/{safe_openai.DAILY_LIMIT} today)"

In [75]:
# Sample questions sent through the rate-limited QA helper.
questions = [
    "What is computerized adaptive testing?",
    "How does Yardstick ensure test validity?",
    "What services does Yardstick offer?",
]

# Print each question, its answer, and the quota left after asking it.
for q in questions:
    print(f"\nQ: {q}")
    answer = ask_safely(q)
    print(f"A: {answer}")
    remaining = safe_openai.DAILY_LIMIT - safe_openai.daily_usage
    print(f"Remaining today: {remaining}")


Q: What is computerized adaptive testing?
A: Error: 'FieldInfo' object has no attribute 'items' (Usage: 4/200 today)
Remaining today: 196

Q: How does Yardstick ensure test validity?
A: Error: 'FieldInfo' object has no attribute 'items' (Usage: 5/200 today)
Remaining today: 195

Q: What services does Yardstick offer?
A: Error: 'FieldInfo' object has no attribute 'items' (Usage: 6/200 today)
Remaining today: 194
