In [1]:
import getpass
import os

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI

# OpenAI credentials for the ChatOpenAI model used further down.
# Never hardcode the key in the notebook: keep whatever is already exported in
# the environment, and only prompt for it (input is not echoed) when it is
# missing or empty. The previous unconditional assignment overwrote a valid
# exported key with a placeholder string.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# --- Ingestion settings: tune these instead of editing the calls below ---
ARTICLE_PATH = "test_article.txt"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
TOP_K = 3

# Read the whole article into memory as UTF-8 text.
with open(ARTICLE_PATH, "r", encoding="utf-8") as article_file:
    paper_text = article_file.read()

# Split the article into overlapping chunks so each piece can be embedded
# and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_text(paper_text)

# Embed with a Hugging Face model (avoids OpenAI embedding costs).
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

# Index the chunks in a FAISS vector store and expose it as a retriever
# configured with k=TOP_K results per query.
vector_store = FAISS.from_texts(chunks, embeddings)
retriever = vector_store.as_retriever(search_kwargs={"k": TOP_K})

# Prompt for the answer-synthesis step. It exposes two placeholders:
# {context} for the documents and {input} for the question.
QA_TEMPLATE = (
    "Given the following documents, answer the question:\n\n"
    "{context}\n\n"
    "Question: {input}"
)
prompt = PromptTemplate.from_template(QA_TEMPLATE)

# Chat model that writes the final answer (GPT-4o).
llm = ChatOpenAI(model_name="gpt-4o")

# Chain that combines the documents into the prompt and calls the LLM.
combine_documents_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: retriever first, then the document-combination chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_documents_chain,
)

The main argument of the document is to highlight the role and importance of the Open Source Initiative (OSI) in promoting, advocating for, and supporting open source software. It emphasizes the benefits of open source, such as higher quality, better reliability, greater flexibility, lower cost, and avoiding vendor lock-in. The OSI is presented as a key organization in building communities and standards in the open source ecosystem, helping to ensure that the freedoms and opportunities of open source software are accessible and beneficial to all.


In [None]:
# Ask the RAG chain a question; the question is passed under the "input" key.
query = "What is the main argument of this paper?"
chain_inputs = {"input": query}
response = rag_chain.invoke(chain_inputs)

# Show only the generated answer from the chain's result.
answer_text = response["answer"]
print(answer_text)

In [2]:
import openai

# Take the key from the environment instead of hardcoding it in the notebook.
# A missing variable fails right here with a KeyError rather than later with
# an authentication error from the API. `os` is imported in the notebook's
# top import cell.
openai.api_key = os.environ["OPENAI_API_KEY"]

# List available models
models = openai.models.list()

print([model.id for model in models.data])

['gpt-4.5-preview', 'omni-moderation-2024-09-26', 'gpt-4.5-preview-2025-02-27', 'gpt-4o-mini-audio-preview-2024-12-17', 'dall-e-3', 'dall-e-2', 'gpt-4o-audio-preview-2024-10-01', 'gpt-4o-audio-preview', 'gpt-4o-mini-realtime-preview-2024-12-17', 'gpt-4o-mini-realtime-preview', 'o1-mini-2024-09-12', 'o1-preview-2024-09-12', 'o1-mini', 'o1-preview', 'gpt-4o-mini-audio-preview', 'whisper-1', 'gpt-4-turbo', 'gpt-4o-realtime-preview-2024-10-01', 'gpt-4', 'text-embedding-3-large', 'babbage-002', 'chatgpt-4o-latest', 'tts-1-hd-1106', 'gpt-4o-audio-preview-2024-12-17', 'tts-1-hd', 'gpt-4o-mini-2024-07-18', 'tts-1', 'tts-1-1106', 'gpt-4-turbo-2024-04-09', 'gpt-4o-2024-11-20', 'davinci-002', 'gpt-3.5-turbo-1106', 'gpt-4o-2024-08-06', 'gpt-4o-mini', 'gpt-4o-2024-05-13', 'gpt-3.5-turbo-instruct', 'gpt-4o', 'gpt-3.5-turbo-instruct-0914', 'gpt-3.5-turbo-0125', 'gpt-4o-realtime-preview-2024-12-17', 'gpt-3.5-turbo', 'gpt-4o-realtime-preview', 'gpt-3.5-turbo-16k', 'gpt-4-0125-preview', 'text-embedding-