In [21]:
# Dependencies for this notebook. They are kept disabled as comments (rather than
# inside a string literal, which the notebook would evaluate and echo as output).
# Uncomment and run once in a fresh environment; `%pip` installs into the
# environment of the running kernel.
# %pip install llama-index
# %pip install llama-index-llms-openai
# %pip install llama-index-embeddings-openai
# %pip install llama-index-vector-stores-faiss

' !pip install llama-index\n!pip install llama-index-llms-openai\n!pip install llama-index-embeddings-openai\n!pip install llama-index-vector-stores-faiss '

In [22]:
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.readers.file import PagedCSVReader
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core import VectorStoreIndex
import faiss
import os
import pandas as pd
from dotenv import load_dotenv
import logging
from datetime import datetime
import time

# Configure logging with two sinks: a per-run log file whose name carries the
# start timestamp, and a stream handler.
logging.basicConfig(
    handlers=[
        logging.FileHandler(
            'rag_openai_{}.log'.format(datetime.now().strftime("%Y%m%d_%H%M%S"))
        ),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Remember when the run began so the total execution time can be reported later
start_time = time.time()
logger.info("Starting RAG OpenAI application...")

# Load environment variables
logger.info("Loading environment variables...")

# Load environment variables from a .env file
load_dotenv()

# Validate the OpenAI API key BEFORE touching os.environ. Assigning None to an
# os.environ entry raises TypeError, so copying the value first would hide the
# descriptive error below whenever the key is missing. No copy is needed at all:
# os.getenv reads from os.environ, so a key that is found is already set there.
# An empty value is treated the same as a missing one.
if not os.getenv("OPENAI_API_KEY"):
    logger.error("OPENAI_API_KEY not found in environment variables")
    raise ValueError("OPENAI_API_KEY not found in environment variables. Please check your .env file.")
logger.info("Environment variables loaded successfully")


# Register the LLM and the embedding model on LlamaIndex's global Settings object.
logger.info("Initializing LLM and embedding settings...")

# Embedding width requested from the embedding model; the FAISS index created
# later in this notebook is sized with the same constant.
EMBED_DIMENSION = 512

Settings.llm = OpenAI(model="gpt-4o-mini")
Settings.embed_model = OpenAIEmbedding(
    dimensions=EMBED_DIMENSION,
    model="text-embedding-3-small",
)
logger.info("LLM and embedding settings initialized")


2025-02-04 11:23:24,309 - INFO - Starting RAG OpenAI application...
2025-02-04 11:23:24,309 - INFO - Loading environment variables...
2025-02-04 11:23:24,311 - INFO - Environment variables loaded successfully
2025-02-04 11:23:24,313 - INFO - Initializing LLM and embedding settings...
2025-02-04 11:23:24,314 - INFO - LLM and embedding settings initialized


In [23]:
# Read the lessons-learned CSV into a DataFrame and preview its first rows.
logger.info("Loading CSV file...")
file_path = 'Data Collection/nasa_lessons_learned_jet_propulsion_PROPER.csv'
data = pd.read_csv(filepath_or_buffer=file_path)
logger.info("Loaded CSV file with %d rows", len(data))
data.head()

2025-02-04 11:23:24,331 - INFO - Loading CSV file...
2025-02-04 11:23:24,367 - INFO - Loaded CSV file with 422 rows


Unnamed: 0,url,subject,abstract,driving_event,lessons_learned,recommendations,evidence,program_relation,program_phase,mission_directorate,topics
0,https://llis.nasa.gov/lesson/374,Higher Data Rates Possible Through Station Arr...,Because the MVM '73 project went to a degraded...,A previous failure of the onboard tape recorde...,The use of DSN arraying schemes can provide hi...,The project and the DSN must foresee and plan ...,JPL has referenced this lesson learned as addi...,,,,
1,https://llis.nasa.gov/lesson/400,Spacecraft Structure Dynamical Interaction wit...,"As Mariner 10 approached Venus encounter, an u...",As Mariner 10 (MVM'73) was nearing encounter w...,Spacecraft structural dynamical interactions w...,"During the spacecraft design phase, considerat...",JPL has referenced the lesson as supporting Pa...,,,,
2,https://llis.nasa.gov/lesson/486,Conduct Post Test Examination of Lot Acceptanc...,During lot acceptance test of ten Mars Observe...,As part of the Mars Observer mission failure i...,Successful lot functional acceptance tests may...,Alert users of NSIs to the possibility of dama...,JPL has referenced this lesson learned as addi...,,,,
3,https://llis.nasa.gov/lesson/708,Plasma Noise in EMI Design,Preferred Practice for Design & Test. Electrom...,This Lesson Learned is based on Reliability Pr...,Unpredictable operational anomalies and compro...,Missions with payloads that can interact stron...,This practice has been used on Atmospheric Tra...,,,Aeronautics Research Human Exploration and Ope...,
4,https://llis.nasa.gov/lesson/11501,MSL Actuator Design Process Escape,The mechanical design process for the Mars Sci...,The ambitious MSL mission presented some major...,"See ""Recommendations.""",Reference (4) provides a list of flaws in the ...,JPL has referenced this lesson learned as addi...,,Implementation » Phase C,Human Exploration and Operations Science,Engineering Design Manufacturing and Assembly ...


In [24]:
# Build the FAISS index and wrap it in a LlamaIndex vector store.
logger.info("Creating FAISS vector store...")
# IndexFlatL2 is sized with EMBED_DIMENSION, the same value passed as
# `dimensions` to the OpenAI embedding model in the settings cell.
fais_index = faiss.IndexFlatL2(EMBED_DIMENSION)
vector_store = FaissVectorStore(faiss_index=fais_index)
logger.info("FAISS vector store created")

2025-02-04 11:23:24,395 - INFO - Creating FAISS vector store...
2025-02-04 11:23:24,396 - INFO - FAISS vector store created


In [25]:
# Set up document reader: ".csv" files are handed to PagedCSVReader.
# In the recorded run this produced 422 documents from the 422-row CSV.
logger.info("Setting up document reader...")
csv_reader = PagedCSVReader()
reader = SimpleDirectoryReader(file_extractor={".csv": csv_reader}, input_files=[file_path])
docs = reader.load_data()
logger.info("Loaded %d documents", len(docs))

2025-02-04 11:23:24,417 - INFO - Setting up document reader...


2025-02-04 11:23:24,476 - INFO - Loaded 422 documents


In [26]:
# Check a sample chunk: print the text of the first loaded document to see how
# a CSV row is rendered (one "column: value" line per field in the recorded output).
print(docs[0].text)

url: https://llis.nasa.gov/lesson/374
subject: Higher Data Rates Possible Through Station Arraying
abstract: Because the MVM '73 project went to a degraded telemetry mode after failure of an onboard tape recorder, the project had to use DSN arraying schemes to provide higher mission telemetry data rates. The project and the DSN must foresee and plan the use of complex and previously unused capabilities sufficiently in advance to allow the proper analysis and configuration testing to be completed.
driving_event: A previous failure of the onboard tape recorder had made the "full resolution, full frame 117.6 kbps" mode the primary telecommunications mode at the second Mercury encounter. The increased Mercury to Earth range, relative to first Mercury encounter, required (1) a communications improvement of at least one dB over the performance obtained at the first Mercury encounter, and (2) commitment by the MVM '73 project to a bit error rate higher than .0333. The Telecommunications Divis

In [27]:
# Create and run ingestion pipeline.
# No `transformations` are passed, so whatever defaults IngestionPipeline applies
# are used; the recorded run issued embedding requests here and turned the 422
# documents into 648 nodes.
logger.info("Starting document ingestion pipeline...")
pipeline = IngestionPipeline(documents=docs, vector_store=vector_store)
nodes = pipeline.run()
logger.info("Created %d nodes from documents", len(nodes))

2025-02-04 11:23:24,502 - INFO - Starting document ingestion pipeline...
2025-02-04 11:23:27,798 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-02-04 11:23:31,177 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-02-04 11:23:34,367 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-02-04 11:23:36,912 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-02-04 11:23:39,363 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-02-04 11:23:41,212 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-02-04 11:23:42,602 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-02-04 11:23:43,123 - INFO - Created 648 nodes from documents


In [28]:
# Create vector store index and query engine
logger.info("Creating vector store index...")
# NOTE(review): `vector_store` (the FAISS store) is not passed to VectorStoreIndex,
# so this index presumably keeps the node embeddings in its own default store and
# the FAISS index filled by the ingestion pipeline is not the one being queried.
# Confirm against the LlamaIndex docs before relying on FAISS here.
vector_store_index = VectorStoreIndex(nodes)
# similarity_top_k=2: the query engine is asked for the two most similar nodes per question.
query_engine = vector_store_index.as_query_engine(similarity_top_k=2)
logger.info("Vector store index and query engine created")

2025-02-04 11:23:43,130 - INFO - Creating vector store index...
2025-02-04 11:23:43,319 - INFO - Vector store index and query engine created


In [29]:
def ask_question(question):
    """Run ``question`` through the module-level ``query_engine`` and return the answer.

    The question, the answer and the time spent inside ``query_engine.query``
    are written to the module-level ``logger`` at INFO level.

    Args:
        question: The query to answer. Normally a string, but any object that
            ``query_engine.query`` accepts works: it is only rendered into log
            messages through ``%s`` formatting, never concatenated to a string.

    Returns:
        The ``response`` attribute of the object returned by
        ``query_engine.query``.
    """
    logger.info("Processing question: %s", question)

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments while the request is in flight.
    query_start_time = time.perf_counter()
    response = query_engine.query(question)
    query_duration = time.perf_counter() - query_start_time

    logger.info("Question answered in %.2f seconds", query_duration)
    # Lazy %-style arguments keep logging from raising after the (paid) query has
    # already completed when `question` is not a plain string.
    logger.info("Question: %s", question)
    logger.info("Answer: %s", response.response)

    return response.response

In [30]:
# Example usage: ask one question end to end, echo it and its answer, then
# report how long the whole run took since `start_time` was recorded.
if __name__ == "__main__":
    question = "What can we learn from the 2000 HESSI spacecraft overtest incident that severely damaged the spacecraft?"
    print(f"\nQuestion: {question}")
    answer = ask_question(question)
    print(f"\nAnswer: {answer}")

    # Log total execution time
    total_duration = time.time() - start_time
    logger.info("Total execution time: %.2f seconds", total_duration)

2025-02-04 11:23:43,354 - INFO - Processing question: What can we learn from the 2000 HESSI spacecraft overtest incident that severely damaged the spacecraft?



Question: What can we learn from the 2000 HESSI spacecraft overtest incident that severely damaged the spacecraft?


2025-02-04 11:23:45,208 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-02-04 11:23:52,664 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-02-04 11:23:52,671 - INFO - Question answered in 9.32 seconds
2025-02-04 11:23:52,672 - INFO - Question: What can we learn from the 2000 HESSI spacecraft overtest incident that severely damaged the spacecraft?
2025-02-04 11:23:52,672 - INFO - Answer: The 2000 HESSI spacecraft overtest incident highlights several important lessons regarding environmental testing and the maintenance of test facilities. Key takeaways include:

1. **Inherent Risks of Environmental Testing**: Recognizing that environmental testing carries risks is crucial. It is essential to proactively maintain and track the mechanical health of test facilities and upgrade them as necessary.

2. **Facility Validation**: Conducting a vibration facility validation test prior to the actual test is vital. 


Answer: The 2000 HESSI spacecraft overtest incident highlights several important lessons regarding environmental testing and the maintenance of test facilities. Key takeaways include:

1. **Inherent Risks of Environmental Testing**: Recognizing that environmental testing carries risks is crucial. It is essential to proactively maintain and track the mechanical health of test facilities and upgrade them as necessary.

2. **Facility Validation**: Conducting a vibration facility validation test prior to the actual test is vital. This should include a simulated mass mock-up to calibrate the test setup effectively.

3. **Test Planning and Execution**: Clearly defining test requirements in the test plan and ensuring that test operators are adequately trained and informed is critical. This includes understanding the data available for assessing the quality of the test setup.

4. **Monitoring and Self-Checks**: Implementing self-checks and evaluating critical control system response data in r