## LlamaIndex

In [None]:
# %pip install llama-index
# %pip install python-pptx


In [1]:
from llama_index.core import SimpleDirectoryReader

from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.node_parser import SentenceSplitter

In [2]:
# Baseline parser: a plain sentence splitter built with its own defaults.
text_splitter = SentenceSplitter()

# Sentence-window parser. The two metadata keys chosen here ("window" and
# "original_text") are the keys `pnode` later reads from each node's metadata.
# NOTE(review): window_size presumably sets how many neighbouring sentences are
# kept around each sentence - confirm against the LlamaIndex documentation.
node_parser = SentenceWindowNodeParser.from_defaults(
    original_text_metadata_key="original_text",
    window_metadata_key="window",
    include_metadata=True,
    window_size=3,
)

In [12]:
from llama_index.core import SimpleDirectoryReader

# Read the contents of the shared data directory into `documents`.
# The path is relative to the notebook's working directory.
documents = SimpleDirectoryReader(input_dir="../../../../../data").load_data()


In [19]:
# First line: how many documents were loaded.
# Second line: metadata of the third document (index 2).
print(len(documents), documents[2].metadata, sep="\n")

14
{'page_label': '3', 'file_name': 'Artemis_19910008828.pdf', 'file_path': 'c:\\Users\\trishjam\\Documents\\trishjam\\projects\\05-nasa\\data\\Artemis_19910008828.pdf', 'file_type': 'application/pdf', 'file_size': 352848, 'creation_date': '2024-04-08', 'last_modified_date': '2024-04-08'}


In [8]:
# Single-file input for the reader. Only one file is loaded per run; the
# alternative is kept as a comment instead of being assigned and then
# immediately overwritten (the first assignment was a dead store).
# filename = "../../../../../data/2020_ASCEND_SLS_Exergy_v01.1.pptx"
filename = "../../../../../data/SLS Liftoff Loads SciTech_v3.docx"

# Load just that one file; `one_doc` is the list returned by the reader.
one_doc = SimpleDirectoryReader(
    input_files=[filename]
).load_data()


In [9]:
# Report the number of documents produced from the single file, then the
# metadata attached to the first of them.
for summary_item in (len(one_doc), one_doc[0].metadata):
    print(summary_item)

1
{'file_name': 'SLS Liftoff Loads SciTech_v3.docx', 'file_path': '..\\..\\..\\..\\..\\data\\SLS Liftoff Loads SciTech_v3.docx', 'file_size': 727743, 'creation_date': '2024-04-12', 'last_modified_date': '2024-04-12'}


In [10]:
# Print the extracted text of the first document loaded from the single file.
print(one_doc[0].text)

Methodology and Development of SLS Liftoff Loads 



David Alldredge, Ben Jones,

Dynamic Concepts, LLC, Huntsville, AL 35806

Austin Decker,

NASA, Marshall Space Flight Center, AL, 35812

Ty Irwin, Skylar Taggart5

Dynamic Concepts, LLC, Huntsville, AL 35806

The methodologies and development of the loads experienced during liftoff of the NASA SLS Block I vehicle will be presented in this paper.  The liftoff loads analysis traditionally captures the dynamics of vehicle/pad separation after engine/booster ignition coupled with other launch day environments like wind, overpressure, and engine side loads. Additionally, the liftoff analysis includes an on-pad engine shutdown prior to booster ignition. The SLS liftoff analysis is performed using a Monte Carlo analysis and this paper will explore the Monte Carlo process and the statistical approach to calculating the limit loads.

		Introduction



The Space Launch System (SLS), composed of the Booster, Core Stage, Integrated Spacecraft an

In [14]:
# Run both parsers over the same documents so their outputs can be compared.
nodes = node_parser.get_nodes_from_documents(documents)
base_nodes = text_splitter.get_nodes_from_documents(documents)

# Sizes, in order: source documents, sentence-window nodes, base splitter nodes.
for collection in (documents, nodes, base_nodes):
    print(len(collection))


13
328
20


In [None]:
# Inspect the metadata attached to the first sentence-window node.
nodes[0].metadata

In [15]:
def pnode(node):
    """Print a node's window text followed by its original sentence.

    Args:
        node: Any object exposing a ``metadata`` mapping that contains the
            keys ``"window"`` and ``"original_text"``.

    Output is a separator line, a ``WINDOW`` section, a blank line, and an
    ``ORIGINAL`` section, all written to stdout. Nothing is returned.
    """
    print("-----------------------------------")
    window_text = node.metadata['window']
    print(f"WINDOW\n{window_text}")
    original_text = node.metadata['original_text']
    print(f"\nORIGINAL\n{original_text}")

# Show the first 11 nodes (indices 0 through 10), or all of them if there are
# fewer. Pairing with range(11) ends the loop after index 10 without an
# explicit break.
for i, n in zip(range(11), nodes):
    pnode(n)

-----------------------------------
WINDOW
PROJECT ARTEMIS/?
 f,S'23
N91-18141
MASSAC.HUSETrS INSTIT[YrE OFTECHNOLOGY
Nearly 30yearsafterJohnEKennedy expressed hisdesire
foranexpansion ofthespaceprogram oftheU.SA.,wehave
beengivenanevengreater oplx)rtunity.  President George
Bush,inJuly1989,expressed hisdesiretoonceagainexpand
thespaceprogram oftheU.S.,aswellasthatofothernations
around theworld.  Thechallenge: "Toreturn mantotheMoon,
thistimetostay.

ORIGINAL
PROJECT ARTEMIS/?

-----------------------------------
WINDOW
PROJECT ARTEMIS/?
 f,S'23
N91-18141
MASSAC.HUSETrS INSTIT[YrE OFTECHNOLOGY
Nearly 30yearsafterJohnEKennedy expressed hisdesire
foranexpansion ofthespaceprogram oftheU.SA.,wehave
beengivenanevengreater oplx)rtunity.  President George
Bush,inJuly1989,expressed hisdesiretoonceagainexpand
thespaceprogram oftheU.S.,aswellasthatofothernations
around theworld.  Thechallenge: "Toreturn mantotheMoon,
thistimetostay. "Andthen,tojourney toanother planet--
amanned mission toMars. 


In [None]:
# Print every node produced by the plain sentence splitter, each preceded by a
# separator line. The loop index was never used, so the nodes are iterated
# directly.
for n in base_nodes:
    print("-----------------------------------")
    print(n)

## LangChain

In [None]:
# %pip install langchain-community
# %pip install langchain-text-splitters

In [None]:
from langchain_community.document_loaders import PyPDFLoader


In [None]:
# Point a PDF loader at one of the sample documents (path is relative to the
# notebook's working directory).
loader = PyPDFLoader("../../../../../data/Artemis_19910008828.pdf")

# Keep both results: `load()` as `pages`, `load_and_split()` as `chunks`.
pages = loader.load()
chunks = loader.load_and_split()

# Page count on the first line, chunk count on the second.
print(len(pages), len(chunks), sep="\n")


In [None]:
# Dump the content of every loaded page, each preceded by a separator line.
for page in pages:
    print('-----------------------', page.page_content, sep='\n')

In [None]:
# Display the first chunk returned by load_and_split().
chunks[0]

## Testing

In [1]:
import sys
# Make the shared and service-local source trees importable from this notebook.
# The paths are relative to the notebook's working directory.
# Each entry is appended only when it is not already present, so re-running
# this cell does not pile up duplicate sys.path entries.
for extra_path in ("../../common/src", "../src"):
    if extra_path not in sys.path:
        sys.path.append(extra_path)
print(sys.path)

['c:\\Users\\trishjam\\Documents\\trishjam\\projects\\05-nasa\\code\\core-solution-services2\\components\\llm_service\\notebooks', 'C:\\Users\\trishjam\\AppData\\Local\\Programs\\Python\\Python311\\python311.zip', 'C:\\Users\\trishjam\\AppData\\Local\\Programs\\Python\\Python311\\DLLs', 'C:\\Users\\trishjam\\AppData\\Local\\Programs\\Python\\Python311\\Lib', 'C:\\Users\\trishjam\\AppData\\Local\\Programs\\Python\\Python311', 'c:\\Users\\trishjam\\Documents\\trishjam\\projects\\05-nasa\\code\\core-solution-services2\\components\\llm_service\\.v4', '', 'c:\\Users\\trishjam\\Documents\\trishjam\\projects\\05-nasa\\code\\core-solution-services2\\components\\llm_service\\.v4\\Lib\\site-packages', 'c:\\Users\\trishjam\\Documents\\trishjam\\projects\\05-nasa\\code\\core-solution-services2\\components\\llm_service\\.v4\\Lib\\site-packages\\win32', 'c:\\Users\\trishjam\\Documents\\trishjam\\projects\\05-nasa\\code\\core-solution-services2\\components\\llm_service\\.v4\\Lib\\site-packages\\win32

In [2]:
import os
from google.cloud import storage

# Google Cloud settings used by this notebook session.
REGION = "us-central1"
PROJECT_ID = "nasa-genie-dev"

# Publish the project id through the process environment as well.
# NOTE(review): presumably read by the service config imported in a later cell - confirm.
os.environ.update(PROJECT_ID=PROJECT_ID)

# Cloud Storage client bound to the project above.
storage_client = storage.Client(project=PROJECT_ID)

In [3]:
from services.query import data_source

INFO: [config/config.py:57 - <module>()] Namespace File not found, setting job namespace as default


  warn_deprecated(
  warn_deprecated(
  warn_deprecated(
  warn_deprecated(
  from .autonotebook import tqdm as notebook_tqdm


INFO: [config/config.py:107 - <module>()] ENABLE_GOOGLE_LLM = True
INFO: [config/config.py:108 - <module>()] ENABLE_OPENAI_LLM = True
INFO: [config/config.py:109 - <module>()] ENABLE_COHERE_LLM = True
INFO: [config/config.py:110 - <module>()] ENABLE_GOOGLE_MODEL_GARDEN = True
INFO: [config/config.py:111 - <module>()] ENABLE_TRUSS_LLAMA2 = True
INFO: [config/vector_store_config.py:40 - <module>()] Default vector store = [matching_engine]
INFO: [config/vector_store_config.py:49 - <module>()] PG_HOST = [127.0.0.1]
INFO: [config/vector_store_config.py:50 - <module>()] PG_DBNAME = [pgvector]
ERROR: [config/vector_store_config.py:77 - <module>()] Cannot connect to pgvector instance at 127.0.0.1: (psycopg2.OperationalError) connection to server at "127.0.0.1", port 5432 failed: Connection refused (0x0000274D/10061)
	Is the server running on that host and accepting TCP/IP connections?

(Background on this error at: https://sqlalche.me/e/20/e3q8)
INFO: [config/onedrive_config.py:30 - <module>()

In [4]:
# Build the service's DataSource helper around the Cloud Storage client created earlier.
ds = data_source.DataSource(storage_client)

In [6]:
# Local folder holding the sample documents (trailing slash is required because
# the file name is appended to it directly).
doc_path = "../../../../../data/"

# Document to chunk; swap in the commented alternative to try an HTML file.
doc_name = "Artemis_2022 IEEE Aero Creech Artemis Overview V3.pdf"
# doc_name = "test-html.html"

# Arguments are passed positionally: the document name, a placeholder string for
# the second argument, and the local file path. The call returns two values,
# kept as `text_chunks` and `embed_chunks`.
text_chunks, embed_chunks = ds.chunk_document(doc_name, "remote path", f"{doc_path}{doc_name}")

INFO: [query/data_source.py:135 - chunk_document()] generating index data for Artemis_2022 IEEE Aero Creech Artemis Overview V3.pdf
INFO: [query/data_source.py:221 - read_doc()] Reading pdf file Artemis_2022 IEEE Aero Creech Artemis Overview V3.pdf with 7 pages
INFO: [query/data_source.py:224 - read_doc()] Finished reading pdf file Artemis_2022 IEEE Aero Creech Artemis Overview V3.pdf


In [7]:
# Print the size of each chunk list, text chunks first.
for chunk_list in (text_chunks, embed_chunks):
    print(len(chunk_list))

231
231


In [11]:
# Walk the text chunks and show, for each one: its character and word counts,
# the chunk itself under "Window", and the entry at the same index of
# `embed_chunks` under "Original".
for idx, window_text in enumerate(text_chunks):
    char_count = len(window_text)
    word_count = len(window_text.split())
    print(char_count, word_count)
    print("Window")
    print(window_text)
    print("Original")
    print(embed_chunks[idx])
    print("-----")

44 7
Window
U.S.  Government work not protected by U.S. 
Original
U.S. 
-----
176 27
Window
U.S.  Government work not protected by U.S.  copyright  Artemis: An Overview of  
NASA’s Activities to Return Humans to the Moon  
Steve Creech  
NASA Headquarters  
300 E St. 
Original
Government work not protected by U.S. 
-----
266 39
Window
Government work not protected by U.S.  copyright  Artemis: An Overview of  
NASA’s Activities to Return Humans to the Moon  
Steve Creech  
NASA Headquarters  
300 E St.  SW  
Washington, DC 20546  
steve.creech @nasa.gov  John Guidi  
NASA Headquarters  
300 E St. 
Original
copyright  Artemis: An Overview of  
NASA’s Activities to Return Humans to the Moon  
Steve Creech  
NASA Headquarters  
300 E St. 
-----
323 46
Window
copyright  Artemis: An Overview of  
NASA’s Activities to Return Humans to the Moon  
Steve Creech  
NASA Headquarters  
300 E St.  SW  
Washington, DC 20546  
steve.creech @nasa.gov  John Guidi  
NASA Headquarters  
300 E St.  SW  
Wa