In [1]:
# import pandas as pd
# import matplotlib.pyplot as plt
# from tiktoken import encoding_for_model, get_encoding
# from langchain.document_loaders import PyPDFLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.embeddings import OpenAIEmbeddings
# from langchain.vectorstores import FAISS
# from langchain.llms import OpenAI

In [1]:
import os

# Read the OpenAI API key from ../API_KEY and expose it through the environment
# variable OPENAI_API_KEY for the cells below.
with open('../API_KEY', 'r') as f:
    # strip(): files usually end with a newline; without this the newline (and
    # any surrounding whitespace) would become part of the key value.
    os.environ['OPENAI_API_KEY'] = f.read().strip()

## Load the text files and split into chunks

Split the documents into smaller chunks so that only the relevant parts need to be looked up at a time.

In [50]:
import tiktoken
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

In [51]:
# Load every .txt file found (recursively) under ./igor_help_files_txt.
loader = DirectoryLoader(
    './igor_help_files_txt',
    glob="**/*.txt",
    loader_cls=TextLoader,
)
docs = loader.load()

# Sanity check: document count, plus the metadata and first 50 characters of the first document.
print(
    f'Num docs: {len(docs)}',
    f'Meta data looks like: {docs[0].metadata}',
    f'Beginning of content looks like: {docs[0].page_content[:50]}',
    sep='\n',
)

Num docs: 50
Meta data looks like: {'source': 'igor_help_files_txt\\3D Graphics.txt'}
Beginning of content looks like: 


z
	3D Graphics
Igor can create various kinds of


In [52]:
# Tokenizer used for token counting. The original cell also called
# tiktoken.encoding_for_model('gpt-3.5-turbo') but threw the result away, so
# that no-op lookup was dropped; the encoding is requested by name instead.
encoder = tiktoken.get_encoding('cl100k_base')
encoder

<Encoding 'cl100k_base'>

In [53]:
import tiktoken

def len_tokens(text: str):
    """Return how many `cl100k_base` tokens `text` encodes to."""
    token_ids = tiktoken.get_encoding('cl100k_base').encode(text)
    return len(token_ids)

In [62]:
# Chunk sizes are measured with len_tokens (tokens, not characters) because it
# is passed as the splitter's length_function.
# Previously tried settings: chunk_size=200, chunk_overlap=40.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=3000,
    chunk_overlap=20,
    length_function=len_tokens,
)

In [63]:
# Split only the first document to check the resulting chunk sizes.
texts = text_splitter.create_documents(texts=[docs[0].page_content])
# A bare `len(texts)` in the middle of a cell is never displayed, so print it.
print(f'Num chunks: {len(texts)}')
# Token count of each of the first 30 chunks.
for text in texts[:30]:
    print(len_tokens(text.page_content))

1124
2838
1241
2609
2886
2987
2571
2244
2824
2972
2636
2804
2452
2401
2483
2587
2483
2360
2210
167


In [71]:
# Show the metadata of the first document (a bare expression in the middle of
# a cell is never displayed, so it is printed explicitly).
print(f'Source: {docs[0].metadata}')
# Print the first and last 100 characters of each of the first 20 chunks to
# eyeball where the splitter cut the text.
for text in texts[:20]:
    content = text.page_content
    print(content[:100], '---', content[-100:], '\n\n---\n\n', sep='\n')

z
	3D Graphics
Igor can create various kinds of 3D graphics including:
	Surface Plots
	3D Scatter Pl
---
 This depends on your graphics hardware, graphics driver version and graphics acceleration settings.


---


Gizmo Guided Tour
The tutorials in the following sections will give you a sense of Gizmo's basic cap
---
 revisit the tour later and is not strictly necessary.
Gizmo 3D Scatter Plot and Fitted Surface Tour


---


Gizmo 3D Scatter Plot and Fitted Surface Tour
In this tour we will create a 3D scatter plot from a t
---
on Tour.pxp".
This is just in case you want to revisit the tour later and is not strictly necessary.


---


That concludes the Gizmo guided tour. There are more examples below. Also choose File->Example Exper
---
utes
Internal attributes are built into objects. For example, the New Sphere dialog looks like this:


---


The draw style, normals, orientation and color settings are internal attributes of the sphere object
---
nslate, data={1,0,0}
ModifyGizmo setDis

In [72]:
# Deliberate stop: this cell always raises, so a top-to-bottom run halts here
# until the odd-looking text in the chunk preview has been investigated.
raise NotImplementedError(
    'Should figure out what is happening above '
    '(apparently some very random looking text still in these .txt files) first'
)

NotImplementedError: Should figure out what is happening above (apparently some very random looking text still in these .txt files) first

In [59]:
# Split every loaded document; split_documents forwards each document's
# page_content and metadata to create_documents, so chunks keep their source.
texts = text_splitter.split_documents(docs)

# Chunk count, then the first chunk and the 20th-from-last chunk as samples.
print(len(texts), texts[0], texts[-20], sep='\n')

5377
page_content='z\n\t3D Graphics\nIgor can create various kinds of 3D graphics including:\n\tSurface Plots\n\t3D Scatter Plots\n\t3D Bar Plots\n\tPath Plots\n\tRibbon Plots\n\n\tIsosurface Plots\n\tVoxelgram Plots\nImage Plots, Contour Plots and Waterfall Plots are considered 2D graphics and are discussed in other sections of the help.' metadata={'source': 'igor_help_files_txt\\3D Graphics.txt'}
page_content='#include <Scatter Plot Matrix>\nContains procedures to build a matrix of scatter plots from a list of waves. The resulting graph shows a matrix of scatter plots using all possible pairs of waves from the list as X and Y data.\nFor a demonstration of the use of this procedure, see the demo experiment, Scatter Plot Matrix Demo, in the Graphing Techniques folder inside the Examples folder.\n\n\n#include <SetDecadeLength>\nContains a procedure, SetDecadeLength,  which, assuming the top window is a log/log graph,sets the size of the graph\'s plot area such that the length of a decad

# Make embeddings and store in vector DB

In [60]:
# Embedding client used to build and query the vector store below.
# NOTE(review): constructed with no arguments — presumably picks up the
# OPENAI_API_KEY set in the first cell; confirm.
embeddings = OpenAIEmbeddings()

In [61]:
# Build the FAISS index only when it is not already on disk. Embedding every
# chunk calls the OpenAI API, so an existing index is reused instead of being
# rebuilt. This replaces a bare `raise`, which errored out before the build.
FAISS_DIR, FAISS_INDEX = 'faiss_dbs', 'igor-test'
# NOTE(review): assumes save_local writes '<index_name>.faiss' into the folder — confirm.
if os.path.exists(os.path.join(FAISS_DIR, f'{FAISS_INDEX}.faiss')):
    docsearch = FAISS.load_local(FAISS_DIR, embeddings, index_name=FAISS_INDEX)
else:
    docsearch = FAISS.from_documents(documents=texts, embedding=embeddings)
    docsearch.save_local(FAISS_DIR, FAISS_INDEX)

In [None]:
# Reload the saved index from disk. The embeddings object is passed
# positionally: load_local's parameter is named `embeddings`, so the original
# `embedding=` keyword does not bind to it.
docsearch = FAISS.load_local('faiss_dbs', embeddings, index_name='igor-test')

# Test searching vector DB

In [62]:
# Smoke-test retrieval: fetch the 5 chunks closest to the query with their scores.
query = "how do I make a 2D graph?"
close_docs = docsearch.similarity_search_with_score(query, k=5)
print(f'Found {len(close_docs)} docs')
# NOTE(review): for FAISS the score is presumably a distance (lower = closer) — confirm.
for doc, score in close_docs:
    print(f'Score: {score}\nMetadata: {doc.metadata}\nContent: \n{doc.page_content}\n\n')

5


# Optional - Test with a very basic QA Chain

In [47]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

In [48]:
# LLM client with library-default settings (no model or parameters are specified here).
llm = OpenAI()
# 'stuff' chain: all retrieved documents are placed into a single prompt.
chain = load_qa_chain(llm, chain_type='stuff')  # Stuff all data in at once (Other methods might work better if more context required)

In [49]:
# Display the prompt template the chain fills with {context} and {question}.
chain.llm_chain.prompt.template

"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:"

In [50]:
# End-to-end check: retrieve chunks for the query, then let the chain answer from them.
query = "how do I make a 2D graph?"
# No k is given, so the number of returned chunks is the library default.
qdocs = docsearch.similarity_search(query)
response = chain.run(input_documents=qdocs, question=query)
# Last expression of the cell: displays the answer text.
response

' You can make a 2D graph by selecting a two-dimensional wave in the Y Waves and X Wave lists in the advanced mode of the New Graph dialog. Then click the Add button to move your selection to the trace specification list below. You can then add more trace specifications using the Add button. When you click Do It, your graph is created with all of the specified traces.'