In [43]:
import os
import requests

def download_file(url, file_path, timeout=60):
    """Download ``url`` and write the response body to ``file_path``.

    Args:
        url: Address of the file to fetch.
        file_path: Local path the bytes are written to (opened in binary mode).
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The ``requests`` response object for the completed download.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an HTML error page under a .pdf/.pptx name.
    response.raise_for_status()
    with open(file_path, 'wb') as file:
        file.write(response.content)
    return response


# Create the 'data' directory if it doesn't exist (no-op when it already does)
os.makedirs('data', exist_ok=True)

# Download the first file
url1 = 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10q/uber_10q_march_2022.pdf'
file_path1 = './data/uber_10q_march_2022.pdf'
response1 = download_file(url1, file_path1)

# Download the second file
url2 = 'https://meetings.wmo.int/Cg-19/PublishingImages/SitePages/FINAC-43/7%20-%20EC-77-Doc%205%20Financial%20Statements%20for%202022%20(FINAC).pptx'
file_path2 = './data/presentation.pptx'
response2 = download_file(url2, file_path2)

# Only reached when both downloads returned a successful HTTP status
print('Files downloaded successfully!')

Files downloaded successfully!


In [44]:
import os
import nest_asyncio  # noqa: E402
# Patch asyncio so an already-running event loop can be re-entered.
# Presumably needed because the notebook kernel runs its own loop while the
# parsing calls further down use asyncio internally — TODO confirm.
nest_asyncio.apply()

In [45]:
# Load variables from a local .env file into the process environment; the API
# keys read later with os.getenv (e.g. LLAMA_CLOUD_API_KEY) are expected to come from it.
from dotenv import load_dotenv
load_dotenv()

True

In [46]:
# Import the LlamaParse client
from llama_parse import LlamaParse

In [47]:
# Read the LlamaParse (Llama Cloud) API key from the environment;
# os.getenv yields None when the variable is not set.
llamaparse_api_key = os.getenv('LLAMA_CLOUD_API_KEY')
# Do not display the key in a cell: outputs are saved with the notebook and would leak it.

In [48]:
# Optional check: parse each document on its own with the LlamaParse cloud service
#llama_parse_documents = LlamaParse(api_key=llamaparse_api_key, result_type="markdown").load_data("./data/presentation.pptx")
#llama_parse_documents = LlamaParse(api_key=llamaparse_api_key, result_type="markdown").load_data("./data/uber_10q_march_2022.pdf")

In [49]:
import pickle

In [50]:
# Define a helper that loads the parsed data from cache if available, or parses the documents otherwise
def laod_or_parse_data():
    """Return the parsed documents, using an on-disk pickle cache.

    When ``./data/parsed_data.pkl`` exists and can be unpickled, its content is
    returned as-is. Otherwise the two sample files are sent to LlamaParse
    (markdown output), the result is cached, and then returned.

    Relies on the module-level names ``LlamaParse`` and ``llamaparse_api_key``.

    Returns:
        The cached object, or whatever ``LlamaParse(...).load_data`` returned.
    """
    data_file = "./data/parsed_data.pkl"

    if os.path.exists(data_file):
        try:
            # NOTE: unpickling can execute arbitrary code. Only load a cache
            # file that this notebook wrote itself.
            with open(data_file, "rb") as f:
                return pickle.load(f)
        except (EOFError, pickle.UnpicklingError):
            # A truncated or corrupt cache (e.g. an interrupted earlier run)
            # must not block the notebook forever: rebuild it below.
            pass

    # No usable cache: parse the documents
    llama_parse_documents = LlamaParse(api_key=llamaparse_api_key, result_type="markdown").load_data(["./data/presentation.pptx", "./data/uber_10q_march_2022.pdf"])

    # Write to a temporary file first and rename it into place, so an
    # interruption never leaves a half-written cache behind.
    tmp_file = data_file + ".tmp"
    with open(tmp_file, "wb") as f:
        pickle.dump(llama_parse_documents, f)
    os.replace(tmp_file, data_file)

    return llama_parse_documents

In [51]:
# Load the parsed documents from the cache, or parse them if no cache exists yet
llama_parse_documents = laod_or_parse_data()

In [52]:
# Sanity check: how many document objects were returned
len(llama_parse_documents)

2

In [53]:
# Preview the first 200 characters of the first parsed document
llama_parse_documents[0].text[:200]

'# Document\n\n# UNITED STATES SECURITIES AND EXCHANGE COMMISSION Washington, D.C. 20549\n\n## FORM 10-Q\n\n(Mark One) ☒ QUARTERLY REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 193'

In [54]:
# Confirm the container type of the parsed result
type(llama_parse_documents)

list

### Storing the parsed data in the Qdrant vector store

In [55]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, StorageContext

import qdrant_client

In [56]:
# Qdrant connection settings, read from the environment (None when a variable is unset)
qdrant_url = os.getenv("QDRANT_URL")
qdrant_api_key = os.getenv("QDRANT_API_KEY")

### FastEmbed embeddings

In [57]:
from llama_index.embeddings.fastembed import FastEmbedEmbedding
# Embedding model used for the documents; the progress output below suggests
# the model files are fetched when this object is created.
embed_model = FastEmbedEmbedding(model_name="BAAI/bge-base-en-v1.5")

Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

In [58]:
# Register the FastEmbed model as the global embedding model, so an OpenAI
# embedding model is not used.
from llama_index.core import Settings

Settings.embed_model = embed_model

In [59]:
# Import the Groq LLM wrapper and read its API key from the environment

from llama_index.llms.groq import Groq
groq_api_key = os.getenv("GROQ_API_KEY")

In [60]:
# Confirm that a key was loaded WITHOUT echoing it: cell outputs are saved with
# the notebook, so displaying the value publishes the secret to every reader.
# The key that was previously shown in this output must be treated as
# compromised and rotated.
groq_api_key is not None

True

In [70]:
# Instantiate the Groq-hosted LLM; the commented lines are alternative models to try
llm = Groq(model="mixtral-8x7b-32768", api_key=groq_api_key)
#llm = Groq(model="gemma-7b-it", api_key=groq_api_key)
#llm = Groq(model="llama2-70b-4096", api_key=groq_api_key)

In [71]:
# Use the Groq model as the global LLM, so an OpenAI model is not used
Settings.llm = llm

# Connect to the Qdrant instance configured through the environment
client = qdrant_client.QdrantClient(url=qdrant_url, api_key=qdrant_api_key)

# NOTE(review): the collection is literally named 'qdrant_url' (a quoted
# string, not the qdrant_url variable). This looks accidental — confirm the
# intended collection name before relying on it.
vector_store = QdrantVectorStore(collection_name='qdrant_url', client=client)
Storage_Context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from the parsed documents, backed by the Qdrant vector store
index = VectorStoreIndex.from_documents(
    documents=llama_parse_documents,
    storage_context=Storage_Context,
)

In [72]:
# Ask one question against the index through its default query engine
query = "What is the letter of credit AS of December 31, 2021"

query_engine = index.as_query_engine()
response = query_engine.query(query)
print(response)

The context information does not provide the letter of credit as of December 31, 2021.
