In [2]:
## Read the source PDF with PyPDFLoader and split it into document chunks
from langchain.document_loaders import PyPDFLoader

# Path of the PDF to vectorize; point this at another file to index it instead
PDF_PATH = "01_SROCC_SPM_FINAL.pdf"

loader = PyPDFLoader(PDF_PATH)
pages = loader.load_and_split()


In [3]:
## Peek at the first chunk to confirm that text was extracted from the PDF
first_page = pages[0]
first_page

Document(page_content='Summary for  \nPolicymakers', metadata={'source': '01_SROCC_SPM_FINAL.pdf', 'page': 0})

In [6]:
## Set Open AI key

import os
import getpass

# Prompt only when the key is missing or empty, so that re-running this cell
# (or running the notebook unattended with the variable already exported)
# neither blocks on input nor overwrites a working key.
# To enter a different key, remove OPENAI_API_KEY from os.environ first.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')

OpenAI API Key: ········


In [7]:
## Add to vector DB

import weaviate
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Weaviate

# The class is stored as "Climate_change" (see the keys in the query
# responses further down), not "climate_change". Use the stored spelling so
# that any lookup keyed by this constant matches what Weaviate returns.
WEAVIATE_INDEX_NAME = "Climate_change"

# Connect to the Weaviate instance; the OpenAI key is forwarded as a header.
client = weaviate.Client(
    url="http://weaviate:8080",
    additional_headers={
        "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]
    },
)

# Embed the PDF chunks with OpenAI embeddings and store them in the index.
# NOTE(review): this appears to insert the documents on every execution, so
# re-running the cell likely duplicates entries — confirm, and clear the
# class first if a clean re-ingest is needed.
vectorstore = Weaviate.from_documents(
    client=client,
    documents=pages,
    embedding=OpenAIEmbeddings(),
    index_name=WEAVIATE_INDEX_NAME,
)
retriever = vectorstore.as_retriever()

/opt/conda/lib/python3.10/site-packages/langchain/embeddings/openai.py:437: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/
  response = response.dict()
/opt/conda/lib/python3.10/site-packages/pydantic/main.py:979: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/


In [8]:
## Sanity check: count the objects stored under the index
(
    client.query
    .aggregate(WEAVIATE_INDEX_NAME)
    .with_meta_count()
    .do()
)

{'data': {'Aggregate': {'Climate_change': [{'meta': {'count': 50}}]}}}

In [22]:
## Spot check: fetch the "text" property of two stored chunks
(
    client.query
    .get(WEAVIATE_INDEX_NAME, ["text"])
    .with_limit(2)
    .do()
)

{'data': {'Get': {'Climate_change': [{'text': '7SPMSummary for Policymakers\nhigh aciditylow acidityHistorical changes (observed and modelled) and projections under RCP2.6 and RCP8.5 for key indicators\nHistorical (modelled) Historical (observed) Projected (RCP2.6) Projected (RCP8.5)\n−1012345(a) Global mean surface air temperature\nchange relative to 1986−2005\n−6−4−202\n(i) Ocean oxygen (100−600 m depth)\n%\n(j) Arctic sea ice extent\n(September)\n%\n1950 2000 2050 2100(l) Near−surface permafrost area\nyear1950 2000 2050 2100\nyearºC\n%Past and future changes in the ocean and cryosphere \nchange relative to 1986−2005\nchange relative to 1986−2005\nchange relative to 1986−2005\nyear−1012345(b) Global mean sea surface temperatureºCchange relative to 1986−2005\n015101520(c) Marine heatwave daysmultiplication factorfactor of change relative to 1986−20057.87.98.08.1\npH(h) Surface ocean pH\n00.10.20.3\nmetres\n080016002400(d) Ocean heat content (0−2000 m depth)1021 Joulesand sea level equ

In [None]:
# Inspect the schema registered for the index
client.schema.get(class_name=WEAVIATE_INDEX_NAME)