In [1]:
import pymupdf

In [2]:
# Open the source PDF; "doc.pdf" is a relative path, so it is resolved against
# the notebook's current working directory.
doc = pymupdf.open("doc.pdf")

In [3]:
# Inspect the PDF metadata. Leaving the dict as the cell's last expression lets
# the notebook pretty-print it instead of flattening it onto a single line.
doc.metadata

{'format': 'PDF 1.7', 'title': 'Automating Ground Control Point Detection in Drone Imagery: From Computer Vision to Deep Learning', 'author': 'Gonzalo Muradás Odriozola, Klaas Pauly, Samuel Oswald and Dries Raymaekers', 'subject': 'Drone-based photogrammetry typically requires the task of georeferencing aerial images by detecting the center of Ground Control Points (GCPs) placed in the field. Since this is a very labor-intensive task, it could benefit greatly from automation. In this study, we explore the extent to which traditional computer vision approaches can be generalized to deal with variability in real-world drone data sets and focus on training different residual neural networks (ResNet) to improve generalization. The models were trained to detect single keypoints of fixed-sized image tiles with a historic collection of drone-based Red–Green–Blue (RGB) images with black and white GCP markers in which the center was manually labeled by experienced photogrammetry operators. Diff

In [4]:
# Inspect the table of contents returned by the document. Leaving the nested
# list as the cell's last expression lets the notebook pretty-print it rather
# than printing it as one long line.
doc.get_toc()

[[1, 'Introduction', 1], [1, 'Materials and Methods', 4], [2, 'Existing Data Set', 4], [2, 'Pre-Processing', 6], [2, 'Data Augmentation', 6], [2, 'Computer Vision Approach', 7], [2, 'Deep Learning Models', 8], [2, 'Experimental Setup', 10], [1, 'Results', 11], [2, 'Computer Vision Approach', 11], [2, 'Deep Learning Approach: Influence of Tile Size', 13], [3, 'Influence with Training', 13], [3, 'Influence with Testing', 13], [2, 'Deep Learning Approach: Main Results', 15], [2, 'Deep Learning Approach: Individual Model Analysis', 17], [2, 'Deep Learning: Training Time', 21], [2, 'Deep Learning: Prediction Time', 21], [1, 'Discussion', 21], [1, 'Additional Content', 23], [1, 'Conclusions', 25], [1, 'References', 26]]


In [5]:
# Gather the text of every page into one string. A single join avoids
# re-copying the ever-growing string on each `+=`.
text: str = "".join(page.get_text() for page in doc)

# Preview only the beginning: the full text is still available in `text`, but
# dumping the whole document would bury the rest of the notebook.
text[:1000]

'Citation: Muradás Odriozola, G.;\nPauly, K.; Samuel, O.; Raymaekers, D.\nAutomating Ground Control Point\nDetection in Drone Imagery: From\nComputer Vision to Deep Learning.\nRemote Sens. 2024, 16, 794. https://\ndoi.org/10.3390/rs16050794\nAcademic Editors: Lidia M. Ortega\nAlvarado and María I. Ramos Galan\nReceived: 24 November 2023\nRevised: 14 February 2024\nAccepted: 19 February 2024\nPublished: 24 February 2024\nCopyright: © 2024 by the authors.\nLicensee MDPI, Basel, Switzerland.\nThis article is an open access article\ndistributed\nunder\nthe\nterms\nand\nconditions of the Creative Commons\nAttribution (CC BY) license (https://\ncreativecommons.org/licenses/by/\n4.0/).\nremote sensing \nArticle\nAutomating Ground Control Point Detection in Drone Imagery:\nFrom Computer Vision to Deep Learning\nGonzalo Muradás Odriozola 1,2\n, Klaas Pauly 3,*\n, Samuel Oswald 3 and Dries Raymaekers 3\n1\nImage and Speech Processing (PSI), Department of Electrical Engineering (ESAT), KU Leuven,

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [7]:
# Chunking parameters. Lengths are measured with `len`, i.e. in characters of
# the input string.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 5

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
)

In [8]:
# Split the full document text into chunk documents with the splitter
# configured earlier; the text is passed as a one-element list.
docs = text_splitter.create_documents([text])

In [9]:
# Sanity check: how many chunks the splitter produced.
len(docs)

161

In [3]:
from langchain_groq import ChatGroq
from dotenv import load_dotenv

# Load variables from a local .env file before the client is created
# (presumably this is where the Groq API key comes from — confirm).
load_dotenv()

llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0)

# Smoke test: send one plain-string question and print only the reply text.
question = "what is llm?"
ai_msq = llm.invoke(question)
print(ai_msq.content)

LLM stands for Large Language Model. It's a type of artificial intelligence (AI) designed to process and generate human-like language. LLMs are trained on vast amounts of text data, which enables them to learn patterns, relationships, and context within language.

Large Language Models are typically characterized by their ability to:

1. **Understand natural language**: LLMs can comprehend and interpret human language, including nuances, idioms, and context.
2. **Generate text**: LLMs can create coherent and contextually relevant text, such as responses to questions, summaries of articles, or even entire articles.
3. **Answer questions**: LLMs can provide accurate and informative answers to a wide range of questions, from simple queries to complex, open-ended questions.
4. **Translate languages**: LLMs can translate text from one language to another, often with high accuracy.
5. **Summarize content**: LLMs can condense long pieces of text into shorter, more digestible summaries.

LLMs 

In [4]:
from langchain_core.prompts import ChatPromptTemplate

# System message with placeholders for the source and target languages; the
# human message carries the text to translate.
system_template = "You are a helpful assistant that translates {input_language} to {output_language}."
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_template),
        ("human", "{input}"),
    ]
)

# Pipe the prompt into the chat model so one invoke() call runs both.
chain = prompt | llm

# Values for the three placeholders used by the template.
translation_request = {
    "input_language": "English",
    "output_language": "German",
    "input": "I love programming.",
}
chain.invoke(translation_request)

AIMessage(content='Ich liebe Programmieren.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 50, 'total_tokens': 56, 'completion_time': 0.012578753, 'prompt_time': 0.002903914, 'queue_time': 0.048653728, 'total_time': 0.015482667}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_510c177af0', 'finish_reason': 'stop', 'logprobs': None}, id='run--f6271505-eb47-494c-bc76-7cd1538ae0e8-0', usage_metadata={'input_tokens': 50, 'output_tokens': 6, 'total_tokens': 56})

In [18]:
from langchain_community.vectorstores import FAISS, InMemoryVectorStore

In [22]:
from langchain_huggingface import HuggingFaceEmbeddings   # LangChain ≥ 0.2
# (or: from langchain_community.embeddings import HuggingFaceEmbeddings)

# Embedding model used to vectorise the text chunks.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

emb = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    # To run on a GPU, additionally pass: model_kwargs={"device": "cuda"}
    # Ask the encoder to normalize the embeddings it returns.
    encode_kwargs={"normalize_embeddings": True},
)

In [25]:
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

# Build the FAISS index directly from the chunk documents. from_documents
# takes each document's page_content and metadata itself, so the two parallel
# list comprehensions are unnecessary, and `doc` (the open PDF) is no longer
# reused as a loop variable.
vector_db = FAISS.from_documents(documents=docs, embedding=emb)
