# Collect your data and store it in a spaCy DocBin

In [24]:
import os
import spacy
from spacy_layout import spaCyLayout
from spacy.tokens import DocBin
from dotenv import load_dotenv

# Read variables from a .env file (if present) into the environment.
load_dotenv()

# PDF that will be parsed.
pdf_path = "CS-25.pdf"

# Wrap the en_core_web_sm pipeline with spaCyLayout and run it on the PDF.
nlp = spacy.load("en_core_web_sm")
layout = spaCyLayout(nlp)
pdf = layout(pdf_path)

In [None]:
# Ensure output folder exists
os.makedirs('docbin', exist_ok=True)

# store_user_data=True keeps the Doc's user data in the serialized form.
doc_bin = DocBin(store_user_data=True)
doc_bin.add(pdf)

# Join plain path components: a hard-coded backslash is only a separator on
# Windows and would point into a non-existent "docbin\" directory elsewhere.
output_path = os.path.join('docbin', 'CS-25.spacy')
doc_bin.to_disk(output_path)



In [2]:
# Reload the serialized DocBin. The path is built with os.path.join so the
# cell also works on systems where "\" is not a path separator.
dbin = DocBin(store_user_data=True)
# NOTE: from_disk returns the DocBin itself, so `pdf` is the container here,
# not a Doc.
pdf = dbin.from_disk(os.path.join('docbin', 'CS-25.spacy'))

# Take the first stored Doc back out of the container.
spacy_docs = list(dbin.get_docs(nlp.vocab))[0]

# Initialize vector database

In [3]:
from qdrant_client import models, QdrantClient
from qdrant_client.models import PointStruct
from sentence_transformers import SentenceTransformer




In [4]:
# Sentence-transformers model that produces the embeddings.
embedding_model_name = 'all-MiniLM-L6-v2'
encoder = SentenceTransformer(embedding_model_name)

In [5]:
# Embed every newline-separated line of the document text (blank lines included).
text_lines = spacy_docs.text.split('\n')
doc_embeddings = encoder.encode(text_lines)

In [6]:
# Vector database client; ":memory:" selects an in-memory Qdrant instance.
qdrant = QdrantClient(location=":memory:")

In [7]:
# The collection's vector size must equal the encoder's output dimension.
embedding_size = encoder.get_sentence_embedding_dimension()
cs25_vectors = models.VectorParams(size=embedding_size, distance=models.Distance.COSINE)
qdrant.create_collection(collection_name="CS-25", vectors_config=cs25_vectors)

True

In [8]:
# Build one Qdrant point per non-blank line of the document text.
# Assumes doc_embeddings was computed from this same newline split, so the
# i-th embedding belongs to the i-th line.
sentences = spacy_docs.text.split('\n')
points = [
    PointStruct(
        id=i,
        # A single dense vector is a flat list of floats; wrapping it in another
        # list would be interpreted as a multi-vector, which this collection is
        # not configured for.
        vector=embedding.tolist(),
        payload={'text': sentence},
    )
    # ids keep the original line index even when blank lines are skipped
    for i, (sentence, embedding) in enumerate(zip(sentences, doc_embeddings))
    if sentence.strip()  # blank lines carry no searchable text
]

In [None]:
# Write the prepared points into the "CS-25" collection.
qdrant.upsert(collection_name='CS-25', points=points)

In [21]:
# Embed the question, then fetch the three closest stored entries with payloads.
question_vector = encoder.encode("What are requirements for recertifying damaged airplane").tolist()
hits = qdrant.query_points(
    collection_name="CS-25",
    query=question_vector,
    limit=3,
    with_payload=True,
)
# Show each match's payload next to its similarity score.
for point in hits.points:
    print(point.payload, "score:", point.score)

{'text': '(i) Failure  Conditions per CS  25.671(c)(1)  and  (c)(2).  It  should  be  shown  that  the aeroplane maintains structural integrity for continued safe flight and landing. This should  be  accomplished  by  demonstrating  compliance  with  CS 25.302,  where applicable, unless otherwise agreed with EASA.'} score: 0.5730004375136402
{'text': 'Note: This paragraph applies only to aircraft with a certification basis including CS 25.571 or equivalent requirements for damage tolerance.'} score: 0.5613018831487608
{'text': 'It must be shown that the aeroplane is capable of successfully completing a flight during which specified incidents occur and result in immediately obvious damage. The maximum extent of the damage must be quantified and the structure must be shown to be capable of sustaining the maximum load (considered as ultimate) expected during the completion of the flight. There are no maintenance actions that result from this evaluation.'} score: 0.5510611021642535


In [19]:
# Run the same search again and display the payload of the top match.
query_embedding = encoder.encode("What are requirements for recertifying damaged airplane").tolist()
hits = qdrant.query_points(
    collection_name="CS-25",
    query=query_embedding,
    with_payload=True,
    limit=3,
)
hits.points[0].payload

{'text': '(i) Failure  Conditions per CS  25.671(c)(1)  and  (c)(2).  It  should  be  shown  that  the aeroplane maintains structural integrity for continued safe flight and landing. This should  be  accomplished  by  demonstrating  compliance  with  CS 25.302,  where applicable, unless otherwise agreed with EASA.'}

In [22]:
# Keep only the stored text of each returned point.
search_results = [match.payload['text'] for match in hits.points]

In [28]:
# Now time to connect to the large language model
from openai import OpenAI
import streamlit as st
# The API key is read from the OPENAI_KEY environment variable; a missing
# variable raises KeyError here rather than failing later inside the request.
client = OpenAI(
    api_key=os.environ["OPENAI_KEY"]
)

question = "What are requirements for recertifying damaged airplane?"
# Retrieved passages, one per line, used to ground the answer.
context = '\n'.join(search_results)

completion = client.chat.completions.create(
    model="gpt-4",
    messages=[
        # Adjacent string literals are concatenated as-is, so the trailing
        # space after "certify." keeps the two sentences apart.
        {"role": "system", "content": "You are expert EASA inspector. Your primary role is to ensure safety of airplanes you are about to certify. "
                                      "Your responses should be clear and unambiguous. If you don't know the answer, say 'I don't know the answer.'"},
        # The retrieved text is sent inside the user turn. Sending it as an
        # "assistant" message would present it as the model's own earlier
        # reply instead of reference material for answering the question.
        {"role": "user", "content": "Use the following excerpts to answer the question.\n\n"
                                    f"Excerpts:\n{context}\n\n"
                                    f"Question: {question}"},
    ]
)
print(completion.choices[0].message)

ChatCompletionMessage(content="To recertify a damaged airplane, the following requirements typically apply, although it can vary depending on the type, extent, and location of the damage:\n\n1. Repair: The first step is that the damage must be appropriately fixed. This may range from minor fixes to major structural repairs. The airplane must be restored to its original or properly altered condition that complies with its type design.\n\n2. Inspection: After repairs, the aircraft must undergo an extensive inspection by an authorized inspector. They will examine the structural integrity and verify the condition and operation of all systems and components.\n\n3. Documentation: All maintenance work, including repairs and inspections, have to be correctly documented following EASA regulations. \n\n4. Test Flight: If necessary, the aircraft may need to undergo a test flight after the repairs, following a pre-established test plan that should cover all areas in which the aircraft's performanc