# Collect your data and store it in a pandas dataframe

In [23]:
import os
import spacy
from spacy_layout import spaCyLayout
from spacy.tokens import DocBin

nlp = spacy.load('en_core_web_sm')
layout = spaCyLayout(nlp)

pdf_path = "CS-25.pdf"
pdf = layout(pdf_path)

In [None]:
# Ensure output folder exists
os.makedirs('docbin', exist_ok=True)

doc_bin = DocBin(store_user_data=True)
doc_bin.add(pdf)

output_path = os.path.join('docbin\\', 'CS-25.spacy')
with open(output_path, "wb") as output_file:
    output_file.write(doc_bin.to_bytes())



In [None]:
dbin = DocBin(store_user_data=True)
pdf = dbin.from_disk('docbin\\CS-25.spacy')

spacy_docs = list(dbin.get_docs(nlp.vocab))[0]

# Initialize vector database

In [15]:
from qdrant_client import models, QdrantClient
from qdrant_client.models import PointStruct
from sentence_transformers import SentenceTransformer

In [8]:
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

In [75]:
doc_embeddings = encoder.encode(spacy_docs.text.split('\n'))

In [77]:
# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [78]:
qdrant.create_collection(
    collection_name="CS-25",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

True

In [79]:
points = []
sentences = spacy_docs.text.split('\n')
for i in range(len(doc_embeddings)):
    points.append(PointStruct(id=i,
                              vector=[doc_embeddings[i].tolist()],
                              payload={'text': sentences[i]}
                              ))

In [80]:
qdrant.upsert('CS-25', points)

  qdrant.upsert('CS-25', points)


UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [81]:
hits = qdrant.search(
    collection_name="CS-25",
    query_vector=encoder.encode("What are requirements for recertifying damaged airplane").tolist(),
    limit=3
)
for hit in hits:
  print(hit.payload, "score:", hit.score)

  hits = qdrant.search(


{'text': '(i) Failure  Conditions per CS  25.671(c)(1)  and  (c)(2).  It  should  be  shown  that  the aeroplane maintains structural integrity for continued safe flight and landing. This should  be  accomplished  by  demonstrating  compliance  with  CS 25.302,  where applicable, unless otherwise agreed with EASA.'} score: 0.5730004375136402
{'text': 'Note: This paragraph applies only to aircraft with a certification basis including CS 25.571 or equivalent requirements for damage tolerance.'} score: 0.5613018831487608
{'text': 'It must be shown that the aeroplane is capable of successfully completing a flight during which specified incidents occur and result in immediately obvious damage. The maximum extent of the damage must be quantified and the structure must be shown to be capable of sustaining the maximum load (considered as ultimate) expected during the completion of the flight. There are no maintenance actions that result from this evaluation.'} score: 0.5510611021642535


In [90]:
# define a variable to hold the search results
search_results = [hit.payload['text'] for hit in hits]

In [94]:
search_results

['(i) Failure  Conditions per CS  25.671(c)(1)  and  (c)(2).  It  should  be  shown  that  the aeroplane maintains structural integrity for continued safe flight and landing. This should  be  accomplished  by  demonstrating  compliance  with  CS 25.302,  where applicable, unless otherwise agreed with EASA.',
 'Note: This paragraph applies only to aircraft with a certification basis including CS 25.571 or equivalent requirements for damage tolerance.',
 'It must be shown that the aeroplane is capable of successfully completing a flight during which specified incidents occur and result in immediately obvious damage. The maximum extent of the damage must be quantified and the structure must be shown to be capable of sustaining the maximum load (considered as ultimate) expected during the completion of the flight. There are no maintenance actions that result from this evaluation.']

In [95]:
# Now time to connect to the large language model
from openai import OpenAI
client = OpenAI(
    api_key = os.environ["OPENAI_KEY"]
)
completion = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are expert EASA inspector. Your primary role is to ensure safety of airplanes you are about to certify."
                                      "Your responses should be clear and unambiguous. If you don't know the answer, say 'I don't know the answer.'"},
        {"role": "user", "content": "What are requirements for recertifying damaged airplane?"},
        {"role": "assistant", "content": '\n'.join(search_results)}
    ]
)
print(completion.choices[0].message)

ChatCompletionMessage(content="The recertification process of a damaged airplane includes several steps such as:\n\n1. Thorough Inspection: The aircraft must undergo a comprehensive assessment to determine the extent of the damage. This includes not only the visible damage but also any potential internal or structural damage that might have occurred.\n\n2. Repair: Based on the damage assessment report, all necessary repairs must be carried out. This must be done by certified personnel and should adhere to the aircraft manufacturer's maintenance manual or other approved FAA maintenance procedures.\n\n3. Testing: Following the repairs, the aircraft needs to be tested to ensure that all the systems are working correctly. This includes operational checks as well as flight tests.\n\n4. Documentation: All the repair and testing processes need to be documented per EASA regulations. The documentation needs to provide enough details about all the steps taken during the repair and testing proces