In [33]:
import pandas as pd
from tqdm.auto import tqdm
import re
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk, BulkIndexError
from openai import OpenAI
from sentence_transformers import SentenceTransformer
import json

## Ingestion

In [34]:
# Load the flight-manual documents; each record is a dict that includes an 'id' field.
# JSON text is UTF-8, so pin the encoding instead of relying on the platform
# default, which is not UTF-8 on every OS/locale.
with open('../data/flight_manuals_documents-with-ids.json', 'rt', encoding='utf-8') as f_in:
    documents = json.load(f_in)



In [35]:
documents[0]

{'manual_section': 'AOM Section 3',
 'scenario': 'Electrical Failure',
 'instructions': 'Advise cabin crew of situation, maintain visual contact with other aircraft.',
 'id': '61162cbd'}

In [36]:
# Sentence-embedding model; the same instance encodes the documents and the search queries.
model = SentenceTransformer('all-MiniLM-L6-v2')



In [37]:
# Attach to every document the text that gets embedded ("<scenario>. <instructions>")
# together with its embedding vector. The documents are updated in place.
for doc in tqdm(documents):
    text = ". ".join((doc['scenario'], doc['instructions']))
    doc['text'] = text
    doc['text_vector'] = model.encode(text)

  0%|          | 0/300 [00:00<?, ?it/s]

In [38]:
documents[0]

{'manual_section': 'AOM Section 3',
 'scenario': 'Electrical Failure',
 'instructions': 'Advise cabin crew of situation, maintain visual contact with other aircraft.',
 'id': '61162cbd',
 'text': 'Electrical Failure. Advise cabin crew of situation, maintain visual contact with other aircraft.',
 'text_vector': array([ 1.26540717e-02,  2.84629804e-03,  5.22968061e-02,  4.39964682e-02,
         2.07632184e-02,  1.82993114e-02,  4.20230851e-02,  7.63038844e-02,
        -6.08727783e-02,  3.16061154e-02,  6.11943565e-02, -3.47804241e-02,
        -1.91798471e-02,  9.82684270e-02, -6.07081689e-02, -2.01766267e-02,
         5.33694923e-02, -6.71771839e-02, -5.92697896e-02,  1.63402986e-02,
         3.60341519e-02,  7.94296265e-02, -4.43195701e-02,  4.55499962e-02,
        -9.81410742e-02,  5.52850142e-02,  1.96695551e-02, -8.49044882e-03,
        -4.67055365e-02, -5.09895012e-02, -2.03544237e-02,  1.26710339e-02,
        -6.31223768e-02,  1.73256136e-02,  9.05830786e-03,  7.68855587e-02,
     

In [39]:
es_client = Elasticsearch('http://localhost:9200')

In [40]:
# Name of the Elasticsearch index that holds the flight-manual documents.
index_name = 'flight_manuals'

# Index definition: one shard, no replicas, and one mapping entry per document field.
# `text_vector` holds the embedding of `text` and is declared with cosine similarity.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "manual_section": {"type": "keyword"},
            "scenario": {"type": "text"},
            "instructions": {"type": "text"},
            "id": {"type": "keyword"},
            "text": {"type": "text"},
            "text_vector": {"type": "dense_vector", "dims": 384, "index": True, "similarity": "cosine"}  # dims must equal the length of the vectors produced by `model` (presumably 384 for all-MiniLM-L6-v2 — confirm if the model changes)
        }
    }
}


In [41]:
# Delete the index if it already exists, so re-running this cell starts from an empty index
if es_client.indices.exists(index=index_name):
    es_client.indices.delete(index=index_name)

# Create the index using the settings and mappings defined in `index_settings`
es_client.indices.create(index=index_name, body=index_settings)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'flight_manuals'})

In [42]:
def get_embedding(text):
    """Encode ``text`` with the shared ``model`` and return the vector as a plain Python list."""
    vector = model.encode(text)
    return vector.tolist()


 ### Indexing Data with Embeddings

In [43]:
def generate_actions():
    """Yield one bulk action per entry of ``documents``, targeting ``index_name``.

    Each action carries the whole record as ``_source``; no ``_id`` is set.
    """
    yield from (
        {"_index": index_name, "_source": record}
        for record in documents
    )

In [44]:
def bulk_index_data(es_client, actions):
    """Bulk-index ``actions`` through ``es_client`` and print a report of any failures.

    Args:
        es_client: Elasticsearch client passed to ``elasticsearch.helpers.bulk``.
        actions: Iterable of bulk actions to index (for example ``generate_actions()``).

    Returns:
        None. The outcome is printed; a ``BulkIndexError`` is caught and summarised
        per failed document instead of being re-raised.
    """
    try:
        # Consume the caller-supplied actions rather than rebuilding them from
        # module-level state, so the function indexes exactly what it was given.
        bulk(es_client, actions)
        print("Bulk indexing completed successfully!")
    except BulkIndexError as e:
        print(f"{len(e.errors)} documents failed to index")

        failed_documents = e.errors
        for i, error in enumerate(failed_documents, 1):
            # Failures are expected under the 'index' key; if the caller used a
            # different operation type, fall back to whatever single entry is present.
            action = error.get('index') or next(iter(error.values()), {})
            error_info = action.get('error', {})
            document_id = action.get('_id', "N/A")
            status = action.get('status', "Unknown Status")

            print(f"\nFailed Document {i}:")
            print(f"ID: {document_id}")
            print(f"Status: {status}")
            print(f"Error Type: {error_info.get('type')}")
            print(f"Reason: {error_info.get('reason')}")
            document_source = action.get('data', {})
            print(f"Document Content: {document_source}")

    

In [45]:
bulk_index_data(es_client, generate_actions())


Bulk indexing completed successfully!


In [46]:
client = OpenAI()

In [47]:
def get_user_query(query):
    """Return the user's query unchanged.

    Placeholder hook for demonstration: a hardcoded query or interactive input
    could be substituted here.
    """
    user_query = query
    return user_query


In [48]:
def get_query_embedding(query):
    """Encode ``query`` with the shared ``model`` and return the vector as a plain Python list."""
    query_vector = model.encode(query)
    return query_vector.tolist()


In [49]:
def elastic_search(query_vector, index_name, es_client, top_k=3):
    """Return the ``_source`` of the ``top_k`` best-scoring documents for ``query_vector``.

    All documents (``match_all``) are scored by a ``script_score`` query whose
    script is ``cosineSimilarity(params.query_vector, 'text_vector') + 1.0``.

    Args:
        query_vector: Query embedding passed to the script as ``params.query_vector``.
        index_name: Name of the index to search.
        es_client: Client object exposing ``search(index=..., body=...)``.
        top_k: Number of hits requested (``size``).

    Returns:
        A list with the ``_source`` dict of each hit, or ``None`` if the search or
        the response handling raises (the error is printed, not re-raised).
    """
    similarity_script = {
        "source": "cosineSimilarity(params.query_vector, 'text_vector') + 1.0",
        "params": {"query_vector": query_vector},
    }
    search_body = {
        "size": top_k,
        "query": {
            "script_score": {
                "query": {"match_all": {}},
                "script": similarity_script,
            }
        },
        # Only these fields are returned for each hit
        "_source": ["manual_section", "scenario", "instructions", "text", "id"],
    }

    try:
        response = es_client.search(index=index_name, body=search_body)
        hits = response['hits']['hits']
        return [hit['_source'] for hit in hits]
    except Exception as e:
        print(f"An error occurred: {e}")
        return None


In [50]:
def retrieve_documents(query):
    """Embed ``query`` and return the result of ``elastic_search`` for the top 3 matches.

    Uses the module-level ``index_name`` and ``es_client``.
    """
    return elastic_search(
        get_query_embedding(query),
        index_name,
        es_client,
        top_k=3,
    )

In [51]:
retrieve_documents('How should I handle a hydraulic failure during flight?')[0]['text']

'Hydraulic Failure. Limit control inputs to essential movements, monitor hydraulic pressure levels.'

In [53]:
def format_retrieved_documents(retrieved_docs):
    """Render retrieved documents as labelled text blocks for inclusion in a prompt.

    Args:
        retrieved_docs: Iterable of dicts with the keys ``manual_section``,
            ``scenario``, ``instructions`` and ``text``.

    Returns:
        str: One four-line block per document, each followed by a blank line;
        an empty string when there are no documents.
    """
    blocks = [
        f"Manual Section: {doc['manual_section']}\n"
        f"Scenario: {doc['scenario']}\n"
        f"Instructions: {doc['instructions']}\n"
        f"Text: {doc['text']}\n\n"
        for doc in retrieved_docs
    ]
    return "".join(blocks)


In [54]:

def build_prompt(question, retrieved_docs):
    """Build the answer-generation prompt from a question and its retrieved documents.

    The documents are rendered with ``format_retrieved_documents`` and inserted,
    together with ``question``, into a fixed instruction template.
    """
    prompt_template = """
    You are an assistant helping pilots with flight procedures. Based on the following flight manual information, answer the question:

    Flight Manual Information:
    {retrieved_docs}

    Question:
    {question}

    Answer:
    """.strip()

    return prompt_template.format(
        retrieved_docs=format_retrieved_documents(retrieved_docs),
        question=question,
    )

In [55]:
def generate_answer(prompt):
    """Send ``prompt`` to the chat model and return the text of the first completion choice.

    Args:
        prompt (str): Fully formatted prompt text.

    Returns:
        The ``content`` of the first choice's message.
    """
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        # The prompt is the request being made of the model, so it is sent as a
        # 'user' turn; an 'assistant' turn would present it as text the model
        # itself had already produced.
        messages=[{'role': 'user', 'content': prompt}],
        max_tokens=2500,
        temperature=0.5,
    )
    return response.choices[0].message.content


In [56]:
def rag(query):
    """Answer ``query`` with retrieval-augmented generation.

    Steps: normalise the query, retrieve matching documents, build the prompt,
    and return the generated answer.
    """
    user_query = get_user_query(query)
    context_docs = retrieve_documents(user_query)
    return generate_answer(build_prompt(user_query, context_docs))

In [57]:
answer = rag("engine failure flight manuals")
print(answer)

In the event of an engine failure, the flight manuals provide the following instructions:

1. **Monitor engine parameters**: Continuously check the performance indicators for the affected engine.
2. **Shut down affected engine if necessary**: If the parameters indicate a critical issue, proceed to safely shut down the malfunctioning engine.
3. **Advise cabin crew**: Inform the cabin crew about the situation to ensure they are prepared for any necessary actions.

These instructions are consistent across multiple sections of the flight manuals, including FCOM Sections 2 and 4, as well as AOM Section 4.


In [58]:
answer = rag("How should I handle a hydraulic failure during flight?")
print(answer)

In the event of a hydraulic failure during flight, you should follow these procedures:

1. **Limit Control Inputs**: Restrict your control movements to only essential actions. Avoid unnecessary or aggressive inputs to prevent further strain on the hydraulic system.

2. **Monitor Hydraulic Pressure Levels**: Continuously observe the hydraulic pressure indicators to assess the status of the hydraulic system. This will help you determine the extent of the failure and any potential impact on aircraft control.

By adhering to these instructions, you can manage the situation more effectively and maintain control of the aircraft.


In [29]:
answer = rag('What shall i do for Icing Conditions scenarios?')
print(answer)

In icing conditions scenarios, you should take the following actions:

1. Report icing conditions to ATC.
2. Request a vector to exit the icing area.
3. Perform de-icing procedures as outlined in the flight manual.


In [30]:
answer = rag('is there Landing Gear Malfunction?')
print(answer)

Yes, there is a Landing Gear Malfunction. The flight manual information indicates that in the event of a landing gear malfunction, the procedures include attempting manual gear extension and preparing for a low approach for visual inspection.


In [31]:
# Load the ground-truth questions and turn them into a list of per-row dicts
df_ground_truth = pd.read_csv('../data/ground-truth-data-flight-manuals.csv')
ground_truth = df_ground_truth.to_dict('records')


In [32]:
ground_truth[0]

{'id': '61162cbd',
 'question': 'What specific instructions should I give to the cabin crew during an electrical failure?'}

In [55]:
# Look-up table from document id to document; if two documents share an id, the later one wins.
doc_idx = {d['id']: d for d in documents}


In [62]:
doc_idx['61162cbd']

{'manual_section': 'AOM Section 3',
 'scenario': 'Electrical Failure',
 'instructions': 'Advise cabin crew of situation, maintain visual contact with other aircraft.',
 'id': '61162cbd',
 'text': 'Electrical Failure. Advise cabin crew of situation, maintain visual contact with other aircraft.',
 'text_vector': array([ 1.26540717e-02,  2.84629804e-03,  5.22968061e-02,  4.39964682e-02,
         2.07632184e-02,  1.82993114e-02,  4.20230851e-02,  7.63038844e-02,
        -6.08727783e-02,  3.16061154e-02,  6.11943565e-02, -3.47804241e-02,
        -1.91798471e-02,  9.82684270e-02, -6.07081689e-02, -2.01766267e-02,
         5.33694923e-02, -6.71771839e-02, -5.92697896e-02,  1.63402986e-02,
         3.60341519e-02,  7.94296265e-02, -4.43195701e-02,  4.55499962e-02,
        -9.81410742e-02,  5.52850142e-02,  1.96695551e-02, -8.49044882e-03,
        -4.67055365e-02, -5.09895012e-02, -2.03544237e-02,  1.26710339e-02,
        -6.31223768e-02,  1.73256136e-02,  9.05830786e-03,  7.68855587e-02,
     

In [59]:
rag(ground_truth[0]['question'])

"During an electrical failure, you should advise the cabin crew of the situation and ensure they are aware of the need to maintain visual contact with other aircraft. It's important to communicate clearly and provide any additional information they may need to assist passengers and maintain safety."

### Cosine Similarity Metric

In [60]:
# Compare one reference text with one LLM answer in embedding space.
answer_orig = 'Electrical Failure. Advise cabin crew of situation, maintain visual contact with other aircraft.'
answer_llm = 'During an electrical failure, you should advise the cabin crew of the situation and ensure they are aware of the need to maintain visual contact with other aircraft. Its important to communicate clearly and provide any additional information they may need to assist passengers and maintain safety'
v_llm = model.encode(answer_llm)
v_orig = model.encode(answer_orig)

# NOTE(review): a raw dot product equals cosine similarity only for unit-length
# vectors — presumably this model returns normalized embeddings; confirm, or
# divide by the two norms.
v_llm.dot(v_orig)

np.float32(0.80421853)

In [74]:
len(ground_truth)

1500

### Since the ground-truth dataset is large, we use parallel processing to speed up answer generation

In [108]:
import concurrent.futures
import time


In [106]:
def process_record(rec):
    """
    Generate the LLM answer for one ground-truth record.

    Args:
        rec (dict): Record providing the keys 'id' and 'question'.

    Returns:
        dict: 'doc_id', 'question' and 'answer_llm'. If anything raises while the
        record is processed, the error is printed and 'answer_llm' is None (missing
        'id' / 'question' values are reported as 'Unknown').
    """
    try:
        # 'question' is read before 'id', so a record missing both reports 'question'
        question, doc_id = rec['question'], rec['id']
        return {
            'doc_id': doc_id,
            'question': question,
            'answer_llm': rag(question),
        }
    except Exception as e:
        fallback_id = rec.get('id', 'Unknown')
        print(f"Error processing record ID {fallback_id}: {e}")
        return {
            'doc_id': rec.get('id', 'Unknown'),
            'question': rec.get('question', 'Unknown'),
            'answer_llm': None
        }


In [109]:
def llm_answers_concurrent(ground_truth, max_workers=10):
    """
    Run ``process_record`` over every ground-truth record on a thread pool.

    Args:
        ground_truth (list of dict): Records containing 'id' and 'question'.
        max_workers (int): Maximum number of worker threads.

    Returns:
        list of dict: One ``process_record`` result per record, collected in the
        order the tasks complete (not necessarily the input order).
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Schedule everything up front, then drain the futures as they finish
        pending = [executor.submit(process_record, rec) for rec in ground_truth]
        completed = tqdm(
            concurrent.futures.as_completed(pending),
            total=len(pending),
            desc="Generating LLM Answers",
        )
        results = [future.result() for future in completed]
    return results

In [111]:
# Generate answers for the whole ground-truth set and report how long the run took
start_time = time.time()
results = llm_answers_concurrent(ground_truth, max_workers=20)  # Adjust max_workers as needed
end_time = time.time()

elapsed_seconds = end_time - start_time
print(f"Processed {len(results)} records in {elapsed_seconds:.2f} seconds.")

Generating LLM Answers:   0%|          | 0/1500 [00:00<?, ?it/s]

Processed 1500 records in 282.99 seconds.


In [112]:
df_llm_answers = pd.DataFrame(results)

In [113]:
df_llm_answers.to_csv('../data/gpt4o-mini-flight-manuals-results.csv', index=None)

In [114]:
df_llm_answers = pd.read_csv('../data/gpt4o-mini-flight-manuals-results.csv')

In [115]:
df_llm_answers.head()

Unnamed: 0,doc_id,question,answer_llm
0,15f6e195,How should I manage the aircraft's bank during...,"During a stall warning, you should avoid banki..."
1,f66c8aa4,Can you provide the turbulence penetration spe...,The provided flight manual information does no...
2,15f6e195,Are there any specific maneuvers to avoid whil...,"Yes, when handling a stall warning, you should..."
3,61162cbd,What should I prioritize: informing the cabin ...,You should prioritize maintaining aircraft con...
4,f66c8aa4,What speed should I reduce to during turbulence?,The flight manual information provided does no...


### As we don't have original answers to evaluate the generated LLM answers against, we will use the LLM-as-a-Judge approach

In [116]:
# Prompt for the LLM judge. `{question}` and `{answer_llm}` are filled in with
# str.format; the doubled braces `{{` / `}}` produce the literal braces of the
# JSON object the judge is asked to return.
prompt2_template = """
    You are an expert evaluator for a Retrieval-Augmented Generation (RAG) system.
    Your task is to analyze the relevance of the generated answer to the given question.
    Based on the relevance of the generated answer, you will classify it
    as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

  Here is the data for evaluation:

  Question: {question}
  Generated Answer: {answer_llm}

  Please analyze the content and context of the generated answer in relation to the question
  and provide your evaluation in parsable JSON without using code blocks:

  {{
    "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
    "Explanation": "[Provide a brief explanation for your evaluation]"
  }}
  """.strip()

In [117]:
df_sample = df_llm_answers.sample(n=150, random_state=1)


In [118]:
samples = df_sample.to_dict(orient='records')


In [119]:
record = samples[0]
record

{'doc_id': '409ce57a',
 'question': 'Can you guide me through the alternate gear extension procedures?',
 'answer_llm': "Certainly! While the specific steps for alternate gear extension procedures can vary depending on the aircraft type, the general process usually involves the following steps:\n\n1. **Prepare for Alternate Extension**:\n   - Ensure that you have notified ATC of the landing gear malfunction and your intentions.\n   - Assess the situation and prepare the cockpit for the alternate gear extension.\n\n2. **Locate the Alternate Gear Extension Mechanism**:\n   - Identify the location of the alternate gear extension handle or mechanism in the cockpit. This is typically found in the landing gear section of the overhead panel or near the throttle quadrant.\n\n3. **Check the Gear Position Indicators**:\n   - Verify the current position of the landing gear using the gear position indicators. This will help you understand whether the gear is up or down.\n\n4. **Perform the Alterna

In [120]:
prompt = prompt2_template.format(**record)
print(prompt)

You are an expert evaluator for a Retrieval-Augmented Generation (RAG) system.
    Your task is to analyze the relevance of the generated answer to the given question.
    Based on the relevance of the generated answer, you will classify it
    as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

  Here is the data for evaluation:

  Question: Can you guide me through the alternate gear extension procedures?
  Generated Answer: Certainly! While the specific steps for alternate gear extension procedures can vary depending on the aircraft type, the general process usually involves the following steps:

1. **Prepare for Alternate Extension**:
   - Ensure that you have notified ATC of the landing gear malfunction and your intentions.
   - Assess the situation and prepare the cockpit for the alternate gear extension.

2. **Locate the Alternate Gear Extension Mechanism**:
   - Identify the location of the alternate gear extension handle or mechanism in the cockpit. This is typically found i

In [121]:
# Judge every sampled answer: fill the evaluation prompt with the record's fields
# and collect the model's raw reply text (parsed into JSON in a later cell).
evaluations = []

for record in tqdm(samples):
    prompt = prompt2_template.format(**record)
    evaluation = generate_answer(prompt)    
    evaluations.append(evaluation)

  0%|          | 0/150 [00:00<?, ?it/s]

In [124]:
evaluations[0]

'{\n  "Relevance": "RELEVANT",\n  "Explanation": "The generated answer provides a detailed step-by-step guide on alternate gear extension procedures, addressing the question directly. It includes essential actions to take, checks to perform, and communication with ATC, which are all relevant to the inquiry about guiding someone through these procedures."\n}'

In [125]:
# Parse each raw judge reply into a dict. The judge is asked for bare JSON but may
# still wrap it in prose or a code fence, so on a parse failure retry with the
# outermost {...} span. If nothing parses, record a PARSE_ERROR placeholder so a
# single bad reply does not abort the cell and rows stay aligned with `samples`.
json_evaluations = []

for str_eval in evaluations:
    raw_reply = str_eval or ""
    try:
        json_eval = json.loads(raw_reply)
    except json.JSONDecodeError:
        braced = re.search(r"\{.*\}", raw_reply, flags=re.DOTALL)
        try:
            json_eval = json.loads(braced.group(0)) if braced else None
        except json.JSONDecodeError:
            json_eval = None
    if not isinstance(json_eval, dict):
        json_eval = {"Relevance": "PARSE_ERROR", "Explanation": raw_reply}
    json_evaluations.append(json_eval)

In [126]:
json_evaluations[0]

{'Relevance': 'RELEVANT',
 'Explanation': 'The generated answer provides a detailed step-by-step guide on alternate gear extension procedures, addressing the question directly. It includes essential actions to take, checks to perform, and communication with ATC, which are all relevant to the inquiry about guiding someone through these procedures.'}

In [127]:
df_evaluations = pd.DataFrame(json_evaluations)


In [128]:
df_evaluations.Relevance.value_counts()


Relevance
RELEVANT           130
PARTLY_RELEVANT     19
NON_RELEVANT         1
Name: count, dtype: int64

In [129]:
df_evaluations[df_evaluations['Relevance'] == "NON_RELEVANT"]

Unnamed: 0,Relevance,Explanation
58,NON_RELEVANT,The generated answer does not provide any info...


In [131]:
df_evaluations.to_csv('../data/flight-manuals-evaluations-qa.csv', index=False)
