In [36]:
import pandas as pd
from tqdm.auto import tqdm
import re
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk, BulkIndexError
from openai import OpenAI
from sentence_transformers import SentenceTransformer

## Ingestion

In [2]:
def _load_csv(name):
    """Read ``../data/<name>.csv`` into a DataFrame.

    Every source table is read with the ISO-8859-1 encoding, so the path
    prefix and the encoding live in one place instead of being repeated on
    each call.

    Args:
        name: file name without the ``.csv`` extension.

    Returns:
        pandas.DataFrame with the file's contents.
    """
    return pd.read_csv(f'../data/{name}.csv', encoding='ISO-8859-1')


# One DataFrame per source table; the variable names mirror the file names.
df_aircraft_systems_status = _load_csv('aircraft_systems_status')
df_airport_operations_status = _load_csv('airport_operations_status')
df_atc_communications = _load_csv('atc_communications')
df_cargo_and_weight_management = _load_csv('cargo_and_weight_management')
df_crew_and_passenger_status = _load_csv('crew_and_passenger_status')
df_emergency_procedures = _load_csv('emergency_procedures')
df_flight_manuals = _load_csv('flight_manuals')
df_flight_plan_monitoring = _load_csv('flight_plan_monitoring')
df_fuel_management = _load_csv('fuel_management')
df_historical_incidents = _load_csv('historical_incidents')
df_notams = _load_csv('notams')
df_real_time_air_traffic = _load_csv('real_time_air_traffic')
df_satellite_communication_status = _load_csv('satellite_communication_status')
df_waypoint_and_route_info = _load_csv('waypoint_and_route_info')
df_weather_predictions = _load_csv('weather_predictions')
df_weather = _load_csv('weather')



In [11]:
df_flight_manuals.head()

Unnamed: 0,Manual Section,Scenario,Instructions
0,AOM Section 10,Hydraulic Failure,"Switch to backup hydraulic system, contact ATC..."
1,AOM Section 7,Navigation System Failure,"Switch to backup navigation system, continue f..."
2,AOM Section 3,Landing Gear Malfunction,"Attempt manual gear extension, prepare for low..."
3,AOM Section 3,Icing Conditions,"Activate anti-ice systems, monitor fuel temper..."
4,AOM Section 10,Electrical Failure,"Switch to backup power system, reduce non-esse..."


### Data Preprocessing

In [13]:
def clean_column_names(df):
    """Return the column labels of ``df`` lower-cased with spaces as underscores.

    The frame itself is not modified; the caller assigns the result back to
    ``df.columns``.

    Args:
        df: object exposing a ``columns`` iterable of strings.

    Returns:
        list of cleaned labels, in the original column order.
    """
    return [label.lower().replace(' ', '_') for label in df.columns]

In [14]:
df_flight_manuals.columns = clean_column_names(df_flight_manuals)

In [15]:
df_flight_manuals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 286 entries, 0 to 285
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   manual_section  286 non-null    object
 1   scenario        286 non-null    object
 2   instructions    286 non-null    object
dtypes: object(3)
memory usage: 6.8+ KB


In [17]:
# Report how many values are missing in each column before cleaning
print(df_flight_manuals.isnull().sum())

# Drop incomplete rows (if any) and renumber the index in a single chained,
# non-in-place step, so re-running this cell always produces the same frame
df_flight_manuals = df_flight_manuals.dropna().reset_index(drop=True)

manual_section    0
scenario          0
instructions      0
dtype: int64


In [19]:
# Build the 'text' column as "<scenario>. <instructions>" for every row
scenario_series = df_flight_manuals['scenario']
instructions_series = df_flight_manuals['instructions']
df_flight_manuals['text'] = scenario_series + ". " + instructions_series


In [20]:
df_flight_manuals.head()

Unnamed: 0,manual_section,scenario,instructions,text
0,AOM Section 10,Hydraulic Failure,"Switch to backup hydraulic system, contact ATC...",Hydraulic Failure. Switch to backup hydraulic ...
1,AOM Section 7,Navigation System Failure,"Switch to backup navigation system, continue f...",Navigation System Failure. Switch to backup na...
2,AOM Section 3,Landing Gear Malfunction,"Attempt manual gear extension, prepare for low...",Landing Gear Malfunction. Attempt manual gear ...
3,AOM Section 3,Icing Conditions,"Activate anti-ice systems, monitor fuel temper...","Icing Conditions. Activate anti-ice systems, m..."
4,AOM Section 10,Electrical Failure,"Switch to backup power system, reduce non-esse...",Electrical Failure. Switch to backup power sys...


In [16]:
es_client = Elasticsearch('http://localhost:9200')

In [51]:
# Name of the Elasticsearch index that holds the flight-manual documents
index_name = 'flight_manuals'

# Index definition: a single shard without replicas, one keyword field, three
# full-text fields and a dense vector field for the document embedding.
index_settings = dict(
    settings=dict(number_of_shards=1, number_of_replicas=0),
    mappings=dict(
        properties=dict(
            manual_section=dict(type="keyword"),
            scenario=dict(type="text"),
            instructions=dict(type="text"),
            text=dict(type="text"),
            # dims must equal the length of the vectors stored in 'embedding'
            embedding=dict(
                type="dense_vector",
                dims=384,
                index=True,
                similarity="cosine",
            ),
        )
    ),
)


In [52]:
# Start from a clean slate: remove any index left over from a previous run
index_already_exists = es_client.indices.exists(index=index_name)
if index_already_exists:
    es_client.indices.delete(index=index_name)

# Create the index from index_settings; as the last expression of the cell,
# the API response is displayed
es_client.indices.create(index=index_name, body=index_settings)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'flight_manuals'})

In [26]:
model = SentenceTransformer('all-MiniLM-L6-v2')

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [29]:
len(model.encode("This is a simple sentence"))

384

In [31]:
def get_embedding(text):
    """Encode ``text`` with the module-level ``model`` and return it as a plain list."""
    vector = model.encode(text)
    return vector.tolist()

# Embed each row's combined text and keep the vectors in an 'embedding' column
text_column = df_flight_manuals['text']
df_flight_manuals['embedding'] = text_column.apply(get_embedding)


### Indexing Data with Embeddings

In [34]:
# Prepare Data for Indexing

records = df_flight_manuals.to_dict(orient='records')


In [39]:
def generate_actions():
    """Yield one bulk action per entry of the module-level ``records``.

    Each action targets ``index_name`` and carries the record as ``_source``.
    """
    yield from (
        {"_index": index_name, "_source": document}
        for document in records
    )

In [53]:
def bulk_index_data(es_client, actions):
    """Bulk-index ``actions`` into Elasticsearch and report any failures.

    Args:
        es_client: Elasticsearch client handed to ``bulk``.
        actions: iterable of bulk actions to send, for example the generator
            returned by ``generate_actions()``.

    Returns:
        None. The outcome is printed. ``BulkIndexError`` is caught and every
        failed document is summarised; any other exception propagates.
    """
    try:
        # Send exactly what the caller passed in, rather than regenerating the
        # actions from module-level state and ignoring the argument.
        bulk(es_client, actions)
        print("Bulk indexing completed successfully!")
    except BulkIndexError as e:
        failed_documents = e.errors
        print(f"{len(failed_documents)} documents failed to index")

        for i, error in enumerate(failed_documents, 1):
            # Prefer the 'index' entry; if the failure was recorded under a
            # different key, fall back to the first entry instead of raising
            # KeyError while reporting.
            action = error.get('index') or next(iter(error.values()), {})
            error_info = action.get('error', {})
            document_id = action.get('_id', "N/A")
            status = action.get('status', "Unknown Status")

            print(f"\nFailed Document {i}:")
            print(f"ID: {document_id}")
            print(f"Status: {status}")
            print(f"Error Type: {error_info.get('type')}")
            print(f"Reason: {error_info.get('reason')}")
            document_source = action.get('data', {})
            print(f"Document Content: {document_source}")

    

In [54]:
bulk_index_data(es_client, generate_actions())


Bulk indexing completed successfully!


#### Implementing User Query Functions

In [59]:
client = OpenAI()

In [67]:
def get_user_query(query):
    """Return the query unchanged.

    Placeholder hook: the caller passes the query in directly; this is the
    place where interactive input could be wired in instead.
    """
    user_query = query
    return user_query


In [70]:
def get_query_embedding(query):
    """Encode ``query`` with the module-level ``model`` and return it as a plain list."""
    query_vector = model.encode(query)
    return query_vector.tolist()


In [130]:
def elastic_search(query_vector, index_name, es_client, top_k=3):
    """Score every document in ``index_name`` against ``query_vector``.

    A ``script_score`` query over ``match_all`` scores each document with
    ``cosineSimilarity(params.query_vector, 'embedding') + 1.0``.

    Args:
        query_vector: query embedding, passed to the script as
            ``params.query_vector``.
        index_name: index to search.
        es_client: client exposing ``search(index=..., body=...)``.
        top_k: number of hits requested (``size``). Defaults to 3.

    Returns:
        list of the hits' ``_source`` dicts, or ``None`` if anything raised
        while searching or reading the response (the error is printed, not
        re-raised).
    """
    similarity_script = {
        "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
        "params": {"query_vector": query_vector},
    }
    request_body = {
        "size": top_k,
        "query": {
            "script_score": {
                "query": {"match_all": {}},
                "script": similarity_script,
            }
        },
        # Only these fields are returned with each hit
        "_source": ["manual_section", "scenario", "instructions", "text"],
    }

    try:
        response = es_client.search(index=index_name, body=request_body)

        sources = []
        for hit in response['hits']['hits']:
            sources.append(hit['_source'])
        return sources

    except Exception as e:
        print(f"An error occurred: {e}")
        return None


In [131]:
def retrieve_documents(query):
    """Embed ``query`` and return the top 3 hits from ``elastic_search``.

    Uses the module-level ``index_name`` and ``es_client``.
    """
    return elastic_search(
        get_query_embedding(query),
        index_name,
        es_client,
        top_k=3,
    )

In [132]:
def format_retrieved_documents(retrieved_docs):
    """Render retrieved documents as a plain-text block for the LLM prompt.

    Args:
        retrieved_docs: iterable of dicts with 'manual_section', 'scenario'
            and 'instructions' keys. ``None`` (which ``elastic_search``
            returns when the search fails) and empty input are accepted.

    Returns:
        str: one three-line entry per document, each followed by a blank
        line; the empty string when there are no documents.

    Raises:
        KeyError: if a document lacks one of the three expected keys.
    """
    # A failed search yields None; treat it like "no documents" instead of
    # raising TypeError while iterating.
    if not retrieved_docs:
        return ""

    return "".join(
        f"Manual Section: {doc['manual_section']}\n"
        f"Scenario: {doc['scenario']}\n"
        f"Instructions: {doc['instructions']}\n\n"
        for doc in retrieved_docs
    )

In [149]:

def build_prompt(question, retrieved_docs):
    """Assemble the LLM prompt from the question and the retrieved documents.

    Args:
        question: the user's question, inserted verbatim.
        retrieved_docs: documents passed to ``format_retrieved_documents`` to
            produce the context section.

    Returns:
        str: the filled-in prompt, with leading and trailing whitespace of the
        template removed.
    """
    context_block = format_retrieved_documents(retrieved_docs)

    template = """
    You are an assistant helping pilots with flight procedures. Based on the following flight manual information, answer the question:

    Flight Manual Information:
    {retrieved_docs}

    Question:
    {question}

    Answer:
    """.strip()

    return template.format(retrieved_docs=context_block, question=question)

In [159]:
def generate_answer(prompt):
    """Send ``prompt`` to the chat model and return the text of its reply.

    Uses the module-level OpenAI ``client``.

    Args:
        prompt: fully rendered prompt (instructions, context and question).

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        # The prompt is the request being made to the model, so it is sent as
        # a 'user' turn; an 'assistant' turn would present it as something the
        # model itself had already said.
        messages=[{'role': 'user', 'content': prompt}],
        max_tokens=2500,
        temperature=0.5,
    )
    return response.choices[0].message.content


In [161]:
def rag(query):
    """Answer ``query`` end to end: retrieve documents, build the prompt, ask the LLM.

    Returns:
        The answer produced by ``generate_answer``.
    """
    user_query = get_user_query(query)
    context_docs = retrieve_documents(user_query)
    return generate_answer(build_prompt(user_query, context_docs))

In [162]:
answer = rag("engine failure flight manuals")
print(answer)

In the event of an engine failure, the flight manual instructions across various sections are consistent. Here are the key steps to follow:

1. **Throttle Idle**: Reduce the throttle to idle to manage the engine's power output.
2. **Apply Maximum Climb**: Attempt to maintain or achieve maximum climb performance to gain altitude and control.
3. **Contact ATC**: Communicate with Air Traffic Control to request emergency landing clearance.

These procedures are outlined in AOM Section 2, Section 5, and Section 10, ensuring that pilots have clear and uniform guidance for handling engine failure scenarios.


In [166]:
answer = rag("How should I handle a hydraulic failure during flight?")
print(answer)

In the event of a hydraulic failure during flight, you should follow these steps:

1. **Switch to the Backup Hydraulic System**: Immediately activate the backup hydraulic system to maintain control and functionality of essential systems.

2. **Contact ATC**: Communicate with Air Traffic Control (ATC) to inform them of the situation and request a diversion to the nearest suitable airport for a safe landing.

3. **Monitor Instruments**: Keep an eye on your instruments and systems to assess the status of the backup hydraulic system and any other affected systems.

4. **Prepare for Landing**: Follow any additional procedures outlined in your flight manual for landing with a hydraulic failure, including any necessary checklists.

5. **Maintain Situational Awareness**: Stay aware of your surroundings and any changes in flight conditions as you prepare for the diversion.

Always refer to your specific aircraft's operating manual for detailed procedures and checklists related to hydraulic fail

In [167]:
answer = rag('What shall i do for Icing Conditions scenarios?')
print(answer)

For Icing Conditions scenarios, you should:

1. Activate the anti-ice systems.
2. Monitor the fuel temperature.
3. Consider changing altitude if necessary.


In [165]:
answer = rag('is there Landing Gear Malfunction?')
print(answer)

In the event of a landing gear malfunction, follow these procedures:

1. Attempt manual gear extension.
2. Prepare for a low approach to allow for visual inspection of the landing gear.
3. Prepare for an emergency landing if the gear does not extend properly.

Ensure to communicate with air traffic control and follow any additional emergency protocols as necessary.


In [169]:
import whisper



ImportError: Numba needs NumPy 2.0 or less. Got NumPy 2.1.

## Retrieval Evaluation

In [58]:
# Ground-truth questions for the retrieval evaluation; the preview below shows
# an 'id' and a 'question' column.
# NOTE(review): the previewed questions are about fitness exercises, not
# flight manuals — confirm this is the intended ground-truth file.
df_questions = pd.read_csv('../data/ground-truth-retrieval.csv')

In [59]:
df_questions

Unnamed: 0,id,question
0,0,What is the starting position for doing push-ups?
1,0,Which muscle groups are activated during push-...
2,0,How do you know when to push back up while doi...
3,0,Do you need any equipment to perform push-ups?
4,0,What part of the body do push-ups primarily ta...
...,...,...
1030,206,What muscles are primarily worked during the D...
1031,206,What type of exercise is the Dumbbell Bench Pr...
1032,206,Can the Dumbbell Bench Press be performed with...
1033,206,What body part does the Dumbbell Bench Press f...


In [60]:
ground_truth = df_questions.to_dict(orient='records')

In [61]:
ground_truth[0]

{'id': 0, 'question': 'What is the starting position for doing push-ups?'}

In [62]:
def hit_rate(relevance_total):
    """Fraction of queries whose result list contains at least one relevant hit.

    Args:
        relevance_total: list with one list of booleans per query; each flag
            says whether the result at that rank matched the expected document.

    Returns:
        float: number of lists containing ``True`` divided by the number of
        lists.

    Raises:
        ZeroDivisionError: if ``relevance_total`` is empty.
    """
    queries_with_hit = sum(1 for flags in relevance_total if True in flags)
    return queries_with_hit / len(relevance_total)

In [63]:
def mrr(relevance_total):
    """Mean reciprocal rank of the first relevant result per query.

    Args:
        relevance_total: list with one list of booleans per query; each flag
            says whether the result at that rank matched the expected document.

    Returns:
        float: average over all queries of ``1 / rank`` of the first relevant
        result (rank starts at 1); a query with no relevant result adds 0.

    Raises:
        ZeroDivisionError: if ``relevance_total`` is empty.
    """
    total_score = 0.0

    for flags in relevance_total:
        for position, is_relevant in enumerate(flags, start=1):
            # Equality test (not truthiness), matching hit_rate's `True in ...`
            if is_relevant == True:  # noqa: E712
                total_score += 1 / position
                # Only the first relevant result counts; without this, a list
                # with several relevant hits would contribute more than 1.
                break

    return total_score / len(relevance_total)

In [64]:
def minsearch_search(query):
    """Search the module-level ``index`` for ``query`` with no filters or boosts.

    Requests 10 results and returns whatever ``index.search`` returns.

    NOTE(review): ``index`` is not defined anywhere in the visible notebook —
    confirm where it is built. A later cell redefines this function with an
    optional ``boost`` argument.
    """
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={},
        num_results=10,
    )

In [66]:
def evaluate(ground_truth, search_function):
    """Run ``search_function`` over the ground truth and score the rankings.

    Args:
        ground_truth: iterable of dicts, each with an 'id' key naming the
            expected document.
        search_function: callable taking one ground-truth entry and returning
            an iterable of result dicts that each have an 'id' key.

    Returns:
        dict with 'hit_rate' and 'mrr' computed over all entries.
    """
    def relevance_for(entry):
        # Read the expected id before searching, then flag each result by
        # whether it is that document.
        expected_id = entry['id']
        return [doc['id'] == expected_id for doc in search_function(entry)]

    relevance_total = [relevance_for(entry) for entry in tqdm(ground_truth)]

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [67]:
from tqdm.auto import tqdm

In [68]:
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/1035 [00:00<?, ?it/s]

{'hit_rate': 0.9478260869565217, 'mrr': 0.822744038033893}

## Finding the best parameters


In [70]:
# Positional split: the first 100 questions form the validation set and every
# remaining row forms the test set
df_validation, df_test = df_questions.iloc[:100], df_questions.iloc[100:]

In [71]:
import random

def simple_optimize(param_ranges, objective_function, n_iterations=10, seed=None):
    """Random search that maximises ``objective_function``.

    On each iteration one value per parameter is drawn from its range, the
    objective is evaluated, and the highest-scoring parameter set is kept.

    Args:
        param_ranges: mapping ``name -> (min_val, max_val)``. When both bounds
            are ints the value is drawn with ``randint`` (inclusive integer);
            otherwise with ``uniform`` (float).
        objective_function: callable taking the sampled parameter dict and
            returning a score; higher is better.
        n_iterations: number of random parameter sets to try. Defaults to 10.
        seed: optional seed for a private ``random.Random`` so a run can be
            reproduced. With the default ``None`` the shared ``random`` module
            state is used.

    Returns:
        tuple ``(best_params, best_score)``. If ``n_iterations`` is 0 (or no
        score ever exceeds ``-inf``) this is ``(None, float('-inf'))``.
    """
    # The random module and a Random instance expose the same randint/uniform
    rng = random if seed is None else random.Random(seed)

    best_params = None
    best_score = float('-inf')  # maximising, so start below any real score

    for _ in range(n_iterations):
        # Draw one candidate value per parameter
        current_params = {}
        for param, (min_val, max_val) in param_ranges.items():
            if isinstance(min_val, int) and isinstance(max_val, int):
                current_params[param] = rng.randint(min_val, max_val)
            else:
                current_params[param] = rng.uniform(min_val, max_val)

        current_score = objective_function(current_params)

        # Keep the candidate only when it strictly beats the best so far
        if current_score > best_score:
            best_score = current_score
            best_params = current_params

    return best_params, best_score

In [72]:
gt_val = df_validation.to_dict(orient='records')

In [73]:
def minsearch_search(query, boost=None):
    """Search the module-level ``index`` for ``query`` with optional field boosts.

    Args:
        query: query text passed to ``index.search``.
        boost: mapping passed as ``boost_dict``; ``None`` means an empty dict.

    Returns:
        Whatever ``index.search`` returns when asked for 10 results.
    """
    boost_dict = {} if boost is None else boost
    return index.search(
        query=query,
        filter_dict={},
        boost_dict=boost_dict,
        num_results=10,
    )

In [74]:
# Every boostable field is searched over the same weight interval [0.0, 3.0]
param_ranges = {
    field: (0.0, 3.0)
    for field in (
        'exercise_name',
        'type_of_activity',
        'type_of_equipment',
        'body_part',
        'type',
        'muscle_groups_activated',
        'instructions',
    )
}

def objective(boost_params):
    """Return the MRR obtained on ``gt_val`` when searching with ``boost_params``."""
    results = evaluate(
        gt_val,
        lambda q: minsearch_search(q['question'], boost_params),
    )
    return results['mrr']

In [75]:
simple_optimize(param_ranges, objective, n_iterations=20)


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

({'exercise_name': 1.1162887216689357,
  'type_of_activity': 1.2053964373475001,
  'type_of_equipment': 0.0157474211791101,
  'body_part': 0.5117172999420851,
  'type': 1.9562913342437052,
  'muscle_groups_activated': 2.7508697687269983,
  'instructions': 0.27669361047746277},
 0.847)

In [76]:
def minsearch_improved(query):
    """Search the module-level ``index`` for ``query`` with fixed field boosts.

    Requests 10 results and returns whatever ``index.search`` returns.

    NOTE(review): these weights differ from the best parameters printed by the
    ``simple_optimize`` run shown earlier in the notebook — confirm which run
    they were taken from.
    """
    tuned_boost = dict(
        exercise_name=2.11,
        type_of_activity=1.46,
        type_of_equipment=0.65,
        body_part=2.65,
        type=1.31,
        muscle_groups_activated=2.54,
        instructions=0.74,
    )
    return index.search(
        query=query,
        filter_dict={},
        boost_dict=tuned_boost,
        num_results=10,
    )

evaluate(ground_truth, lambda q: minsearch_improved(q['question']))

  0%|          | 0/1035 [00:00<?, ?it/s]

{'hit_rate': 0.9468599033816425, 'mrr': 0.9029733149298367}

## RAG Evaluation

In [77]:
# LLM-as-judge prompt. Placeholders: {question} and {answer_llm}. The doubled
# braces around the JSON example are str.format escapes and render as single
# braces. The judge is asked to reply with JSON holding a "Relevance" label
# and an "Explanation".
prompt2_template = """
You are an expert evaluator for a Retrieval-Augmented Generation (RAG) system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [78]:
len(ground_truth)


1035

In [81]:
record = ground_truth[0]
record

{'id': 0, 'question': 'What is the starting position for doing push-ups?'}

In [84]:
record = ground_truth[0]
question = record['question']
answer_llm = rag(question)

In [85]:
print(answer_llm)

The starting position for doing push-ups is to start in a high plank position with your hands under your shoulders.


In [87]:
prompt = prompt2_template.format(question=question, answer_llm=answer_llm)
print(prompt)

You are an expert evaluator for a Retrieval-Augmented Generation (RAG) system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: What is the starting position for doing push-ups?
Generated Answer: The starting position for doing push-ups is to start in a high plank position with your hands under your shoulders.

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}


In [89]:
print(llm(prompt))

{
  "Relevance": "RELEVANT",
  "Explanation": "The generated answer accurately describes the starting position for doing push-ups, specifically mentioning the high plank position and the placement of hands, which directly addresses the question."
}


In [90]:
evaluations = {}


In [91]:
import json

In [98]:
df_sample = df_questions.sample(n=200, random_state=1)
sample = df_sample.to_dict(orient='records')


In [99]:
# For every sampled question: generate an answer with the RAG pipeline, then
# ask the judge LLM to grade it.
# NOTE(review): `llm` is not defined anywhere in the visible notebook —
# confirm where it comes from.
evaluations = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question)

    prompt = prompt2_template.format(question=question, answer_llm=answer_llm)

    # The judge's reply is decoded from JSON straight into a dict
    evaluation = json.loads(llm(prompt))

    evaluations.append((record, answer_llm, evaluation))
    

  0%|          | 0/200 [00:00<?, ?it/s]

In [100]:
evaluations

[({'id': 171,
   'question': 'What is the primary muscle group activated during the Banded Pull-Up?'},
  'The primary muscle group activated during the Banded Pull-Up is the Latissimus Dorsi.',
  {'Relevance': 'RELEVANT',
   'Explanation': 'The generated answer correctly identifies the primary muscle group activated during the Banded Pull-Up, which directly addresses the question asked.'}),
 ({'id': 115,
   'question': 'Can jumping squats be performed without any equipment?'},
  'Yes, jumping squats can be performed without any equipment, as they use bodyweight.',
  {'Relevance': 'RELEVANT',
   'Explanation': 'The generated answer directly addresses the question by confirming that jumping squats can be performed without any equipment and explains that they utilize bodyweight, which is pertinent information.'}),
 ({'id': 53,
   'question': 'Can you explain the proper technique for executing a Dumbbell Lateral Raise?'},
  'To execute a Dumbbell Lateral Raise properly, stand with a dumbbe

In [101]:
df_eval = pd.DataFrame(evaluations, columns=['record', 'answer', 'evaluation'])
df_eval

Unnamed: 0,record,answer,evaluation
0,"{'id': 171, 'question': 'What is the primary m...",The primary muscle group activated during the ...,"{'Relevance': 'RELEVANT', 'Explanation': 'The ..."
1,"{'id': 115, 'question': 'Can jumping squats be...","Yes, jumping squats can be performed without a...","{'Relevance': 'RELEVANT', 'Explanation': 'The ..."
2,"{'id': 53, 'question': 'Can you explain the pr...","To execute a Dumbbell Lateral Raise properly, ...","{'Relevance': 'RELEVANT', 'Explanation': 'The ..."
3,"{'id': 198, 'question': 'How do I ensure my ar...",To ensure your arms are fully extended during ...,"{'Relevance': 'RELEVANT', 'Explanation': 'The ..."
4,"{'id': 19, 'question': 'What part of the body ...",The part of the body primarily engaged in Gobl...,"{'Relevance': 'PARTLY_RELEVANT', 'Explanation'..."
...,...,...,...
195,"{'id': 83, 'question': 'What specific body par...","The Pendlay Row targets the Upper Body, specif...","{'Relevance': 'PARTLY_RELEVANT', 'Explanation'..."
196,"{'id': 94, 'question': 'How does the TRX varia...",The TRX variation differs from traditional pus...,"{'Relevance': 'RELEVANT', 'Explanation': 'The ..."
197,"{'id': 123, 'question': 'Can beginners safely ...",NONE,"{'Relevance': 'NON_RELEVANT', 'Explanation': '..."
198,"{'id': 76, 'question': 'How should I position ...",Lie on your back with legs extended. Lift your...,"{'Relevance': 'RELEVANT', 'Explanation': 'The ..."


In [102]:
# Flatten the nested dicts into plain columns: id/question come from the
# ground-truth record, relevance/explanation from the judge's evaluation
df_eval['id'] = df_eval['record'].map(lambda rec: rec['id'])
df_eval['question'] = df_eval['record'].map(lambda rec: rec['question'])

df_eval['relevance'] = df_eval['evaluation'].map(lambda verdict: verdict['Relevance'])
df_eval['explanation'] = df_eval['evaluation'].map(lambda verdict: verdict['Explanation'])

In [103]:
# The nested dict columns have been unpacked into flat columns; remove them
# from the frame in place
df_eval.drop(columns=['record', 'evaluation'], inplace=True)

In [104]:
df_eval.relevance.value_counts(normalize=True)


relevance
RELEVANT           0.695
PARTLY_RELEVANT    0.185
NON_RELEVANT       0.120
Name: proportion, dtype: float64

In [105]:
df_eval.relevance.value_counts()


relevance
RELEVANT           139
PARTLY_RELEVANT     37
NON_RELEVANT        24
Name: count, dtype: int64

In [106]:
df_eval.to_csv('../data/rag-eval-gpt-4o-mini.csv', index=False)


In [107]:
df_eval[df_eval.relevance == 'NON_RELEVANT']


Unnamed: 0,answer,id,question,relevance,explanation
19,The One-Arm Dumbbell Snatch is considered a pu...,190,Is the One-Arm Dumbbell Snatch considered a pu...,NON_RELEVANT,The generated answer incorrectly categorizes t...
22,NONE,30,Can calf raises target any other body parts be...,NON_RELEVANT,The generated answer 'NONE' does not provide a...
49,NONE,36,What is the correct position to start the Incl...,NON_RELEVANT,The generated answer 'NONE' does not provide a...
52,The Cable Lateral Raise is a push exercise.,100,Is the Cable Lateral Raise a push or pull exer...,NON_RELEVANT,The generated answer incorrectly categorizes t...
57,NONE,99,What is the appropriate weight for the dumbbel...,NON_RELEVANT,The generated answer 'NONE' does not address t...
64,NONE,54,How long should I hold the Wall Sit position f...,NON_RELEVANT,The generated answer 'NONE' does not provide a...
67,NONE,107,Can you explain the correct stance for perform...,NON_RELEVANT,"The generated answer is 'NONE', which fails to..."
69,NONE,17,What is the primary body position to start per...,NON_RELEVANT,The generated answer 'NONE' does not provide a...
76,NONE,187,Is the Reverse Grip Lat Pulldown suitable for ...,NON_RELEVANT,The generated answer 'NONE' does not provide a...
79,NONE,99,How do I avoid injury while doing the Dumbbell...,NON_RELEVANT,The generated answer 'NONE' does not provide a...
