In [5]:
#Import libraries
!pip install google-genai
import os
import glob
import json
import asyncio
from typing import List, Optional
from pydantic import BaseModel, Field, field_validator
from google import genai
from google.genai import types
import traceback

#use dict to make data easy to get
data_list = [
    {"Aircraft Model": "MD-11", "MSN": 48123, "Modifications": "None"},
    {"Aircraft Model": "DC-10-30F", "MSN": 47890, "Modifications": "None"},
    {"Aircraft Model": "Boeing 737-800", "MSN": 30123, "Modifications": "None"},
    {"Aircraft Model": "A320-214", "MSN": 5234, "Modifications": "None"},
    {"Aircraft Model": "A320-232", "MSN": 6789, "Modifications": "mod 24591 (production)"},
    {"Aircraft Model": "A320-214", "MSN": 7456, "Modifications": "SB A320-57-1089 Rev 04"},
    {"Aircraft Model": "A321-111", "MSN": 8123, "Modifications": "None"},
    {"Aircraft Model": "A321-112", "MSN": 364, "Modifications": "mod 24977 (production)"},
    {"Aircraft Model": "A319-100", "MSN": 9234, "Modifications": "None"},
    {"Aircraft Model": "MD-10-10F", "MSN": 46234, "Modifications": "None"},
]



In [6]:
#Initialize pydantic schema
class ApplicabilityRules(BaseModel):
    aircraft_models: List[str] = Field(default_factory=list,description="list of aircraft models affected")
    msn_constraints: List[str] = Field(default_factory=list,description="Manufacture Serial Number constraints")
    excluded_if_mod: List[str] = Field(default_factory=list,description="excluded if modification exist")
    required_mod: List[str] = Field(default_factory=list,description="required modification")

    # normal "None → []" validator for the normal list fields
    @field_validator(
        "aircraft_models",
        "excluded_if_mod",
        "required_mod",
        mode="before"
    )
    def normalize_lists(cls, v):
        if v is None:
            return []
        return v

    # special validator ONLY for msn_constraints
    @field_validator("msn_constraints", mode="before")
    def allow_string_or_list(cls, v):
        if v is None:
            return []
        # If model returns "all" or similar → treat as a single string in a list
        if isinstance(v, str):
            return [v]
        # If it's already a list, pass it through
        if isinstance(v, list):
            return v
        raise TypeError(f"Invalid msn_constraints value: {v}")


class PDFAnalysis(BaseModel):
    ad_id: str = Field(description="document name")
    applicability_rules: ApplicabilityRules = Field(description="applicability rule structure")


Below will be the document processor, i use Google Gemini for ease of setup as i have experience in using and prompt engineering it. the workflow will be as following:
->load all pdf documents in folder and send to AI.
->AI will return structured result.
->Structured result will be parsed by pydantic to ensure integrity.
->Save data to Json.

In [7]:
#Functions, Async because it's faster and genai can process multiple document at once 
async def process_folder_pdfs(folder_path: str) -> PDFAnalysis:
    client = genai.Client(api_key="AIzaSyBo_JY4TRWQfrRBGT1jBllFVUW-WaHm6cU")#i know it's bad idea to put this here but i make life simple
    user_prompt = """
Analyze the provided Airworthiness Directive (AD) document. 
Extract all information necessary to populate the required JSON schema, focusing exclusively on the AD's Applicability section (Section (a) / Applicability). 
- 'excluded_if_mod' must list any modifications or Service Bulletins (SB) that prevent the AD from applying if they are already embodied. 
- 'required_mod' should only list modifications that must be present for the AD to apply (rarely used).
- 'msn_constraints is either a list or null or all.
- The field 'applicability_rules' MUST be a nested object containing the aircraft and modification constraints.
Example Output Structure:
{
  "ad_id": "EASA-AD-2025-208",
  "applicability_rules": {
    "aircraft_models": ["MD-11"(example),],
    "msn_constraints": null, 
    "excluded_if_mod": [],
    "required_mod": []
  }
}
"""
    # collect all PDFs into a list of Parts
    pdf_parts = []
    try:
        for file_name in os.listdir(folder_path):
            if file_name.lower().endswith(".pdf"):
                file_path = os.path.join(folder_path, file_name)

                with open(file_path, "rb") as f:
                    pdf_bytes = f.read()

                pdf_parts.append(
                    types.Part.from_bytes(
                        data=pdf_bytes,
                        mime_type="application/pdf"
                    )
                )
    except Exception as e:
        print(f"Error reading folder: {e}")
        print(traceback.format_exc())
        raise

    if not pdf_parts:
        raise ValueError("No PDF files detected in folder.")

    # call Gemini ONCE with all PDFs in one folder
    response = await client.aio.models.generate_content(
        model="gemini-2.5-pro",
        contents=pdf_parts + [user_prompt],
        config=types.GenerateContentConfig(
            response_mime_type="application/json"
        ),
    )

    print("\n---------- RAW MODEL OUTPUT ----------")
    print(response.text)
    print("--------------------------------------\n")

    # parse into Pydantic object
    raw = json.loads(response.text)
    if isinstance(raw, list):
        if not raw:
            raise ValueError("Gemini returned an empty list.")
        raw = raw[0]   # <-- TAKE FIRST ELEMENT

    result = PDFAnalysis.model_validate(raw)
    return result


def dump_analysis_to_json_by_ad_id(analysis_list: List[PDFAnalysis], output_filename: str = None) -> None:
    if not analysis_list:
        print("No results to dump.")
        return

    if output_filename is None:
        output_filename = f"combined_ad_analysis.json"

    def normalize_ad_id(raw: str) -> str:
        """
        Convert variations like 'EASA AD 2025-0254'
        into 'EASA-AD-2025-0254'
        """
        parts = raw.replace("_", " ").replace("-", " ").split()
        # Expect something like ["EASA", "AD", "2025", "0254"]
        return "-".join(parts)

    grouped_data = {}
    for item in analysis_list:
        clean_id = normalize_ad_id(item.ad_id)
        grouped_data[clean_id] = item.model_dump(by_alias=True, exclude={"ad_id"})

    try:
        with open(output_filename, "w", encoding="utf-8") as f:
            json.dump(grouped_data, f, indent=4)
        print(f"Saved {len(grouped_data)} AD entries to {output_filename}")
    except Exception as e:
        print(f"Error writing JSON: {e}")

Below is function to check the aircrafts in data_list dict, so that we will know their results. the json output will be included at the last part of this notebook. workflow will be as following:
->Load "combined_ad_analysis.json".
->Compare data_list to loaded json database.
->Output Json of the comparation result.

In [8]:
# funtion to compare aircrafts in data_list to the json database
def check_aircraft_against_ads(data_list, json_path):
    """
    Compare aircraft list against AD applicability rules in combined_ad_analysis.json.
    Returns a detailed list of results.
    """

    # ---------------------------
    # Load the combined AD JSON
    # ---------------------------
    with open(json_path, "r", encoding="utf-8") as f:
        ad_data = json.load(f)

    # Normalize AD keys
    ads = {
        ad_id: rules["applicability_rules"]
        for ad_id, rules in ad_data.items()
    }

    results = []

    # Helper: Check if an aircraft is affected by a single AD
    def evaluate_aircraft(aircraft, ad_rules):
        model = aircraft["Aircraft Model"]
        msn = str(aircraft["MSN"])
        mods = aircraft["Modifications"].lower()

        # --- Check model applicability ---
        models = [m.lower() for m in ad_rules["aircraft_models"]]

        if model.lower() not in models:
            return "Not Applicable"

        # --- Check excluded mods ---
        for xmod in ad_rules["excluded_if_mod"]:
            if xmod.lower() in mods:
                return "No"

        # --- Check MSN constraints ---
        msn_constraints = ad_rules["msn_constraints"]

        # Case: "ALL" included → any MSN is valid
        if len(msn_constraints) == 1 and msn_constraints[0].lower() == "all":
            pass
        else:
            if msn not in msn_constraints:
                return "No"

        # --- Required mods ---
        for rmod in ad_rules["required_mod"]:
            if rmod.lower() not in mods:
                return "No"

        return "Yes"

    # Evaluate each aircraft against all ADs
    for aircraft in data_list:
        row = {
            "Aircraft Model": aircraft["Aircraft Model"],
            "MSN": aircraft["MSN"],
            "Modifications": aircraft["Modifications"]
        }

        for ad_id, rules in ads.items():
            row[ad_id] = evaluate_aircraft(aircraft, rules)

        results.append(row)

    #dump results to JSON file
    output_filename = "aircraft_vs_ad_evaluation.json"
    with open(output_filename, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)

    print(f"Saved evaluation results to {output_filename}")
    print(results)

    return results

In [9]:
#main
async def main():
    pdf_directory1 = "./EASA_AD_2025-0254"
    pdf_directory2 = "./EASA_AD_US-2025-23-53"
    result1, result2 = await asyncio.gather(
    process_folder_pdfs(pdf_directory1),
    process_folder_pdfs(pdf_directory2))

    print("\n--- Final Pydantic Result ---")
    print(f"ad_id: {result1.ad_id}\n")
    print(f"aircraft models: {result1.applicability_rules.aircraft_models}\n")
    print(f"Manufacture Serial Number constraints: {result1.applicability_rules.msn_constraints}\n")
    print(f"excluded modifications: {result1.applicability_rules.excluded_if_mod}\n")
    print(f"required modifications: {result1.applicability_rules.required_mod}\n")
    #result2
    print(f"ad_id: {result2.ad_id}\n")
    print(f"aircraft models: {result2.applicability_rules.aircraft_models}\n")
    print(f"Manufacture Serial Number constraints: {result2.applicability_rules.msn_constraints}\n")
    print(f"excluded modifications: {result2.applicability_rules.excluded_if_mod}\n")
    print(f"required modifications: {result2.applicability_rules.required_mod}\n")
    dump_analysis_to_json_by_ad_id([result1, result2])
    checkedaircrafts= check_aircraft_against_ads(data_list,"combined_ad_analysis.json")

try:
    # If already inside event loop (Jupyter)
    loop = asyncio.get_running_loop()
except RuntimeError:
    loop = None

if loop and loop.is_running():
    # Jupyter/Colab-safe: run async function without crashing
    await main()
else:
    asyncio.run(main())


---------- RAW MODEL OUTPUT ----------
{
  "ad_id": "2025-23-53",
  "applicability_rules": {
    "aircraft_models": [
      "MD-11",
      "MD-11F",
      "MD-10-10F",
      "MD-10-30F",
      "DC-10-10",
      "DC-10-10F",
      "DC-10-15",
      "DC-10-30",
      "DC-10-30F (KC-10A and KDC-10)",
      "DC-10-40",
      "DC-10-40F"
    ],
    "msn_constraints": null,
    "excluded_if_mod": [],
    "required_mod": []
  }
}
--------------------------------------


---------- RAW MODEL OUTPUT ----------
{
  "ad_id": "2025-0254",
  "applicability_rules": {
    "aircraft_models": [
      "Airbus A320-211",
      "Airbus A320-212",
      "Airbus A320-214",
      "Airbus A320-215",
      "Airbus A320-216",
      "Airbus A320-231",
      "Airbus A320-232",
      "Airbus A320-233",
      "Airbus A321-111",
      "Airbus A321-112",
      "Airbus A321-131"
    ],
    "msn_constraints": "all",
    "excluded_if_mod": [
      "mod 24591",
      "SB A320-57-1089 at Revision 04",
      "mod 24977"
 

The output Json will be like this:
[
    {
        "Aircraft Model": "MD-11",
        "MSN": 48123,
        "Modifications": "None",
        "2025-0254": "Not Applicable",
        "2025-23-53": "No"
    },
    {
        "Aircraft Model": "DC-10-30F",
        "MSN": 47890,
        "Modifications": "None",
        "2025-0254": "Not Applicable",
        "2025-23-53": "Not Applicable"
    },
    {
        "Aircraft Model": "Boeing 737-800",
        "MSN": 30123,
        "Modifications": "None",
        "2025-0254": "Not Applicable",
        "2025-23-53": "Not Applicable"
    },
    {
        "Aircraft Model": "A320-214",
        "MSN": 5234,
        "Modifications": "None",
        "2025-0254": "Yes",
        "2025-23-53": "Not Applicable"
    },
    {
        "Aircraft Model": "A320-232",
        "MSN": 6789,
        "Modifications": "mod 24591 (production)",
        "2025-0254": "No",
        "2025-23-53": "Not Applicable"
    },
    {
        "Aircraft Model": "A320-214",
        "MSN": 7456,
        "Modifications": "SB A320-57-1089 Rev 04",
        "2025-0254": "Yes",
        "2025-23-53": "Not Applicable"
    },
    {
        "Aircraft Model": "A321-111",
        "MSN": 8123,
        "Modifications": "None",
        "2025-0254": "Yes",
        "2025-23-53": "Not Applicable"
    },
    {
        "Aircraft Model": "A321-112",
        "MSN": 364,
        "Modifications": "mod 24977 (production)",
        "2025-0254": "No",
        "2025-23-53": "Not Applicable"
    },
    {
        "Aircraft Model": "A319-100",
        "MSN": 9234,
        "Modifications": "None",
        "2025-0254": "Not Applicable",
        "2025-23-53": "Not Applicable"
    },
    {
        "Aircraft Model": "MD-10-10F",
        "MSN": 46234,
        "Modifications": "None",
        "2025-0254": "Not Applicable",
        "2025-23-53": "No"
    }
]

FURTHER EXPLANATION:
-The json is indeed to simulate database interactions, the select, insert in SQL DB. by saving and loading json i intend to simulate the database interaction along with following the requirements.
-The hurdle is while using pydantic is that i come across the realization that i do not know much in the regard of the document and aircraft law, i encountered problems caused by misunderstanding, i thought MSN_constraints was "number of missions it cannot do" so i put int instead of list/string. list because in practical setting there may be few MSN that are excluded instead of null or all. I took the time to discuss Airworthyness Directive topic and it's importance with Gemini.
-The direct comparation by Aircraft Model and MSN should give decisiveness to the analysis because AI having risk of hallucination and aviation industry deals with a lot of human lives.
-While indeed the current data loading from PDF is using AI, we can implement more direct analysis using Regex or other methods to grab the Aircraft Model and MSN, along with modification to provide a truth table for AI to work with, or to audit it's correctness.
-Asyncio is so the program works concurrently, saving time on waiting to time for program to do something. If we deploy in web service that counts compute time like Pythonanywhere this can save our billing seconds.
-Results can be refined, the functions are defined and can be modified as each work step have it's own function, for example the process_folder_pdfs function can be modified to use OCR model or VLLM. this would also put dubugging easier as we can put logging in each function to check for error more directly.
-Thank you for giving me the oppoturnity, if i am accepted i will have to learn ruling and law of aviation to make robust software to hasten aircraft check time while not forgetting safety and i request your understanding and guidance on this matter.