In [1]:
import json
import pandas as pd
from pathlib import Path
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm

In [2]:
# ===============================================================
# 🔧 LOAD DATA
# ===============================================================
# Each non-blank line of the JSONL file is parsed as one JSON object that must
# carry "text" and "intent" keys and may carry a "slots" object. Intent and
# slots are merged into a single string label (INTENT|key=value|...) so that
# one classifier predicts both at once.
#
# NOTE(review): this is an absolute, machine-specific path. Because it is a raw
# string, every "\\" below is two literal backslashes; pathlib documents that
# repeated separators are collapsed — confirm on the target machine.
DATASET_PATH = Path(r"D:\\Project-Vimaan\\Dataset\\f_output\\aviation_cmds.jsonl")

data = []
skipped = 0  # non-blank lines that could not be turned into a training sample
with open(DATASET_PATH, "r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue  # blank lines are not records; nothing to report
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            skipped += 1
            continue
        # Valid JSON that is not a usable record is skipped (and counted)
        # instead of aborting the whole load.
        if not isinstance(entry, dict) or "text" not in entry or "intent" not in entry:
            skipped += 1
            continue
        slots = entry.get("slots") or {}  # treats a missing or null "slots" as empty
        if not isinstance(slots, dict):
            skipped += 1
            continue
        text = entry["text"]
        intent = entry["intent"]
        # Sort by slot name so the same slots always yield the same label,
        # regardless of the key order in the JSON line.
        slot_str = "|".join(f"{k}={v}" for k, v in sorted(slots.items()))
        combined_label = f"{intent}|{slot_str}"  # e.g. LANDING_GEAR|state=UP
        data.append({"text": text, "label": combined_label})

# Pin the columns so an empty dataset still yields the expected schema.
df = pd.DataFrame(data, columns=["text", "label"])
print(f"‚úÖ Loaded {len(df)} samples from {DATASET_PATH.name}")
if skipped:
    print(f"Skipped {skipped} malformed line(s) in {DATASET_PATH.name}")

‚úÖ Loaded 19896 samples from aviation_cmds.jsonl


In [3]:
# ===============================================================
# 🧠 SPLIT DATA
# ===============================================================
# Hold out 20% of the samples for evaluation. Stratifying on the combined
# label keeps the label proportions the same in both partitions, and the
# fixed random_state makes the split repeatable.
_SPLIT_OPTIONS = {
    "test_size": 0.2,
    "random_state": 42,
    "stratify": df["label"],
}
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["label"],
    **_SPLIT_OPTIONS,
)

In [4]:
# ===============================================================
# ⚙️ TRAIN JOINT MODEL
# ===============================================================
print("\nüöÄ Training Joint Intent+Slot Model...")

# Text features: lowercased word uni- and bi-grams with sublinear TF scaling;
# no stop-word list is applied.
tfidf_features = TfidfVectorizer(
    ngram_range=(1, 2),
    sublinear_tf=True,
    lowercase=True,
    stop_words=None,
)

# One classifier over the combined intent+slot labels, with class weights
# balanced by label frequency.
intent_slot_clf = LogisticRegression(max_iter=2000, class_weight="balanced")

joint_model = Pipeline(steps=[
    ("tfidf", tfidf_features),
    ("clf", intent_slot_clf),
])

# Kept as the last expression so the notebook displays the fitted pipeline.
joint_model.fit(X_train, y_train)


üöÄ Training Joint Intent+Slot Model...


0,1,2
,steps,"[('tfidf', ...), ('clf', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,input,'content'
,encoding,'utf-8'
,decode_error,'strict'
,strip_accents,
,lowercase,True
,preprocessor,
,tokenizer,
,analyzer,'word'
,stop_words,
,token_pattern,'(?u)\\b\\w\\w+\\b'

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'lbfgs'
,max_iter,2000


In [5]:
# ===============================================================
# 🧩 EVALUATION
# ===============================================================
# Score the held-out split: first the overall accuracy, then a per-label
# precision / recall / F1 table printed to four decimal places.
y_pred = joint_model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"\n‚úÖ Overall Accuracy: {acc:.4f}\n")

per_label_report = classification_report(y_test, y_pred, digits=4)
print(per_label_report)


‚úÖ Overall Accuracy: 0.8814

                            precision    recall  f1-score   support

        ALTITUDE|state=100     1.0000    1.0000    1.0000        19
       ALTITUDE|state=1000     0.9474    1.0000    0.9730        18
      ALTITUDE|state=10000     1.0000    0.8333    0.9091        18
       ALTITUDE|state=1100     1.0000    1.0000    1.0000        18
       ALTITUDE|state=1200     1.0000    1.0000    1.0000        17
       ALTITUDE|state=1300     1.0000    1.0000    1.0000        17
       ALTITUDE|state=1400     1.0000    1.0000    1.0000        17
       ALTITUDE|state=1500     1.0000    1.0000    1.0000        18
       ALTITUDE|state=1600     1.0000    1.0000    1.0000        18
       ALTITUDE|state=1700     1.0000    1.0000    1.0000        17
       ALTITUDE|state=1800     1.0000    1.0000    1.0000        16
       ALTITUDE|state=1900     0.9444    1.0000    0.9714        17
        ALTITUDE|state=200     1.0000    1.0000    1.0000        18
       ALTITUDE|

In [6]:
# ===============================================================
# 🔍 INFERENCE FUNCTION
# ===============================================================
def predict_command(text: str) -> dict:
    """Predict the intent and slots for one raw command string.

    The joint model emits a single label of the form
    ``INTENT|key=value|key=value``; this function splits it back into its
    parts.

    Args:
        text: Raw command text to classify.

    Returns:
        A dict with three keys: ``"text"`` (the input unchanged),
        ``"intent"`` (the part of the label before the first ``|``) and
        ``"slots"`` (a dict mapping each slot name to its value as a string;
        empty when the label carries no slots).

    Note:
        The result is whatever label ``joint_model.predict`` returns; no
        confidence check is applied here, so nonsense input still comes back
        with an intent.
    """
    # Coerce to a built-in str so callers never receive a NumPy string scalar.
    label = str(joint_model.predict([text])[0])

    # partition() leaves slot_str empty when the label has no "|" at all.
    intent, _, slot_str = label.partition("|")

    slots = {}
    for pair in slot_str.split("|"):
        # Split on the first "=" only, so a value that itself contains "="
        # is kept whole instead of raising ValueError.
        key, sep, value = pair.partition("=")
        if sep:
            slots[key] = value

    return {"text": text, "intent": intent, "slots": slots}

In [9]:
# ===============================================================
# 🧪 TEST SAMPLE
# ===============================================================
# A few hand-written commands, including a nonsense string ("xxx"), to
# eyeball what the inference function returns.
samples = [
    "can you please do the gear retract",
    "start engine 1",
    "xxx",
    "flaps retract",
]

print("\nüîé Sample Predictions:")
# map() is lazy, so each command is still predicted right before it is printed.
for s, pred in zip(samples, map(predict_command, samples)):
    print(f"{s:25s} ‚Üí {pred}")


üîé Sample Predictions:
can you please do the gear retract ‚Üí {'text': 'can you please do the gear retract', 'intent': 'LANDING_GEAR', 'slots': {'state': 'UP'}}
start engine 1            ‚Üí {'text': 'start engine 1', 'intent': 'ENGINE_1', 'slots': {'state': 'ON'}}
xxx                       ‚Üí {'text': 'xxx', 'intent': 'AUTOPILOT_1', 'slots': {'state': 'OFF'}}
flaps retract             ‚Üí {'text': 'flaps retract', 'intent': 'FLAPS', 'slots': {'state': 'UP'}}
