In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import StackingClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

# Load the dataset
data = pd.read_csv('../ADC_fil.csv')


In [2]:
# Encode categorical variables
label_encoders = {}
categorical_cols = ['Aircraft.damage', 'Aircraft.Category', 'Make.Grouped', 'Engine.Type', 'Purpose.of.flight', 'Weather.Condition', 'Broad.phase.of.flight']
for column in categorical_cols:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le


In [3]:
# Define features and target
X = data[['Aircraft.damage', 'Aircraft.Category', 'Make.Grouped', 'Number.of.Engines', 'Engine.Type', 'Purpose.of.flight', 'Weather.Condition', 'Broad.phase.of.flight']]
y = data['Injury.Target']

# Train-test split: 80-20
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42, stratify=y)


In [4]:
# Define base models
base_models = [
    ('gb', GradientBoostingClassifier(random_state=42)),
    ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)),
    ('mlp', MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42))
]

# Define the stacking classifier
stacking_clf = StackingClassifier(
    estimators=base_models,
    final_estimator=LogisticRegression(max_iter=1000, class_weight='balanced'),
    cv=5  # Number of folds for cross-validation
)

# Train the stacking classifier
stacking_clf.fit(X_train, y_train)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [25]:
# Example of new data for prediction
new_data = pd.DataFrame({
    'Aircraft.damage': ['Destroyed'],
    'Aircraft.Category': ['Helicopter'],
    'Make.Grouped': ['Bell Helicopter'],
    'Number.of.Engines': [1],
    'Engine.Type': ['Turbo Shaft'],
    'Purpose.of.flight': ['Special Ops'],
    'Weather.Condition': ['IMC'],
    'Broad.phase.of.flight': ['Landing']
})



In [26]:
# Encode the new data using the same label encoders
for column in categorical_cols:
    if column in new_data.columns:
        new_data[column] = label_encoders[column].transform(new_data[column])


In [27]:
# Make predictions using the trained stacking classifier
predictions = stacking_clf.predict(new_data)
prediction_probabilities = stacking_clf.predict_proba(new_data)


In [28]:
print("Predictions:", predictions)
print("Prediction Probabilities:", prediction_probabilities)

Predictions: [1]
Prediction Probabilities: [[0.2232987 0.7767013]]
