In [58]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import csv
import pandas as pd
from random import seed
import matplotlib.pyplot as plt 
import numpy as np
SEED = 12345
seed(SEED)

In [56]:
df_planes = pd.read_csv('training_data.csv')
# df_planes['is_hover'] = df_planes['is_hover'].astype('category')
df_planes['squawk'] = df_planes['squawk'].astype('category')

features = list(set(df_planes.columns) - set(["icao_hex", "nnum", "start_time", "end_time", "is_hover", "record_cnt"]))


train_df, test_df = train_test_split(df_planes, 
                                           test_size=0.2, 
                                           shuffle=True)


In [81]:
clf = RandomForestClassifier(n_estimators=1000, max_depth=2,
                              max_features=0.33,
                              oob_score=True,
                              class_weight="balanced",
                              random_state=SEED
                            )
clf.fit(train_df[features], train_df[['is_hover']].values.ravel())
assert set(clf.classes_) == set([True, False])

importances = clf.feature_importances_
indices = np.argsort(importances)[::-1]

predicted_labels = clf.predict(test_df[features])
print(classification_report(test_df["is_hover"], predicted_labels ))
conf_mat = confusion_matrix(test_df["is_hover"], predicted_labels, labels=[True, False])
print("✓+: {}   | ✗+: {}\n✗-: {}   | ✓-: {}\n ".format(*[s for l in conf_mat.tolist() for s in l]))
print(conf_mat.tolist())
max_feature_strlen = max(map(len, features))
for feature, importance in sorted(list(zip(features, clf.feature_importances_)), key=lambda x: -x[1] ):
    print("{}: {}%".format(str.rjust(feature, max_feature_strlen), round(importance * 100, 2)))

              precision    recall  f1-score   support

       False       0.97      0.86      0.91       177
        True       0.42      0.78      0.55        23

   micro avg       0.85      0.85      0.85       200
   macro avg       0.69      0.82      0.73       200
weighted avg       0.90      0.85      0.87       200

✓+: 18   | ✗+: 5
✗-: 25   | ✓-: 152
 
[[18, 5], [25, 152]]
        speed2: 22.67%
        steer2: 20.86%
        steer3: 13.45%
        steer5: 11.0%
      xysteer5: 5.84%
        steer1: 5.28%
        speed3: 4.94%
        steer4: 2.6%
     altitude5: 2.09%
        speed1: 1.98%
     altitude1: 1.72%
vertical_rate1: 1.19%
        speed5: 1.11%
      xysteer4: 1.1%
vertical_rate2: 1.03%
vertical_rate5: 0.68%
vertical_rate3: 0.55%
     altitude3: 0.52%
     altitude2: 0.44%
        squawk: 0.37%
vertical_rate4: 0.36%
     altitude4: 0.1%
        speed4: 0.1%
      xysteer1: 0.0%
      xysteer3: 0.0%
      xysteer2: 0.0%
