# Convolutional Neural Network Model for Detecting Spoofed Aircraft
### Purpose: This notebook creates a Convolutional Neural Network capable of detecting spoofed aircraft. It was trained using a training data set of pre-generated spoofed aircraft alongside a dataset of valid ADS-B records. It outputs a binary classification (spoofed or not) for a plane alongside confidence level. 

In [1]:
import json 
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Valid data parsing first
with open('./samples/valid/aircraft_valid.json') as json_file:
    data_valid = json.load(json_file)
print(data_valid['aircraft'][0])
print("Valid count: ", len(data_valid['aircraft']))
# Parse spoofed data
with open('./samples/spoofed/aircraft_spoofed_large.json') as json_file:
    data_spoofed = json.load(json_file)
    
aircraft_data_valid = data_valid['aircraft']
aircraft_data_spoofed = data_spoofed['aircraft']

print("Datasets imported!")

{'hex': '845f9f', 'type': 'adsc', 'flight': 'KZ51    ', 'r': 'JA11KZ', 't': 'B748', 'alt_baro': 31996, 'gs': 487.0, 'track': 244.0, 'baro_rate': 48, 'lat': 57.32872, 'lon': -177.562752, 'nic': 0, 'rc': 0, 'seen_pos': 873.399, 'mlat': [], 'tisb': [], 'messages': 4420465, 'seen': 803.5, 'rssi': -29.2}
Valid count:  8423
Datasets imported!


# Process JSON Files Containing Data

In [2]:
# Define a function to preprocess the data
def preprocess_data(data):
    features = []
    labels = []

    for aircraft in data:
        try:
            feature_vector = [
                #alt_baro maybe float?
                handle_alt_baro(aircraft.get('alt_baro', 0)),
                float(aircraft.get('gs', 0)),
                float(aircraft.get('track', 0)),
                float(aircraft.get('baro_rate', 0)),
                float(aircraft.get('lat', 0)),
                float(aircraft.get('lon', 0)),
                float(aircraft.get('seen_pos', 0)),
                float(aircraft.get('messages', 0)),
                float(aircraft.get('seen', 0)),
                float(aircraft.get('rssi', 0)),
            ]
            features.append(feature_vector)
            labels.append(1 if aircraft.get('is_spoofed', False) else 0)
        except ValueError as e:
            # Handle the case where a non-numeric value is encountered
            print(f"Ignoring data point due to non-numeric value: {aircraft}, Field {e}")

    #return np.array(features), np.array(labels)
    features = np.array(features)
    labels = np.array(labels)

    # # Scale features
    scaler = StandardScaler()
    features = scaler.fit_transform(features)

    return features, labels

def handle_alt_baro(alt_baro):
    if alt_baro == "ground":
        return 0.0
    else:
        return alt_baro
    
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(10,)),  # Define input shape explicitly
    
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),
    
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),
    
    tf.keras.layers.Dense(1, activation='linear')
])

# model = tf.keras.Sequential([
    # tf.keras.layers.Input(shape=(10,)),
    # # tf.keras.layers.Conv2D(32, kernel_size=(3,3), activation='relu'),
    # # tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
    # # tf.keras.layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
    # # tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
    # # tf.keras.layers.Flatten(),
    # tf.keras.layers.Dense(64, activation='relu'),
    # tf.keras.layers.Dropout(0.5),
    # tf.keras.layers.Dense(32, activation='relu'),
    # tf.keras.layers.Dense(1, activation='sigmoid')
# ])
# model.Compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.summary()

# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [3]:
spoofed_features, spoofed_labels = preprocess_data(aircraft_data_spoofed)
not_spoofed_features, not_spoofed_labels = preprocess_data(aircraft_data_valid)

# Combine the data
all_features = np.vstack([spoofed_features, not_spoofed_features])
all_labels = np.concatenate([spoofed_labels, not_spoofed_labels])

print(all_features, all_labels)

[[ 1.21698152  0.12338133  0.30270961 ...  1.3314093   0.79770954
   1.07453948]
 [ 0.41029097  0.62593615  0.55088812 ...  0.39182697 -1.23409194
  -0.62562485]
 [-1.15802799  1.48547818 -1.42499466 ... -1.5802634   0.72848291
   0.23125797]
 ...
 [-1.10993969 -1.32685152 -1.21581286 ... -1.17306362  0.07194092
  -1.81647978]
 [-1.10993969 -1.32685152 -1.21581286 ... -0.10443824  0.02871073
   0.06106721]
 [-1.10993969 -1.32685152 -1.21581286 ...  0.62940328 -0.11199928
   1.19022134]] [1 1 1 ... 0 0 0]


In [4]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(all_features, all_labels, test_size=0.2, random_state=42)

# Train the model !!! ADDED CLASS_WEIGHT, MIGHT NOT BE NEEDED
#class_weight={0: 1., 1: 10.}
model.fit(X_train, y_train, epochs=35, batch_size=32, validation_split=0.2)

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

Epoch 1/35
[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.5515 - loss: 5.6210 - val_accuracy: 0.6530 - val_loss: 3.9843
Epoch 2/35
[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6235 - loss: 4.5733 - val_accuracy: 0.6816 - val_loss: 3.3897
Epoch 3/35
[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6301 - loss: 4.5814 - val_accuracy: 0.6683 - val_loss: 3.7816
Epoch 4/35
[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6437 - loss: 4.4080 - val_accuracy: 0.6764 - val_loss: 3.6723
Epoch 5/35
[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 961us/step - accuracy: 0.6486 - loss: 4.2567 - val_accuracy: 0.7273 - val_loss: 2.8838
Epoch 6/35
[1m337/337[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 943us/step - accuracy: 0.6722 - loss: 3.8336 - val_accuracy: 0.7645 - val_loss: 2.6691
Epoch 7/35
[1m337/337[

In [5]:
model_name = "Spoof_DetectionTest.keras"
model.save(model_name)
print(f"Model saved to {model_name}!")

Model saved to Spoof_DetectionTest.keras!


# References and Resources
#### [Reference dump1090 README](https://github.com/SDRplay/dump1090/blob/master/README-json.md)
#### [Data Samples from ADSB Exchange](https://www.adsbexchange.com/data-samples/)
#### [2022 Data Sample from ADSB Exchange](https://samples.adsbexchange.com/readsb-hist/2022/05/01/)
#### [ADSB Exchange API](https://www.adsbexchange.com/version-2-api-wip/)
#### [Raspberry Pi & FlightAware Setup](https://elmwoodelectronics.ca/blogs/news/tracking-and-logging-flights-with-ads-b-flight-aware-and-raspberry-pi)
#### [IEEE Research on ADS-B Signals](https://ieeexplore.ieee.org/document/9377975)