# Loading and Preparing Drop Sample Data from CSV Files

In [None]:
import pandas as pd
import numpy as np
import glob


dir_path_dropsamples = r"C:\\Users\\Desktop\\"


filenames = glob.glob(dir_path_dropsamples + "*.csv")
data = []
usecols = [8, 21]  # Positional indices of the two columns read from every CSV


def load_tail_window(filename, usecols, window=30, min_lines=26):
    """Return the last ``window`` data rows of a CSV file as a float array.

    The first line of the file is treated as the header and is always kept,
    so it is never mistaken for a data row (and no data row is consumed as a
    header) when leading rows are skipped.

    Args:
        filename: Path of the CSV file to read.
        usecols: Positional column indices passed to ``pandas.read_csv``.
        window: Number of rows in the returned array.
        min_lines: Files with fewer physical lines than this are rejected.

    Returns:
        A ``(window, len(usecols))`` float array, or ``None`` when the file
        has fewer than ``min_lines`` lines or yields no data rows.
    """
    # Count physical lines; the context manager closes the handle right away.
    with open(filename) as handle:
        num_lines = sum(1 for _ in handle)
    if num_lines < min_lines:
        return None

    # Skip data lines 1 .. num_lines - window - 1 so that the header (line 0)
    # plus the final `window` lines remain. A plain integer `skiprows` would
    # drop the header too and turn the first wanted row into column names.
    first_kept = num_lines - window
    skip = range(1, first_kept) if first_kept > 1 else None
    df = pd.read_csv(filename, usecols=usecols, skiprows=skip)

    arr = df.values.astype(float)[-window:]
    if arr.shape[0] == 0:
        return None

    # Short files are padded at the front with copies of their last row.
    if arr.shape[0] < window:
        padding = np.repeat(arr[-1:], window - arr.shape[0], axis=0)
        arr = np.vstack([padding, arr])
    return arr


for filename in filenames:
    arr = load_tail_window(filename, usecols)
    if arr is not None:
        data.append(arr)

if not data:
    print("No data found")
else:
    # Shape: (number of files kept, 30, number of columns read).
    array1 = np.stack(data, axis=0)
    print(array1.shape)

    # NaN rows are not dropped in this cell, so report where they are.
    nan_indices = np.argwhere(np.isnan(array1))
    if len(nan_indices) > 0:
        print("NaN values found at indices:", nan_indices)
    else:
        print("No NaN values found")


In [None]:
import pandas as pd
import numpy as np
import glob

# Path to Non-drop Samples
# NOTE(review): this is the same directory string as in the drop-sample cell;
# confirm it points at the non-drop folder before training.
dir_path_dropsamples = r"C:\\Users\\Desktop\\"
filenames = glob.glob(dir_path_dropsamples + "*.csv")

data = []


def load_clean_tail_window(filename, usecols, window=30, min_lines=26):
    """Return the last ``window`` NaN-free data rows of a CSV as a float array.

    The first line of the file is treated as the header and is always kept.
    Rows containing any NaN in the selected columns are removed before the
    array is padded back up to ``window`` rows.

    Args:
        filename: Path of the CSV file to read.
        usecols: Positional column indices passed to ``pandas.read_csv``.
        window: Number of rows in the returned array.
        min_lines: Files with fewer physical lines than this are rejected.

    Returns:
        A ``(window, len(usecols))`` float array, or ``None`` when the file
        has fewer than ``min_lines`` lines or no row survives ``dropna``.
    """
    # Count physical lines; the context manager closes the handle right away.
    with open(filename) as handle:
        num_lines = sum(1 for _ in handle)
    if num_lines < min_lines:
        return None

    # Skip data lines 1 .. num_lines - window - 1 so that the header (line 0)
    # plus the final `window` lines remain. A plain integer `skiprows` would
    # drop the header too and turn the first wanted row into column names.
    first_kept = num_lines - window
    skip = range(1, first_kept) if first_kept > 1 else None
    df = pd.read_csv(filename, usecols=usecols, skiprows=skip)

    # Drop rows with any NaN values
    df = df.dropna()

    arr = df.values.astype(float)[-window:]
    if arr.shape[0] == 0:
        # Nothing left to pad from; a (0, n) array would break np.stack later.
        return None

    # Short or thinned-out files are padded at the front with copies of
    # their last row.
    if arr.shape[0] < window:
        padding = np.repeat(arr[-1:], window - arr.shape[0], axis=0)
        arr = np.vstack([padding, arr])
    return arr


for filename in filenames:
    # `usecols` is the column list defined in the drop-sample loading cell.
    arr = load_clean_tail_window(filename, usecols)
    if arr is not None:
        data.append(arr)

if not data:
    print("No data found")
else:
    # Shape: (number of files kept, 30, number of columns read).
    array2 = np.stack(data, axis=0)
    print(array2.shape)

    nan_indices = np.argwhere(np.isnan(array2))
    if len(nan_indices) > 0:
        print("NaN values found at indices:", nan_indices)
    else:
        print("No NaN values found")


# Data Preprocessing: Combining and Splitting Data for Model Training and Testing

In [None]:
from sklearn.model_selection import train_test_split

# Stack the array1 samples first and the array2 samples after them along the
# sample axis.
combined_array = np.vstack((array1, array2))

# Label vector aligned with combined_array: 1 for every array1 sample,
# 0 for every array2 sample.
n_positive, n_negative = array1.shape[0], array2.shape[0]
DROP_label = np.hstack((np.ones(n_positive), np.zeros(n_negative)))

# Hold out 30% of the samples as the test split; the fixed seed keeps the
# partition reproducible.
split_parts = train_test_split(
    combined_array,
    DROP_label,
    test_size=0.3,
    random_state=54999,
)
train_data, test_data, train_labels, test_labels = split_parts

for reported_shape in (combined_array.shape, train_data.shape, test_labels.shape):
    print(reported_shape)

# LSTM Model Training, Evaluation, and Performance Metrics Calculation

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import precision_score, recall_score, f1_score


def create_lstm_model(input_shape):
    """Build and compile the binary LSTM classifier.

    The network is an LSTM layer with 30 units, a dropout layer with rate
    0.5, and a single sigmoid output unit. It is compiled with the Adam
    optimizer, binary cross-entropy loss and an accuracy metric.

    Args:
        input_shape: Shape of one input sample, forwarded to the LSTM layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    layer_stack = [
        LSTM(30, input_shape=input_shape),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(layer_stack)
    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

# One sample is described by axes 1 and 2 of the training array.
n_steps, n_features = train_data.shape[1], train_data.shape[2]
input_shape = (n_steps, n_features)
modelLSTM = create_lstm_model(input_shape)

# The test split is also passed as validation data, so the per-epoch
# validation figures are computed on the same samples evaluated below.
history = modelLSTM.fit(
    train_data,
    train_labels,
    validation_data=(test_data, test_labels),
    batch_size=64,
    epochs=10,
)

test_loss, test_acc = modelLSTM.evaluate(test_data, test_labels)
print('Test accuracy:', test_acc)

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
predicted_probabilities = modelLSTM.predict(test_data)
test_predictions = (predicted_probabilities > 0.5).astype(np.int32)

# Precision, recall and F1 on the test split.
precision = precision_score(test_labels, test_predictions)
recall = recall_score(test_labels, test_predictions)
f1 = f1_score(test_labels, test_predictions)

# Print the results
for caption, score in (('Precision:', precision), ('Recall:', recall), ('F1 Score:', f1)):
    print(caption, score)


# Visualization of Flight Parameters and LSTM-Based Drop Predictions

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the data
file_path = r"C:\Users\Desktop\T123_2020_213_00_05_11.csv"
df = pd.read_csv(file_path)

WINDOW_ROWS = 30        # rows per model input window
FEATURE_COLUMNS = [8, 21]  # positional columns fed to the model
# x-position of the red dashed marker on the time axes
# (presumably the drop timestamp for this flight -- confirm).
MARKER_TIME = 1596153907

# Build every full sliding window, including the one that ends on the last
# row, and score them with a single batched predict call instead of one
# call per window.
features = df.iloc[:, FEATURE_COLUMNS].values.astype('float32')
n_windows = max(0, len(df) - WINDOW_ROWS + 1)
if n_windows:
    windows = np.stack(
        [features[start:start + WINDOW_ROWS] for start in range(n_windows)]
    )
    # predict returns one row per window; column 0 holds the sigmoid output.
    predictions = modelLSTM.predict(windows)[:, 0].tolist()
else:
    # Fewer rows than one window: nothing to score.
    predictions = []

predictions_df = pd.DataFrame(predictions, columns=['Prediction'])

# Create a 3-panel plot
fig, axs = plt.subplots(3, 1, figsize=(10, 18))

# Panels 0 and 1: raw flight parameters against time, with the marker line.
time_panels = (
    (axs[0], 'vertrate', 'Vertical Rate (m/s)'),
    (axs[1], 'heightAGL', 'Height AGL (m)'),
)
for ax, column, ylabel in time_panels:
    ax.plot(df['time'], df[column], color='black')
    ax.set_xlabel('Time')
    ax.set_ylabel(ylabel)
    ax.axvline(x=MARKER_TIME, color='red', linestyle='--')

# Panel 2: model output per window, indexed by window start row.
axs[2].plot(predictions_df['Prediction'], color='black')
axs[2].set_xlabel('Time Step')
axs[2].set_ylabel('Predicted Drop Likelihood')
axs[2].set_ylim(0, 1)

# Adjust layout and show the plot
plt.tight_layout()
plt.show()
