In [1]:
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
df = pd.read_csv("flights.csv")
parameters_data = pd.read_csv("parameters.csv")
flight_100 = pd.read_csv("flights/100.csv")

  df = pd.read_csv("flights.csv")


In [3]:
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1):
        super(Encoder, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

    def forward(self, x):
        outputs, (hidden, cell) = self.lstm(x)
        return hidden, cell

class Decoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1):
        super(Decoder, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x, hidden, cell):
        output, (hidden, cell) = self.lstm(x, (hidden, cell))
        prediction = self.fc(output[:, -1, :])
        return prediction, hidden, cell
        
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target_len):
        batch_size = source.size(0)
        hidden, cell = self.encoder(source)

        decoder_input = torch.zeros(batch_size, 1, 1, device=source.device)  # Starting input for    decoder, we can use the last input in encoder as well
        outputs = torch.zeros(batch_size, target_len, 1, device=source.device)

        for t in range(target_len):
            output, hidden, cell = self.decoder(decoder_input, hidden, cell)
            outputs[:, t] = output.view(batch_size, 1)
            decoder_input = output.view(batch_size, 1, 1)

        return outputs

In [4]:
# Select relevant features
features = ['battery_voltage', 'battery_current', 'wind_speed', 'wind_angle', 'altitude', 'payload', 'velocity_x', 'velocity_y', 'velocity_z']
target = 'time'  # Replace with the actual target column name

def parse_altitude(altitude_str):
    altitude_str = str(altitude_str)
    altitudes = [int(alt) for alt in altitude_str.split('-')]
    max_altitude = max(altitudes)
    min_altitude = min(altitudes)
    mean_altitude = sum(altitudes) / len(altitudes)
    return pd.Series([max_altitude, min_altitude, mean_altitude], index=['max_altitude', 'min_altitude', 'mean_altitude'])
    
# Function to create sequences 
def create_sequences(input_data, output_data, sequence_length):
    sequences = []
    output = []
    for i in range(len(input_data) - sequence_length):
        sequences.append(input_data[i:i+sequence_length])
        output.append(output_data[i+sequence_length])
    return np.array(sequences), np.array(output)


# Apply the function to the altitude column and join with the original dataframe
altitude_features = df['altitude'].apply(parse_altitude)
df = df.join(altitude_features)

# Now remove the original 'altitude' column as it's been replaced with numeric features
df.drop('altitude', axis=1, inplace=True)

# Select relevant features including the new altitude features
features = ['battery_voltage', 'battery_current', 'wind_speed', 'wind_angle', 'max_altitude', 'min_altitude', 'mean_altitude', 'payload', 'velocity_x', 'velocity_y', 'velocity_z']

# Apply MinMaxScaler to the numeric features only
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

In [5]:
sequence_length = 50 
input_sequences, output_sequences = create_sequences(df[features].values, df[target].values, sequence_length)

# Split the data into training, validation, and test sets 
X_train, X_test, y_train, y_test = train_test_split(input_sequences, output_sequences, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

In [6]:
# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [9]:
# create dataloader
from torch.utils.data import TensorDataset, DataLoader

batch_size = 64  # Example batch size

train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

val_data = TensorDataset(X_val, y_val)
val_loader = DataLoader(val_data, batch_size=batch_size)

test_data = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_data, batch_size=batch_size)

In [15]:
# Train model 
# Assuming the model is already defined as per your provided classes
from tqdm import tqdm

input_size = len(features)
hidden_size = 100  # Example size, adjust as needed
num_layers = 3    # Example number of layers

encoder = Encoder(input_size, hidden_size, num_layers)
decoder = Decoder(1, hidden_size, num_layers)  # Decoder input size is 1 since we predict one step at a time
seq2seq_model = Seq2Seq(encoder, decoder)

# Loss and optimizer
criterion = torch.nn.L1Loss()
optimizer = torch.optim.Adam(seq2seq_model.parameters(), lr=0.0001)

# Training loop with validation and early stopping
num_epochs = 5  # Example epochs
best_val_loss = float('inf')
early_stopping_patience = 3  # Number of epochs to wait after last time validation loss improved.
early_stopping_counter = 0  # Counter for early stopping

for epoch in range(num_epochs):
    # Training phase
    seq2seq_model.train()
    total_train_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [TRAIN]")
    for inputs, targets in progress_bar:
        optimizer.zero_grad()
        outputs = seq2seq_model(inputs, 1)  # Predict one step
        outputs = outputs.squeeze(-1)  # Remove the singleton dimension
        targets = targets.view_as(outputs)  # Make sure targets is the same shape
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        progress_bar.set_postfix({'train_loss': loss.item()})

    average_train_loss = total_train_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Average Training Loss: {average_train_loss:.4f}")

    # Validation phase
    seq2seq_model.eval()
    total_val_loss = 0
    with torch.no_grad():
        progress_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [VAL]")
        for inputs, targets in progress_bar:
            outputs = seq2seq_model(inputs, 1)
            outputs = outputs.squeeze(-1)
            targets = targets.view_as(outputs)
            loss = criterion(outputs, targets)

            total_val_loss += loss.item()
            progress_bar.set_postfix({'val_loss': loss.item()})

    average_val_loss = total_val_loss / len(val_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Average Validation Loss: {average_val_loss:.4f}")

    # Check for early stopping
    if average_val_loss < best_val_loss:
        best_val_loss = average_val_loss
        torch.save(seq2seq_model.state_dict(), 'best_model.pth')
        print("Saved the new best model.")
        early_stopping_counter = 0  # reset counter if validation loss improves
    else:
        early_stopping_counter += 1
        print(f"Validation loss did not improve. Counter {early_stopping_counter}/{early_stopping_patience}")
        if early_stopping_counter >= early_stopping_patience:
            print("Early stopping triggered.")
            break  # Stop training


Epoch 1/5 [TRAIN]: 100%|███| 2418/2418 [02:09<00:00, 18.65it/s, train_loss=55.4]


Epoch 1/5, Average Training Loss: 82.2091


Epoch 1/5 [VAL]: 100%|█████████| 806/806 [00:26<00:00, 30.01it/s, val_loss=80.8]


Epoch 1/5, Average Validation Loss: 73.3108
Saved the new best model.


Epoch 2/5 [TRAIN]: 100%|███| 2418/2418 [02:09<00:00, 18.67it/s, train_loss=68.5]


Epoch 2/5, Average Training Loss: 66.9698


Epoch 2/5 [VAL]: 100%|█████████| 806/806 [00:25<00:00, 31.37it/s, val_loss=66.4]


Epoch 2/5, Average Validation Loss: 61.5494
Saved the new best model.


Epoch 3/5 [TRAIN]: 100%|███| 2418/2418 [02:13<00:00, 18.18it/s, train_loss=54.1]


Epoch 3/5, Average Training Loss: 57.8259


Epoch 3/5 [VAL]: 100%|█████████| 806/806 [00:26<00:00, 30.34it/s, val_loss=56.9]


Epoch 3/5, Average Validation Loss: 54.7816
Saved the new best model.


Epoch 4/5 [TRAIN]: 100%|███| 2418/2418 [02:14<00:00, 17.93it/s, train_loss=57.2]


Epoch 4/5, Average Training Loss: 53.1654


Epoch 4/5 [VAL]: 100%|█████████| 806/806 [00:25<00:00, 31.69it/s, val_loss=52.1]


Epoch 4/5, Average Validation Loss: 51.9501
Saved the new best model.


Epoch 5/5 [TRAIN]: 100%|███| 2418/2418 [02:20<00:00, 17.27it/s, train_loss=44.3]


Epoch 5/5, Average Training Loss: 51.6331


Epoch 5/5 [VAL]: 100%|███████████| 806/806 [00:25<00:00, 31.79it/s, val_loss=51]

Epoch 5/5, Average Validation Loss: 51.3827
Saved the new best model.





In [16]:
# Load the best model
seq2seq_model.load_state_dict(torch.load('best_model.pth'))

# Test the model
seq2seq_model.eval()
with torch.no_grad():
    total_loss = 0
    for inputs, targets in test_loader:
        outputs = seq2seq_model(inputs, 1)
        outputs = outputs.squeeze(-1)  # Make sure the output tensor has shape [batch_size]
        targets = targets.view_as(outputs)  # Ensure targets tensor has the same shape as outputs
        loss = criterion(outputs, targets)
        total_loss += loss.item()
    print(f"Average test loss: {total_loss / len(test_loader)}")

Average test loss: 51.677054729414344
