In [1]:
import torch
import numpy as np

from ManeuverDetectionDataset import ManeuverDetectionDataset, IrregularDataset, SlidingWindowDataset
from torch.utils.data import DataLoader

def get_l_out(l_in, kernel_size, padding=0, dilation=1, stride=1):
    return np.floor((l_in + 2 * padding - dilation * (kernel_size - 1) -1)/stride + 1)

class ConvBlock1d(torch.nn.Module):
    def __init__(self, conv_kwargs, pool_kwargs, dropout_rate) -> None:
        super().__init__()

        # https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
        self.conv = torch.nn.Conv1d(**conv_kwargs)
        self.activation_fn = torch.nn.ReLU()
        # https://pytorch.org/docs/stable/generated/torch.nn.MaxPool1d.html
        self.pooling = torch.nn.MaxPool1d(**pool_kwargs)
        self.dropout = torch.nn.Dropout(dropout_rate)

    def forward(self, x):
        x = self.conv(x)
        x = self.activation_fn(x)
        x = self.pooling(x)
        x = self.dropout(x)
        return x

class Cnn1d(torch.nn.Module):
    def __init__(self, block_kwargs_list, linear_kwargs) -> None: # use_dv_head=False, use_date_head=False
        super().__init__()
        self.block_kwargs_list = block_kwargs_list
        ll = []
        for block_kwargs in block_kwargs_list:
            ll.append(ConvBlock1d(**block_kwargs))

        self.convnet = torch.nn.Sequential(*ll)
        # https://pytorch.org/docs/stable/generated/torch.nn.Linear.html
        self.fcnn = torch.nn.Linear(**linear_kwargs)
        self.activation_fn = torch.nn.ReLU()

        self.classification_head = torch.nn.Sequential(*[
            torch.nn.Linear(linear_kwargs['out_features'], 2),
            torch.nn.Softmax(dim=-1)
        ])
        
    def forward(self, x):
        # main model
        x = self.convnet(x)
        x = torch.flatten(x, start_dim=1) # size : batch size x length
        x = self.fcnn(x)
        x = self.activation_fn(x)

        # classification head
        c = self.classification_head(x)
        return c
    
    def predict(self, x, return_embedding=False):
        self.eval()
        # main model
        x = self.convnet(x)
        x = torch.flatten(x, start_dim=1) # size : batch size x length
        x = self.fcnn(x)
        x = self.activation_fn(x)

        # classification head
        c = self.classification_head(x)
        if(return_embedding):
            return c, x
        return x


In [35]:
from tqdm import tqdm

class TorchTrainer:
    def __init__(self, model, lr, verbose=True, weight=None, index_y=0, loss_function="CrossEntropyLoss") -> None:
        self.model = model
        self.lr = lr
        self.weight = weight
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = self.model.to(self.device)
        if(loss_function == 'CrossEntropyLoss'):
            self.loss_function = torch.nn.CrossEntropyLoss(weight=self.weight) # torch.tensor([0.05, 0.95]))
        elif(loss_function == "L1Loss"):
            self.loss_function = torch.nn.L1Loss()
        self.optimiser = torch.optim.SGD(self.model.parameters(), lr=lr, momentum=0.9)
        self.verbose = verbose
        self.index_y = index_y
        self.scheduler = torch.optim.lr_scheduler.CyclicLR(self.optimiser, base_lr=lr, max_lr=10 * lr)
    
    def train(self, train_loader, epochs):
        train_loss_list = []
        for epoch in tqdm(range(epochs), disable=not self.verbose):
            train_loss = self.train_one_epoch(train_loader)
            train_loss_list.append(train_loss)
        return train_loss_list

    def train_one_epoch(self, train_loader):
        loss_list = []
        for x, y in train_loader:
            x = x.to(self.device)
            y = tuple([y_.to(self.device) for y_ in y])
            pred = self.model(x)
            self.optimiser.zero_grad()
            loss = self.loss_function(pred, y[self.index_y])
            loss.backward()
            self.optimiser.step()
            loss_list.append(loss.detach().cpu().numpy())
            self.scheduler.step()
        return np.mean(loss_list)

    def predict(self, test_loader, return_true=False):
        self.model.eval()
        c_pred_list = []
        c_true_list = []
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(self.device)
                y = tuple([y_.to(self.device) for y_ in y])
                pred = self.model(x)
                c_pred_list.append(pred)
                # dv_pred_list.append(pred[1])
                c_true_list.append(y[self.index_y])
                # dv_true_list.append(y[1])
        self.model.train()

        pred_tuple = (torch.concatenate(c_pred_list, axis=0),)
                # torch.concatenate(dv_pred_list, axis=0))        
        if(return_true):
            true_tuple = (torch.concatenate(c_true_list, axis=0),)
                # torch.concatenate(dv_true_list, axis=0))
            return true_tuple, pred_tuple
        return pred_tuple

#     def score(self, test_loader):
#         true, pred = self.predict(test_loader, return_true=True)
#         accuracy = get_accuracy(true.cpu().numpy(), pred.cpu().numpy())
#         return accuracy

# def get_accuracy(y_true, y_prob):
#     assert (y_true.ndim == 1 and y_true.shape[0] == y_prob.shape[0])
#     y_prob = np.argmax(y_prob, axis=-1)
#     return sklearn.metrics.accuracy_score(y_true, y_prob)

# Phase 1 :
Detect which time series contain maneuvers.

CNN to determine on full time series, not evenly spaced, if it contains a maneuver or not.

Fixed size of the time series : 1000 (48h of data).

In [3]:
block_kwargs_list_1000 = [
    { # layer 1
        'conv_kwargs': {
            'in_channels': 3,
            'out_channels': 6,
            'kernel_size': 7,
            'stride': 1,
            'padding': 0,
            'dilation': 1,
            'groups': 1,
            'bias': True,
            'padding_mode': 'zeros'
        },
        'pool_kwargs': {
            'kernel_size': 7,
            'stride': None,
            'padding': 0,
            'dilation': 1
        },
        'dropout_rate': 0.3
    },
    { # layer 2
        'conv_kwargs': {
            'in_channels': 6,
            'out_channels': 12,
            'kernel_size': 7,
            'stride': 1,
            'padding': 0,
            'dilation': 1,
            'groups': 1,
            'bias': True,
            'padding_mode': 'zeros'
        },
        'pool_kwargs': {
            'kernel_size': 7,
            'stride': None,
            'padding': 0,
            'dilation': 1
        },
        'dropout_rate': 0.3
    },
    { # layer 3
        'conv_kwargs': {
            'in_channels': 12,
            'out_channels':12,
            'kernel_size': 7,
            'stride': 1,
            'padding': 0,
            'dilation': 1,
            'groups': 1,
            'bias': True,
            'padding_mode': 'zeros'
        },
        'pool_kwargs': {
            'kernel_size': 5,
            'stride': None,
            'padding': 0,
            'dilation': 1
        },
        'dropout_rate': 0.3
    }
]
linear_kwargs_1000 = {
    'in_features': 24,
    'out_features': 10 # size of the projection space (dimension reduction)
}

conv_net_1000 = Cnn1d(block_kwargs_list_1000, linear_kwargs_1000).float()

# test
c = conv_net_1000(torch.zeros(4, 3, 1000).float())

## Data

In [4]:

# IRREGULAR firstdv_net
dataset_path="DATA/TRAIN_1_IRREGULAR_STEPS_V2.json"
train_dataset = IrregularDataset(ManeuverDetectionDataset(dataset_path, dataset_type="TRAIN"))
valid_dataset = IrregularDataset(ManeuverDetectionDataset(dataset_path, dataset_type="VALIDATION"))

train_loader = DataLoader(train_dataset, batch_size=8, drop_last=True, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=8, drop_last=True, shuffle=False)



**********TRAIN DATASET *********
Validation/Train ratio: 0.1
Samples filtered? NO
Samples evenly spaced? False
path: DATA/TRAIN_1_IRREGULAR_STEPS_V2.json
loading dataset. Ready in a minute!
TRAIN Dataset loaded. Size: 35762


**********VALIDATION DATASET *********
Validation/Train ratio: 0.1
Samples filtered? NO
Samples evenly spaced? False
path: DATA/TRAIN_1_IRREGULAR_STEPS_V2.json
loading dataset. Ready in a minute!
VALIDATION Dataset loaded. Size: 3974


In [42]:
trainer = TorchTrainer(model=conv_net_1000, lr=1e-4)

In [52]:
trainer.train(train_loader, epochs=3)

100%|██████████| 3/3 [00:41<00:00, 13.87s/it]


[0.48627296, 0.48849425, 0.48689064]

In [54]:
true_valid, pred_valid = trainer.predict(valid_loader, return_true=True)
true_train, pred_train = trainer.predict(train_loader, return_true=True)

In [55]:
print(torch.count_nonzero(torch.argmax(pred_valid[0], dim=1) == true_valid[0])/true_valid[0].shape[0])
print(torch.count_nonzero(torch.argmax(pred_train[0], dim=1) == true_train[0])/true_train[0].shape[0])

tensor(0.8291)
tensor(0.8280)


In [56]:
from sklearn import metrics
y_true, y_pred = true_train[0].cpu().numpy(), torch.argmax(pred_train[0], dim=1).cpu().numpy()
metrics.confusion_matrix(y_true, y_pred)

array([[17830,    20],
       [ 6132, 11778]])

In [57]:
y_true, y_pred = true_valid[0].cpu().numpy(), torch.argmax(pred_valid[0], dim=1).cpu().numpy()
metrics.confusion_matrix(y_true, y_pred)

array([[1988,    5],
       [ 673, 1302]])

# Second Step - First solution

Now that we have the problematric ones, we can try to determine both the dV and the time of the maneuver.
How do we do ?

We use a simple linear model on the embedding.

In [58]:
dataset_path="DATA/TRAIN_1_IRREGULAR_STEPS_V2.json"

train_dataset_man_only = IrregularDataset(ManeuverDetectionDataset(dataset_path, dataset_type="TRAIN", filter_samples='MANEUVER_ONLY'))
valid_dataset_man_only = IrregularDataset(ManeuverDetectionDataset(dataset_path, dataset_type="VALIDATION", filter_samples='MANEUVER_ONLY'))

train_loader_man_only = DataLoader(train_dataset, batch_size=8, drop_last=True, shuffle=True)
valid_loader_man_only = DataLoader(valid_dataset, batch_size=8, drop_last=True, shuffle=False)




**********TRAIN DATASET *********
Validation/Train ratio: 0.1
Samples filtered? MANEUVER_ONLY
Samples evenly spaced? False
path: DATA/TRAIN_1_IRREGULAR_STEPS_V2.json
loading dataset. Ready in a minute!
TRAIN Dataset loaded. Size: 17902


**********VALIDATION DATASET *********
Validation/Train ratio: 0.1
Samples filtered? MANEUVER_ONLY
Samples evenly spaced? False
path: DATA/TRAIN_1_IRREGULAR_STEPS_V2.json
loading dataset. Ready in a minute!
VALIDATION Dataset loaded. Size: 1989


In [59]:
class ManeuverTimeHead(torch.nn.Module):
    def __init__(self, in_features) -> None:
        super().__init__()
        self.linear1 = torch.nn.Linear(in_features, out_features=5)
        self.relu = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(5, out_features=1)
        self.output_fn = torch.nn.Sigmoid()

    def forward(self, embedding):
        x = self.linear1(embedding)
        x = self.relu(x)
        x = self.linear2(x)
        return torch.squeeze(self.output_fn(x))

class DeltaVelocityHead(torch.nn.Module):
    def __init__(self, in_features) -> None:
        super().__init__()
        self.linear1 = torch.nn.Linear(in_features, out_features=5)
        self.relu = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(5, out_features=1)
        self.output_fn = torch.nn.Sigmoid()

    def forward(self, embedding):
        x = self.linear1(embedding)
        x = self.relu(x)
        x = self.linear2(x)
        return torch.squeeze(x) # Dv max is like 1.5 in absolute value
    
class Wrapper(torch.nn.Module):
    def __init__(self, convnet, model_to_train) -> None:
        super().__init__()
        self.convnet = convnet
        self.model_to_train = model_to_train
    
    def forward(self, x):
        c, embedding = self.convnet.predict(x, return_embedding=True) # NOTE : we suppose we only send in data with manuver in it
        output = self.model_to_train(embedding) 
        return output

In [60]:
maneuver_time_net = ManeuverTimeHead(linear_kwargs_1000['out_features'])
dv_net = DeltaVelocityHead(linear_kwargs_1000['out_features'])

# freeze network
for param in conv_net_1000.parameters():
    param.requires_grad = False

time_net_wrapper = Wrapper(conv_net_1000, maneuver_time_net)
dv_net_wrapper = Wrapper(conv_net_1000, dv_net)

In [61]:
time_trainer = TorchTrainer(model=time_net_wrapper, lr=5e-4, index_y=2, loss_function='L1Loss')

In [62]:
time_trainer.train(train_loader_man_only, epochs=10)

100%|██████████| 10/10 [01:15<00:00,  7.56s/it]


[0.21996212922206332,
 0.18889705005134175,
 0.18826138612014515,
 0.18815250698006686,
 0.1875954427024282,
 0.1873557610303804,
 0.18718162148216413,
 0.18668309471590014,
 0.1864413131724091,
 0.18525062181217208]

In [63]:
dv_trainer = TorchTrainer(model=dv_net_wrapper, lr=2e-4, index_y=1, loss_function='L1Loss')

In [64]:
dv_trainer.train(train_loader, epochs=10)

 30%|███       | 3/10 [00:24<00:56,  8.12s/it]

In [None]:
true_time, pred_time = time_trainer.predict(valid_loader_man_only, return_true=True)

In [None]:
true_dv, pred_dv = dv_trainer.predict(valid_loader_man_only, return_true=True)

In [None]:
l1_loss = torch.nn.L1Loss()

In [None]:
l1_loss(true_time[0], pred_time[0])

tensor(0.2270, dtype=torch.float64)

In [None]:
l1_loss(true_dv[0], pred_dv[0])

tensor(0.4851, dtype=torch.float64)

## Inference time !

In [42]:
    
class InferenceWrapper(torch.nn.Module):
    def __init__(self, convnet, dv_net, time_net) -> None:
        super().__init__()
        self.convnet = convnet
        self.dv_net = dv_net
        self.time_net = time_net
    
    def forward(self, x):
        c, embedding = self.convnet(x)
        dv = self.dv_net(embedding) 
        time = self.time_net(embedding)
        return (c, dv, time), embedding
    
    def predict(self, dataloader):
        self.eval()
        cc_list = []
        time_list = []
        dv_list = []
        with torch.no_grad():
            for x, y in dataloader:
                c, embedding = self.convnet.predict(x, return_embedding=True)
                cc = torch.argmax(c, dim=-1)
                # cache = cc == 1
                # dv = torch.zeros(cc.shape)
                # time = torch.zeros(cc.shape)
                dv = self.dv_net(embedding)
                time = self.time_net(embedding)
                cc_list.append(cc.cpu().numpy())
                time_list.append(time.cpu().numpy())
                dv_list.append(dv.cpu().numpy())
        return (np.concatenate(cc_list),
                np.array(dv_list),
                np.array(time_list))


In [43]:
test_dataset_path="DATA/TEST_FILE_PUBLIC.json"
test_dataset= ManeuverDetectionDataset(test_dataset_path, dataset_type="TEST")
test_dataset_irr = IrregularDataset(ManeuverDetectionDataset(dataset_path, dataset_type="TEST", imported_dataset=test_dataset.dataset))
test_loader = DataLoader(test_dataset_irr, batch_size=1, drop_last=True)



**********TEST DATASET *********
Validation/Train ratio: 0.1
Samples filtered? NO
Samples evenly spaced? False
path: DATA/TEST_FILE_PUBLIC.json
loading dataset. Ready in a minute!
TEST Dataset loaded. Size: 1559


**********TEST DATASET *********
Validation/Train ratio: 0.1
Samples filtered? NO
Samples evenly spaced? False
path: DATA/TRAIN_1_IRREGULAR_STEPS_V2.json
TEST Dataset loaded. Size: 1559


In [44]:
# freeze network

inference_wrapper = InferenceWrapper(
        convnet=conv_net_1000,
        dv_net=dv_net,
        time_net=maneuver_time_net
)
for param in inference_wrapper.parameters():
    param.requires_grad = False

In [45]:
preds = inference_wrapper.predict(test_loader)

In [46]:
pred = np.stack(preds, axis=1) 
print(pred.shape)

(1559, 3)


In [47]:
pred[:, 2]

array([0.2099672 , 0.20466721, 0.03757284, ..., 0.03443215, 0.02545385,
       0.02452524])

In [None]:
pred[:,2]=48*3600*pred[:, 2]

In [48]:
from SubmissionGenerator import create_submission
import numpy as np
# pred[:,1]=0.01*np.ones((len(test_dataset))) #dv
create_submission(pred,"DATA/prediction")

# Second step - Second Solution : for those we detected an anomaly for
We split them in several subseries (sliding windows) and we have to find where the anomaly start occuring. 
For each small window, we determine a new score of anomaly. We can refine as many time as required.

May be now we can try predicting the time of the anomaly occuring. I am not sure.

In [None]:
from ManeuverDetectionDataset import ManeuverDetectionDataset, ManeuverDetectionSlidingWindowDataset
from torch.utils.data import DataLoader

evenly_spaced_dataset_path="DATA/TRAIN_1_EVENLY_SPACED_V2.json"
evenly_spaced_dataset= ManeuverDetectionDataset(evenly_spaced_dataset_path, fixed_step=True) # window_size=30)
evenly_spaced_dataset_sliding_window = ManeuverDetectionSlidingWindowDataset(evenly_spaced_dataset, window_size=433)
# feature,is_maneuver,maneuver_dv,maneuver_time =next(iter(evenly_spaced_loader))
# print(f"features shape (batch size * nb of meas * nb of feature):{feature.shape}\nis maneuver: {is_maneuver.item()}\ndv (m/s): {maneuver_dv.item()}\nmaneuver date (seconds from the observation start): {maneuver_time.item()}")

In [None]:
evenly_spaced_loader = DataLoader(evenly_spaced_dataset_sliding_window, batch_size=32, drop_last=True, shuffle=True)

In [None]:
trainer.train(evenly_spaced_loader, epochs=3)

In [None]:
evenly_spaced_dataset_valid = ManeuverDetectionDataset(evenly_spaced_dataset_path, dataset_type='VALIDATION', fixed_step=True)
valid_set = ManeuverDetectionSlidingWindowDataset(evenly_spaced_dataset_valid, window_size=30)

In [None]:
valid_loader = DataLoader(evenly_spaced_dataset_sliding_window, batch_size=1, drop_last=False, shuffle=False)

In [None]:
true, pred = trainer.predict(valid_loader, return_true=True)

In [None]:
torch.count_nonzero(torch.argmax(pred[0], dim=1) == true[0])/true[0].shape[0]

In [None]:
from sklearn import metrics
y_true, y_pred = true[0].cpu().numpy(), torch.argmax(pred[0], dim=1).cpu().numpy()
metrics.confusion_matrix(y_true, y_pred) # problem with those definitely - class are completly UNBALANCED. 

In [None]:
block_kwargs_list_30 = [
    { # layer 1
        'conv_kwargs': {
            'in_channels': 2,
            'out_channels': 4,
            'kernel_size': 3,
            'stride': 1,
            'padding': 0,
            'dilation': 1,
            'groups': 1,
            'bias': True,
            'padding_mode': 'zeros'
        },
        'pool_kwargs': {
            'kernel_size': 3,
            'stride': None,
            'padding': 0,
            'dilation': 1
        },
        'dropout_rate': 0
    },
    { # layer 2
        'conv_kwargs': {
            'in_channels': 4,
            'out_channels': 4,
            'kernel_size': 3,
            'stride': 1,
            'padding': 0,
            'dilation': 1,
            'groups': 1,
            'bias': True,
            'padding_mode': 'zeros'
        },
        'pool_kwargs': {
            'kernel_size': 3,
            'stride': None,
            'padding': 0,
            'dilation': 1
        },
        'dropout_rate': 0
    }
]
linear_kwargs_30 = {
    'in_features': 8,
    'out_features': 10 # size of the projection space (dimension reduction)
}
conv_net_30 = Cnn1d(block_kwargs_list_30, linear_kwargs_30).float()


In [None]:
19916/536388 * 100 # 3.72% 

In [None]:
cross_entropy_loss = torch.nn.CrossEntropyLoss(weight=torch.tensor([0.05, 0.95]))
mae_loss_1 = torch.nn.L1Loss()
mae_loss_2 = torch.nn.L1Loss()
def total_loss_function(pred, true, alpha=0.5, beta=0.1, gamma=0.4):
    c_true, dv_true, date_true = true
    c, dv, date = pred
    return alpha * cross_entropy_loss(c, c_true) + beta * mae_loss_1(dv, dv_true) + gamma * mae_loss_2(date_true, date)
  