# 点航分支

In [8]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process.kernels import RBF
from sktime.dists_kernels import AggrDist
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.classification.kernel_based import RocketClassifier, TimeSeriesSVC
from sktime.classification.hybrid import HIVECOTEV2
from sktime.classification.ensemble import WeightedEnsembleClassifier
from sktime.dists_kernels import FlatDist, ScipyDist
import plotly.graph_objects as go
import polars as pl
import numpy as np
import pickle
import os
import glob
import re

from data.track_preprocess import TrajectoryDataProcessor, process_df

In [9]:
# Dataset root directory. The original local path is kept as the default; set the
# TRACK_DATA_ROOT environment variable to run the notebook on another machine
# without editing this cell.
DATA_ROOT = os.environ.get("TRACK_DATA_ROOT", r'D:\DataSets\挑战杯_揭榜挂帅_CQ-08赛题_数据集')
# Files whose label exceeds this value are skipped by the loading cell; it is also
# the number of per-class accuracies reported by the experiments.
NUM_CLASSES = 4
# Largest number of time steps (rows per sample) that the experiments evaluate.
SEQ_LEN = 29
# Batch ids that the loading cell excludes from the dataset.
ABNORMAL_BATCH_ID = [1451, 1452, 1457, 1462, 1467, 1469, 1473, 1478, 1484, 1487, 1488, 1490, 1494, 1496, 1497, 1500]

In [11]:
# Look up the classifiers that support multivariate time-series classification
from sktime.registry import all_estimators

all_estimators("classifier", filter_tags={"capability:multivariate": True}, as_dataframe=True)

Unnamed: 0,name,object
0,Arsenal,<class 'sktime.classification.kernel_based._arsenal.Arsenal'>
1,BaggingClassifier,<class 'sktime.classification.ensemble._bagging.BaggingClassifier'>
2,CNNClassifier,<class 'sktime.classification.deep_learning.cnn.CNNClassifier'>
3,CNTCClassifier,<class 'sktime.classification.deep_learning.cntc.CNTCClassifier'>
4,CanonicalIntervalForest,<class 'sktime.classification.interval_based._cif.CanonicalIntervalForest'>
5,Catch22Classifier,<class 'sktime.classification.feature_based._catch22_classifier.Catch22Classifier'>
6,ColumnEnsembleClassifier,<class 'sktime.classification.compose._column_ensemble.ColumnEnsembleClassifier'>
7,DrCIF,<class 'sktime.classification.interval_based._drcif.DrCIF'>
8,DummyClassifier,<class 'sktime.classification.dummy._dummy.DummyClassifier'>
9,FCNClassifier,<class 'sktime.classification.deep_learning.fcn.FCNClassifier'>


In [3]:
# Build the dataset: one 2-D float64 array per point/track file pair, plus its 0-based label.
data = []
labels = []

# Sorted so the sample order (and therefore the seeded train/test split used later)
# does not depend on the order in which the filesystem lists the files.
point_files = sorted(glob.glob(os.path.join(DATA_ROOT, '点迹/PointTracks_*.txt')))
for point_file in point_files:
    # File name layout: PointTracks_<batch id>_<label>_<track count>.txt
    # The dot is escaped and the whole name must match, so look-alike names are rejected.
    match_result = re.fullmatch(r'PointTracks_(\d+)_(\d+)_(\d+)\.txt', os.path.basename(point_file))
    if match_result is None:
        # The glob can pick up files that do not follow the naming scheme; skip them
        # instead of failing on a None match.
        continue
    batch_id = int(match_result.group(1))
    label = int(match_result.group(2))
    num_tracks = int(match_result.group(3))
    if label > NUM_CLASSES or batch_id in ABNORMAL_BATCH_ID:
        continue
    # The matching track file shares the same <batch id>_<label>_<track count> suffix.
    track_file = os.path.join(DATA_ROOT, f"航迹/Tracks_{batch_id}_{label}_{num_tracks}.txt")

    preprocessed_data = TrajectoryDataProcessor(point_file, track_file).get_processed_data()
    point_df = pl.from_pandas(preprocessed_data['point_data'])
    track_df = pl.from_pandas(preprocessed_data['track_data'])
    # Attach track columns to every point row that shares the same time and batch id,
    # then order the rows chronologically.
    df = point_df.join(track_df, on=["时间", "批号"], how="left").sort("时间")
    df = process_df(df)  # project helper; its output columns become the features
    data_batch = df.to_numpy(order='c').astype(np.float64)
    data.append(data_batch)
    labels.append(label - 1)  # file labels start at 1; store them 0-based
labels = np.array(labels)

  series_copy.iloc[i] = predicted_value
 8561698.         2925092.          582255.         2391100.
  858322.         1008002.         4644832.         1809684.
 2517474.         3830768.          398214.        ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  processed_data.loc[batch_mask, col] = corrected_series.values
  series_copy.iloc[i] = predicted_value
 1018088.         1036907.         1017761.          969333.
  909217.         1004950.         1107930.         1102379.
 1255769.         1091109.         1147357.96703297]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  processed_data.loc[batch_mask, col] = corrected_series.values
  series_copy.iloc[i] = predicted_value
   76424.           77683.           60992.           68746.
  110083.          126032.          276345.          201053.
  290143.          560800.          725730.          658514.
  564820.         2222912.          295911.

In [4]:
def get_data(data, time: int):
    """Build a standardized train/test split from the first ``time`` rows of every sample.

    Each sample in ``data`` is a 2-D array. Samples with at least ``time`` rows are
    truncated to their first ``time`` rows; shorter samples are padded by repeating
    their last row. The module-level ``labels`` array supplies the targets.

    Args:
        data: Sequence of 2-D arrays that all share the same number of columns.
        time: Number of rows to keep per sample, between 1 and ``SEQ_LEN`` inclusive.

    Returns:
        Tuple ``(train_data, test_data, train_labels, test_labels, train_scaler)``.
        Both data arrays have shape ``(n_samples, time, n_columns)`` and
        ``train_scaler`` is the ``StandardScaler`` fitted on the training rows only.

    Raises:
        ValueError: If ``time`` is greater than ``SEQ_LEN`` or smaller than 1.
    """
    if time > SEQ_LEN:
        raise ValueError(f"time {time} should be less than or equal to SEQ_LEN {SEQ_LEN}")
    if time < 1:
        # Fail early with a clear message instead of deep inside the scaler.
        raise ValueError(f"time {time} should be at least 1")
    new_data = []
    for data_batch in data:
        num_tracks = len(data_batch)
        if num_tracks >= time:
            data_batch = data_batch[:time, :]
        else:
            # Pad short samples by repeating their last row up to `time` rows.
            # np.concatenate is used because np.concat only exists in NumPy >= 2.0.
            padding = np.repeat(data_batch[-1:, :], time - num_tracks, axis=0)
            data_batch = np.concatenate([data_batch, padding], axis=0)
        new_data.append(data_batch)
    new_data = np.stack(new_data, axis=0)
    # The fixed seed keeps the split identical across calls as long as the number of samples is unchanged.
    train_data, test_data, train_labels, test_labels = train_test_split(new_data, labels, test_size=0.2, random_state=42)
    train_scaler = StandardScaler()
    num_features = new_data.shape[-1]
    # Scale per column: flatten (sample, time) into rows, fit on training rows only,
    # then restore the 3-D layout.
    train_data = train_scaler.fit_transform(train_data.reshape(-1, num_features)).reshape(-1, time, num_features)
    test_data = train_scaler.transform(test_data.reshape(-1, num_features)).reshape(-1, time, num_features)
    return train_data, test_data, train_labels, test_labels, train_scaler

In [5]:
def calc_acc(pred_labels, test_labels, num_classes):
    """Compute the overall accuracy and one accuracy per class index.

    Args:
        pred_labels: Array of predicted labels.
        test_labels: Array of true labels; compared element-wise against each class index.
        num_classes: Class indices ``0 .. num_classes - 1`` are evaluated.

    Returns:
        Tuple ``(overall_accuracy, per_class_accuracies)`` where the second item is a
        list with one entry per class index, computed on the samples whose true label
        equals that index.
    """
    overall = accuracy_score(test_labels, pred_labels)

    def _class_accuracy(class_idx):
        # Restrict both arrays to the samples whose true label is this class.
        in_class = test_labels == class_idx
        return accuracy_score(test_labels[in_class], pred_labels[in_class])

    per_class = [_class_accuracy(class_idx) for class_idx in range(num_classes)]
    return overall, per_class

## KNN-TSC

In [10]:
def train_and_test_KNN_TSC(k, save_path=None):
    """Train and evaluate a k-nearest-neighbours time-series classifier per time step.

    For every ``t`` in ``1 .. SEQ_LEN`` a fresh classifier is fitted on the split
    returned by ``get_data(data, t)`` and scored on the corresponding test split.
    Overall and per-class accuracies are printed for each ``t``.

    Args:
        k: Number of neighbours passed to ``KNeighborsTimeSeriesClassifier``.
        save_path: Optional directory. When given, the fitted classifier is pickled
            to ``knn_tsc_{t}.pkl`` and the fitted scaler to ``scaler_{t}.pkl`` for
            every time step.

    Returns:
        List of overall test accuracies, one per time step.
    """
    euclidean_dist = FlatDist(ScipyDist())
    acc_by_time = []
    if save_path is not None:
        # Loop-invariant, so create the checkpoint directory once.
        os.makedirs(save_path, exist_ok=True)

    for t in range(1, SEQ_LEN + 1):
        train_data, test_data, train_labels, test_labels, train_scaler = get_data(data, t)
        knn_tsc = KNeighborsTimeSeriesClassifier(n_neighbors=k, distance=euclidean_dist)
        knn_tsc.fit(train_data, train_labels)
        if save_path is not None:
            with open(os.path.join(save_path, f"knn_tsc_{t}.pkl"), "wb") as f:
                pickle.dump(knn_tsc, f)
            # The model was fitted on scaled data, so persist the scaler as well
            # (same file naming as the ROCKET checkpoints) to make the saved model reusable.
            with open(os.path.join(save_path, f"scaler_{t}.pkl"), "wb") as f:
                pickle.dump(train_scaler, f)
        pred_labels = knn_tsc.predict(test_data)
        acc, acc_by_class = calc_acc(pred_labels, test_labels, NUM_CLASSES)
        print(f"------------- Time {t} -------------")
        print(f"Accuracy: {acc}")
        acc_by_time.append(acc)
        for i in range(NUM_CLASSES):
            print(f"Accuracy of class {i+1}: {acc_by_class[i]}")
    return acc_by_time

In [11]:
# Sweep the neighbour count and overlay one accuracy-vs-time curve per value of k.
fig = go.Figure()
for k in (1, 3, 5, 7, 9):
    acc_by_time = train_and_test_KNN_TSC(k)
    fig.add_trace(
        go.Scatter(
            x=list(range(1, SEQ_LEN + 1)),
            y=acc_by_time,
            name=f"K={k}",
            mode="lines",
        )
    )
fig.update_layout(
    title="KNN-TSC Accuracy by Time",
    xaxis_title="Time",
    yaxis_title="Accuracy",
)
fig.show()
# select k=3

------------- Time 1 -------------
Accuracy: 0.8700361010830325
Accuracy of class 1: 0.8115942028985508
Accuracy of class 2: 0.9516129032258065
Accuracy of class 3: 0.7638888888888888
Accuracy of class 4: 0.9594594594594594
------------- Time 2 -------------
Accuracy: 0.8447653429602888
Accuracy of class 1: 0.7681159420289855
Accuracy of class 2: 0.9354838709677419
Accuracy of class 3: 0.7777777777777778
Accuracy of class 4: 0.9054054054054054
------------- Time 3 -------------
Accuracy: 0.8844765342960289
Accuracy of class 1: 0.8115942028985508
Accuracy of class 2: 0.9354838709677419
Accuracy of class 3: 0.8472222222222222
Accuracy of class 4: 0.9459459459459459
------------- Time 4 -------------
Accuracy: 0.8736462093862816
Accuracy of class 1: 0.7681159420289855
Accuracy of class 2: 0.9354838709677419
Accuracy of class 3: 0.8472222222222222
Accuracy of class 4: 0.9459459459459459
------------- Time 5 -------------
Accuracy: 0.8628158844765343
Accuracy of class 1: 0.7536231884057971


In [6]:
# Re-run with k=3, pickling every per-time-step model under ckpt/track_models/knn_tsc,
# and plot the resulting accuracy curve.
fig = go.Figure()
acc_by_time = train_and_test_KNN_TSC(k=3, save_path="ckpt/track_models/knn_tsc")
fig.add_trace(
    go.Scatter(x=list(range(1, SEQ_LEN + 1)), y=acc_by_time, mode="lines")
)
fig.update_layout(
    title="KNN-TSC Accuracy by Time",
    xaxis_title="Time",
    yaxis_title="Accuracy",
)
fig.show()

------------- Time 1 -------------
Accuracy: 0.8303249097472925
Accuracy of class 1: 0.7681159420289855
Accuracy of class 2: 0.9516129032258065
Accuracy of class 3: 0.7083333333333334
Accuracy of class 4: 0.9054054054054054
------------- Time 2 -------------
Accuracy: 0.851985559566787
Accuracy of class 1: 0.7536231884057971
Accuracy of class 2: 0.9032258064516129
Accuracy of class 3: 0.8055555555555556
Accuracy of class 4: 0.9459459459459459
------------- Time 3 -------------
Accuracy: 0.8628158844765343
Accuracy of class 1: 0.7536231884057971
Accuracy of class 2: 0.9193548387096774
Accuracy of class 3: 0.8055555555555556
Accuracy of class 4: 0.972972972972973
------------- Time 4 -------------
Accuracy: 0.8628158844765343
Accuracy of class 1: 0.7971014492753623
Accuracy of class 2: 0.9032258064516129
Accuracy of class 3: 0.7777777777777778
Accuracy of class 4: 0.972972972972973
------------- Time 5 -------------
Accuracy: 0.8736462093862816
Accuracy of class 1: 0.8115942028985508
Acc

## ROCKET

In [10]:
def train_and_test_ROCKET_TSC(num_kernels, save_path=None, begin_time=1):
    """Train and evaluate a MultiRocket classifier per time step, then summarize a streaming decision rule.

    For every ``t`` in ``1 .. SEQ_LEN``:

    * the split from ``get_data(data, t)`` is used;
    * a ``RocketClassifier`` is cross-validated with 4 folds on the training split and
      the fold estimator with the highest validation accuracy is kept;
    * that estimator is scored on the test split and the accuracies are printed;
    * a streaming prediction is recorded per test sample: ``-1`` until the sample has
      "started", which happens at the first ``t >= begin_time`` where the top class
      probability exceeds 0.5; from then on the arg-max class is recorded.

    After the loop, each test sample's final label is the most frequent of its
    recorded (non ``-1``) predictions. It counts as correct only if that label equals
    the true label and was predicted in more than 90% of the recorded steps. The
    summary (accuracy, per-class accuracy, average begin time, average rate,
    confusion matrix) is printed.

    Args:
        num_kernels: Number of kernels passed to ``RocketClassifier``.
        save_path: Optional directory. When given, the selected estimator is pickled
            to ``rocket_tsc_{t}.pkl`` and the fitted scaler to ``scaler_{t}.pkl``.
        begin_time: Earliest time step at which a sample may start emitting predictions.

    Returns:
        List of overall test accuracies of the selected estimator, one per time step.
    """
    acc_by_time = []
    pred = []
    # One "has started" flag per TEST sample. Sized from the actual test split on the
    # first iteration, not from the 80% train fraction. The split is seeded, so index
    # i refers to the same sample at every time step.
    is_start = None
    begin_time_sum = 0
    if save_path is not None:
        # Loop-invariant, so create the checkpoint directory once.
        os.makedirs(save_path, exist_ok=True)
    for t in range(1, SEQ_LEN + 1):
        train_data, test_data, train_labels, test_labels, train_scaler = get_data(data, t)
        if is_start is None:
            is_start = [False] * len(test_data)
        rocket_tsc = RocketClassifier(rocket_transform="multirocket", num_kernels=num_kernels, random_state=42)
        cv_results = cross_validate(rocket_tsc, train_data, train_labels, cv=4, scoring="accuracy", return_estimator=True)
        # Keep the fold estimator with the best validation accuracy.
        best_index = np.argmax(cv_results["test_score"])
        best_model = cv_results["estimator"][best_index]
        if save_path is not None:
            with open(os.path.join(save_path, f"rocket_tsc_{t}.pkl"), "wb") as f:
                pickle.dump(best_model, f)
            with open(os.path.join(save_path, f"scaler_{t}.pkl"), "wb") as f:
                pickle.dump(train_scaler, f)
        y_pred_proba = best_model.predict_proba(test_data)
        y_pred = []
        for i in range(len(y_pred_proba)):
            # NOTE(review): the arg-max column index is used directly as the label;
            # assumes probability columns are ordered as labels 0..NUM_CLASSES-1 — confirm.
            max_index = np.argmax(y_pred_proba[i])
            if not is_start[i]:
                if y_pred_proba[i][max_index] > 0.5 and t >= begin_time:
                    is_start[i] = True
                    begin_time_sum += t
                    y_pred.append(max_index)
                else:
                    y_pred.append(-1)  # no decision emitted yet for this sample
            else:
                y_pred.append(max_index)
        pred.append(y_pred)
        pred_labels = best_model.predict(test_data)
        acc, acc_by_class = calc_acc(pred_labels, test_labels, NUM_CLASSES)
        print(f"------------- Time {t} -------------")
        print(f"Accuracy: {acc}")
        acc_by_time.append(acc)
        for i in range(NUM_CLASSES):
            print(f"Accuracy of class {i+1}: {acc_by_class[i]}")
    # Rows are time steps, columns are test samples.
    pred = np.array(pred)
    corrects = np.zeros(NUM_CLASSES)
    totals = np.bincount(test_labels, minlength=NUM_CLASSES)
    conf_matrix = np.zeros((NUM_CLASSES, NUM_CLASSES))
    rate_sum = 0
    num_voting = 0  # samples that emitted at least one prediction
    for i in range(pred.shape[1]):
        batch_pred = pred[:, i]
        batch_pred = batch_pred[batch_pred != -1]
        if len(batch_pred) == 0:
            continue
        num_voting += 1
        # Majority vote over the recorded predictions of this sample.
        unique_vals, counts = np.unique(batch_pred, return_counts=True)
        pred_label = unique_vals[np.argmax(counts)]
        # Fraction of recorded steps that agree with the majority label.
        rate = len(batch_pred[batch_pred == pred_label]) / len(batch_pred)
        rate_sum += rate
        label = test_labels[i]
        if rate > 0.9 and label == pred_label:
            corrects[label] += 1
        conf_matrix[label, pred_label] += 1
    accuracy = np.sum(corrects) / np.sum(totals)
    totals[totals == 0] = 1  # avoid division by zero for classes absent from the test split
    accuracy_by_class = corrects / totals
    # Average only over samples that actually started or voted; samples that never
    # emitted a decision would otherwise drag both averages towards zero.
    num_started = sum(is_start)
    avg_begin_time = begin_time_sum / num_started if num_started else 0.0
    avg_rate = rate_sum / num_voting if num_voting else 0.0
    print(f"accuracy: {accuracy:.4f}")
    print(f"accuracy by class: {accuracy_by_class}")
    print(f"average begin time: {avg_begin_time}")
    print(f"average rate: {avg_rate}")
    print(f"confusion matrix:\n{conf_matrix}")
    return acc_by_time

In [9]:
# Sweep the kernel count and overlay one accuracy-vs-time curve per setting.
fig = go.Figure()
for num_kernels in (1000, 2000, 5000, 10000, 20000):
    acc_by_time = train_and_test_ROCKET_TSC(num_kernels)
    fig.add_trace(
        go.Scatter(
            x=list(range(1, SEQ_LEN + 1)),
            y=acc_by_time,
            name=f"K={num_kernels}",
            mode="lines",
        )
    )
fig.update_layout(
    title="ROCKET Accuracy by Time",
    xaxis_title="Time",
    yaxis_title="Accuracy",
)
fig.show()
# select num_kernels=10000
# NOTE(review): the follow-up run in this notebook passes num_kernels=20000 — confirm which value was chosen.


KeyboardInterrupt



In [11]:
# Final ROCKET run: 20000 kernels, decisions allowed from time step 2 onwards,
# with per-time-step checkpoints written under ckpt/track_models/rocket_tsc.
fig = go.Figure()
acc_by_time = train_and_test_ROCKET_TSC(
    num_kernels=20000,
    save_path="ckpt/track_models/rocket_tsc",
    begin_time=2,
)
fig.add_trace(
    go.Scatter(x=list(range(1, SEQ_LEN + 1)), y=acc_by_time, mode="lines")
)
fig.update_layout(
    title="ROCKET Accuracy by Time",
    xaxis_title="Time",
    yaxis_title="Accuracy",
)
fig.show()

------------- Time 1 -------------
Accuracy: 0.8880866425992779
Accuracy of class 1: 0.8115942028985508
Accuracy of class 2: 0.967741935483871
Accuracy of class 3: 0.8194444444444444
Accuracy of class 4: 0.9594594594594594
------------- Time 2 -------------
Accuracy: 0.8844765342960289
Accuracy of class 1: 0.8260869565217391
Accuracy of class 2: 0.9354838709677419
Accuracy of class 3: 0.8055555555555556
Accuracy of class 4: 0.972972972972973
------------- Time 3 -------------
Accuracy: 0.8953068592057761
Accuracy of class 1: 0.8985507246376812
Accuracy of class 2: 0.9354838709677419
Accuracy of class 3: 0.7777777777777778
Accuracy of class 4: 0.972972972972973
------------- Time 4 -------------
Accuracy: 0.924187725631769
Accuracy of class 1: 0.8985507246376812
Accuracy of class 2: 0.9354838709677419
Accuracy of class 3: 0.9027777777777778
Accuracy of class 4: 0.9594594594594594
------------- Time 5 -------------
Accuracy: 0.9205776173285198
Accuracy of class 1: 0.9130434782608695
Accu

## SVC

In [None]:
def train_and_test_SVC():
    """Fit and score a ``TimeSeriesSVC`` for every time step from 1 to ``SEQ_LEN``.

    Each step uses the split returned by ``get_data(data, t)`` and an SVC whose
    kernel is ``AggrDist(RBF())``. Overall and per-class accuracies are printed.

    Returns:
        List of overall test accuracies, one per time step.
    """
    accuracies = []
    for step in range(1, SEQ_LEN + 1):
        # The fitted scaler returned by get_data is not needed here.
        x_train, x_test, y_train, y_test, _ = get_data(data, step)
        classifier = TimeSeriesSVC(kernel=AggrDist(RBF()), random_state=42)
        classifier.fit(x_train, y_train)
        predictions = classifier.predict(x_test)
        overall, per_class = calc_acc(predictions, y_test, NUM_CLASSES)
        print(f"------------- Time {step} -------------")
        print(f"Accuracy: {overall}")
        accuracies.append(overall)
        for class_no, class_acc in enumerate(per_class, start=1):
            print(f"Accuracy of class {class_no}: {class_acc}")
    return accuracies

In [None]:
# Evaluate the SVC baseline across all time steps and plot its accuracy curve.
fig = go.Figure()
acc_by_time = train_and_test_SVC()
fig.add_trace(
    go.Scatter(
        x=list(range(1, SEQ_LEN + 1)),
        y=acc_by_time,
        name="SVC",
        mode="lines",
    )
)
fig.update_layout(
    title="SVC Accuracy by Time",
    xaxis_title="Time",
    yaxis_title="Accuracy",
)
fig.show()
# exclude SVC

## HIVECOTEV2

In [None]:
def train_and_test_HIVECOTEV2():
    """Fit and score a ``HIVECOTEV2`` classifier for every time step from 1 to ``SEQ_LEN``.

    Each step uses the split returned by ``get_data(data, t)``. Overall and
    per-class accuracies are printed.

    Returns:
        List of overall test accuracies, one per time step.
    """
    accuracies = []
    for step in range(1, SEQ_LEN + 1):
        # The fitted scaler returned by get_data is not needed here.
        x_train, x_test, y_train, y_test, _ = get_data(data, step)
        classifier = HIVECOTEV2(random_state=42)
        classifier.fit(x_train, y_train)
        predictions = classifier.predict(x_test)
        overall, per_class = calc_acc(predictions, y_test, NUM_CLASSES)
        print(f"------------- Time {step} -------------")
        print(f"Accuracy: {overall}")
        accuracies.append(overall)
        for class_no, class_acc in enumerate(per_class, start=1):
            print(f"Accuracy of class {class_no}: {class_acc}")
    return accuracies

In [None]:
# Evaluate HIVECOTEV2 across all time steps and plot its accuracy curve.
fig = go.Figure()
acc_by_time = train_and_test_HIVECOTEV2()
fig.add_trace(
    go.Scatter(
        x=list(range(1, SEQ_LEN + 1)),
        y=acc_by_time,
        name="HIVECOTEV2",
        mode="lines",
    )
)
fig.update_layout(
    title="HIVECOTEV2 Accuracy by Time",
    xaxis_title="Time",
    yaxis_title="Accuracy",
)
fig.show()

## WE

In [None]:
def train_and_test_WE():
    """Fit and score a weighted ensemble of KNN-TSC and ROCKET for every time step.

    For every ``t`` in ``1 .. SEQ_LEN`` a ``WeightedEnsembleClassifier`` combining a
    3-nearest-neighbour classifier and a 20000-kernel ``RocketClassifier`` is fitted
    on the split from ``get_data(data, t)`` and scored on the test split. Overall and
    per-class accuracies are printed.

    Returns:
        List of overall test accuracies, one per time step.
    """
    acc_by_time = []
    for t in range(1, SEQ_LEN + 1):
        train_data, test_data, train_labels, test_labels, train_scaler = get_data(data, t)
        we_tsc = WeightedEnsembleClassifier(
            [
                ("KNN-TSC", KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=FlatDist(ScipyDist()))),
                # NOTE(review): this uses RocketClassifier's default transform, whereas the
                # standalone ROCKET experiment passes rocket_transform="multirocket" — confirm intent.
                ("ROCKET", RocketClassifier(num_kernels=20000, random_state=42)),
            ],
            # One weight per classifier, in list order. The original list had four
            # entries for two classifiers; the first two are kept (KNN-TSC=2, ROCKET=1).
            weights=[2, 1],
        )
        we_tsc.fit(train_data, train_labels)
        pred_labels = we_tsc.predict(test_data)
        acc, acc_by_class = calc_acc(pred_labels, test_labels, NUM_CLASSES)
        print(f"------------- Time {t} -------------")
        print(f"Accuracy: {acc}")
        acc_by_time.append(acc)
        for i in range(NUM_CLASSES):
            print(f"Accuracy of class {i+1}: {acc_by_class[i]}")
    return acc_by_time

In [None]:
# Evaluate the weighted ensemble across all time steps and plot its accuracy curve.
fig = go.Figure()
acc_by_time = train_and_test_WE()
fig.add_trace(
    go.Scatter(
        x=list(range(1, SEQ_LEN + 1)),
        y=acc_by_time,
        name="WE",
        mode="lines",
    )
)
fig.update_layout(
    title="WE Accuracy by Time",
    xaxis_title="Time",
    yaxis_title="Accuracy",
)
fig.show()