In [1]:
import pandas as pd

# Base name (without extension) of the KNN export to load.
# Alternative dataset: "aisdk-2023-11-08-xs_2_kkn"
filename = "aisTestFile_500k_kkn"

# Records in the export are terminated by '$' instead of newlines. The parsed
# table is viewed as a 3-D array with 100 rows of 6 values per leading entry.
# NOTE(review): no header=None is passed here, so pandas consumes the first
# record as column names -- confirm the file really starts with a header row.
data = pd.read_csv(
    f"../workdir/AIS-KNN-Files/{filename}.csv",
    lineterminator='$',
).values.reshape((-1, 100, 6))

In [2]:
# Long-format conversion adapted from sktime:
# https://github.com/sktime/sktime/blob/v0.11.4/sktime/datatypes/_panel/_convert.py#L608-L668
# One index level per array axis, so every scalar of the cube is addressed by
# (track, timeindex, column).
multiindex = pd.MultiIndex.from_product(
    list(map(range, data.shape)),
    names=["track", "timeindex", "column"],
)

# NOTE(review): `data` is rebound from the 3-D array to a DataFrame here, so
# this cell cannot be re-run without reloading the array first.
data = (
    pd.Series(data.flatten(), index=multiindex)
    .unstack(level="column")
    .set_axis(["mmsi", "time", "x", "y", "deg", "dist"], axis=1)
    .drop(columns=["deg", "dist"])
    .assign(time=lambda frame: pd.to_datetime(frame["time"], unit="s"))
)

# Split on the vessel identifier instead of on rows: 70 % of the distinct mmsi
# values (fixed seed) go to train, all remaining ones to test.
train_ships = pd.Series(data.mmsi.unique()).sample(frac=0.7, random_state=42)
train = data.loc[data.mmsi.isin(train_ships)].drop(columns="mmsi")
test = data.loc[~data.mmsi.isin(train_ships)].drop(columns="mmsi")
from joblib import Parallel, delayed

In [3]:
# Preview the first 101 rows of the training frame (indexed by track and timeindex).
train.head(101)

Unnamed: 0_level_0,Unnamed: 1_level_0,time,x,y
track,timeindex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
31,0,2023-11-07 23:16:04,-7990.1,-3260.7
31,1,2023-11-07 23:16:14,-7995.9,-3239.0
31,2,2023-11-07 23:16:24,-8001.7,-3218.4
31,3,2023-11-07 23:16:36,-8007.8,-3194.0
31,4,2023-11-07 23:16:44,-8012.9,-3176.8
31,...,...,...,...
31,96,2023-11-07 23:34:14,-8475.9,-1264.6
31,97,2023-11-07 23:34:25,-8471.8,-1260.6
31,98,2023-11-07 23:34:34,-8467.0,-1256.9
31,99,2023-11-07 23:34:45,-8462.7,-1252.7


In [4]:
from joblib import Parallel, delayed
from tqdm import tqdm
import numpy as np

def generate_row(df):
    """Flatten one window of consecutive track points into a single-row frame.

    Every row of the window except the last is treated as a known input; the
    last row is the prediction target ("output").

    Parameters
    ----------
    df : pandas.DataFrame
        Window with a datetime ``time`` column and numeric ``x`` / ``y``
        columns. Further columns are carried through the melt unless their
        name ends in ``index``. The caller's frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``input_<i>_time``, ``input_<i>_x``,
        ``input_<i>_y`` for each input row ``i`` plus ``output_x`` and
        ``output_y``. ``time`` is expressed in seconds before the latest
        timestamp of the window, ``x`` / ``y`` relative to the first row.
    """
    df = df.copy()
    df["time"] = (df["time"].max() - df["time"]).dt.total_seconds()
    df["x"] = df["x"] - df["x"].iloc[0]
    df["y"] = df["y"] - df["y"].iloc[0]

    # Label the rows by column *name*. Addressing the label column by position
    # (last column) silently overwrites an unrelated column whenever the
    # incoming frame already carries a `src` column somewhere else.
    df["src"] = [f"input_{i}" for i in range(len(df) - 1)] + ["output"]

    # Long format: one (src, variable, value) triple per cell, then a combined
    # "<src>_<variable>" label that becomes the column name of the wide row.
    df = df.melt(id_vars="src")
    df["colname"] = df.src + "_" + df.variable
    df = df.drop(columns=["src", "variable"]).set_index("colname")
    df = df.transpose().reset_index(drop=True)
    df.columns.name = None

    # Columns are ordered lexicographically, so with ten or more inputs
    # "input_10_*" sorts before "input_2_*". output_time is dropped because it
    # is 0 whenever the target row holds the latest timestamp of the window.
    df = df.sort_index(axis=1).drop(["output_time"], axis=1)
    # Remove melted index helper columns such as "<src>_timeindex".
    df = df.loc[:, ~df.columns.to_series().str.endswith('index')]

    return df

def generate_model_frame(df, knowns, skipped_windows=4):
    """Build the model frame: one row per sampled sliding window of every track.

    Parameters
    ----------
    df : pandas.DataFrame
        Track points whose index contains a ``track`` level (the index is
        reset into columns before grouping).
    knowns : int
        Number of known input points per sample. Each window holds
        ``knowns + 1`` rows: the inputs followed by the target row.
    skipped_windows : int, default 4
        Number of windows skipped after every kept one, i.e. only every
        ``skipped_windows + 1``-th window is used. The counter runs over all
        windows the rolling iterator yields, including the incomplete ones
        that are discarded afterwards.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the ``generate_row`` results of all kept windows.

    Raises
    ------
    ValueError
        If ``skipped_windows`` is negative, or (raised by ``pandas.concat``)
        if no window of the requested size exists.
    """
    if skipped_windows < 0:
        raise ValueError("skipped_windows must be >= 0")

    window_size = knowns + 1
    stride = skipped_windows + 1

    rolling = df.reset_index(drop=False).groupby('track').rolling(window_size)
    # Keep only complete windows that fall on the sampling stride.
    windows = [
        win
        for i, win in enumerate(rolling)
        if len(win) == window_size and i % stride == 0
    ]

    # Each window is independent of the others, so the rows are built via joblib.
    res = Parallel(n_jobs=-1)(delayed(generate_row)(win) for win in tqdm(windows))
    return pd.concat(res, ignore_index=True)


In [5]:
from pathlib import Path

# Number of known input points per sample; one train/test CSV pair is written per value.
targetrows_knowns = 2, 3 #6, 9, 12

# to_csv does not create missing directories, so make sure the target exists.
model_frame_dir = Path("../workdir/AIS-ModelFrames")
model_frame_dir.mkdir(parents=True, exist_ok=True)

for target in targetrows_knowns:
    path = model_frame_dir / f"{filename}_train_{target}_0.csv"
    test_path = model_frame_dir / f"{filename}_test_{target}_0.csv"
    wrote_any = False

    # Each file is its own cache entry: checking only the train file would
    # leave a missing test file unregenerated forever.
    if not path.exists():
        train_model = generate_model_frame(train, target)
        train_model.to_csv(path, index=False)
        wrote_any = True
    if not test_path.exists():
        test_model = generate_model_frame(test, target)
        test_model.to_csv(test_path, index=False)
        wrote_any = True

    if wrote_any:
        print(f"Done with {target}")

100%|██████████| 1425/1425 [00:08<00:00, 159.57it/s]
100%|██████████| 589/589 [00:02<00:00, 264.81it/s]


Done with 2


100%|██████████| 1425/1425 [00:07<00:00, 201.57it/s]
100%|██████████| 589/589 [00:02<00:00, 225.91it/s]


Done with 3


In [6]:
from joblib import Parallel, delayed
from tqdm import tqdm
import numpy as np

# Reproduce the first steps of generate_row on the whole test frame for inspection.
# NOTE(review): this rebinds `test` with `time` converted to float seconds, so
# the cell cannot be run twice and generate_model_frame(test, ...) no longer
# works afterwards.
test = test.assign(
    src="output",
    # seconds before the latest timestamp in the frame
    time=lambda frame: (frame["time"].max() - frame["time"]).dt.total_seconds(),
    # evaluated after `time` was replaced: the largest offset, repeated on every row
    e=lambda frame: frame["time"].max(),
)
test.head(100)

Unnamed: 0_level_0,Unnamed: 1_level_0,time,x,y,src,e
track,timeindex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0,1890.0,-4157.6,-2464.4,output,1890.0
0,1,1880.0,-4157.1,-2464.3,output,1890.0
0,2,1871.0,-4157.1,-2464.1,output,1890.0
0,3,1870.0,-4157.1,-2464.1,output,1890.0
0,4,1860.0,-4157.2,-2463.8,output,1890.0
0,...,...,...,...,...,...
0,95,1010.0,-4149.3,-2462.5,output,1890.0
0,96,1000.0,-4148.3,-2462.6,output,1890.0
0,97,991.0,-4147.7,-2462.9,output,1890.0
0,98,980.0,-4146.6,-2462.7,output,1890.0


## Resample Idea (nicht weiter verfolgt)

In [7]:
# Target spacing of the resampling experiment.
time_resample = "10s"
# Resample along the "time" index level, average each bin, and fill empty bins
# by time-weighted interpolation.
# NOTE(review): `example` is only defined in a later cell of this notebook, so
# this cell raises NameError on a fresh top-to-bottom run.
resampled = example.resample(time_resample, level="time").mean().interpolate(method="time")
resampled

NameError: name 'example' is not defined

In [114]:
import matplotlib.pyplot as plt

# Overlay the original example track and its resampled version in the x/y plane.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(example['x'], example['y'], label='Example')
ax.plot(resampled['x'], resampled['y'], label=f'Resampled {time_resample}')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('X and Y Coordinates Through Time')
ax.legend()
plt.show()

In [112]:
# Display the example frame that the resampling experiment starts from.
example

In [124]:
# Apply the resampling experiment to every track: keep only x/y, index by
# (track, time, mmsi), then resample each (track, mmsi) group on its time level.
X_resampled = (
    X.reset_index()
    .drop(columns=["epoch_time", "degree", "distance", "timepoint"])
    .set_index(['track', 'time', 'mmsi'])
    .groupby(["track", "mmsi"])
    .apply(
        lambda df: df.resample(time_resample, level="time")
        .mean()
        .interpolate(method="time")
    )
)
X_resampled.head()

In [126]:
import gc
# Force a garbage-collection pass; presumably to release intermediates of the
# resampling step above -- TODO confirm it is still needed.
gc.collect()

In [127]:
# Summary statistics of the number of rows per track after resampling.
X_resampled.groupby("track").size().describe()

Ziel: Bei Zeit 0

Beispiele für 3:
1. -5s, -10s, -15s:
2. -5s, -10s, -20s:
3. -30s, -60s, -61s:

In [None]:
target_y, target_x               , old5s_x, old5s_y, old10s_x, old10s_y

In [None]:
target_y, target_x,           old5s_x, old5s_y, old10s_x, old10s_y, old15s_x, old15s_y, old20s_x, old20s_y,

In [None]:
9 Eingänge
8 hidden Neuron
2 Ausgänge


In [278]:
import pandas as pd
import numpy as np
import seaborn as sns
from tqdm.auto import tqdm

In [279]:
# https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
# Records are separated by '$' instead of newlines; header=None keeps the
# first record as data rather than treating it as column names.
data_in = pd.read_csv("../workdir/AIS-KNN-Files/aisdk-2023-11-08-xs_2_kkn.csv", lineterminator="$", header=None)

# KKN Data Analysis

In [280]:
# Axes after reshaping: (track, time points, data fields)
resshaped = data_in.values.reshape((-1, 100, 6))
# resshaped[0,0,:]
# X,Y and Degree,Distance are the same, only in different coordinate systems
# Field order: MMSI,Time,X,Y,Degree,Distance

In [281]:
# Shape check of the reshaped array: (tracks, 100, 6).
resshaped.shape

In [282]:
# Same shape check as in the preceding cell.
resshaped.shape

In [283]:
# One range per array axis: track number, time step within a track, field position.
tracks, timepoints, columns = (range(size) for size in resshaped.shape)

# Long-format conversion adapted from
# https://github.com/sktime/sktime/blob/v0.11.4/sktime/datatypes/_panel/_convert.py#L608-L668
index = pd.MultiIndex.from_product(
    [tracks, timepoints, columns],
    names=['track', 'timepoint', 'column'],
)
X = (
    pd.Series(resshaped.flatten(), index=index)
    .unstack(level='column')
    .set_axis(['mmsi', 'epoch_time', 'x', 'y', 'degree', 'distance'], axis=1)
    # epoch_time is interpreted as seconds since the Unix epoch
    .assign(time=lambda frame: pd.to_datetime(frame['epoch_time'], unit='s'))
)

In [284]:
# Time ranges: summary statistics of the gap (in seconds) between consecutive
# points of the same track.
X.groupby('track')["time"].diff().dt.total_seconds().describe()

In [285]:
# Gap in seconds between consecutive points of the same track
# (NaN for the first point of every track).
secs = X.groupby('track')["time"].diff().dt.total_seconds()
# Histogram with one-second bins; assigning to `_` suppresses the Axes repr.
_ = sns.histplot(secs, binwidth=1)

In [286]:
# Single-track example: take track 0, express time as the offset from the
# track's first timestamp, and keep only x/y indexed by (track, time, mmsi).
example = (
    X.query("track == 0")
    .assign(time=lambda frame: frame["time"] - frame["time"].min())
    .reset_index()
    .set_index(['track', 'time', 'mmsi'])
    .drop(columns=["epoch_time", "degree", "distance", "timepoint"])
)

1. 3 Bekannte, 1 Ziel
2. 6 Bekannte, 1 Ziel
3. 12 Bekannte, 1 Ziel