In [1]:
%load_ext autoreload

In [2]:
import pandas as pd
import numpy as np
import re
import xgboost as xgb
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit

import loadBar
from csv_parser import CSVParser
from globals import RESOURCE_FOLDER
from markovSquares import MarkovSquares
from data_handler import LocalToLargeDataLoader
from searoutePointFinder import fill_with_proximity


In [3]:
data_loader = LocalToLargeDataLoader(print_progress=True)
parsed_data = data_loader.load_raw_data(path="../../Project materials(1)")


Retrieving training data...


In [4]:
index_data = parsed_data.copy()
index_data.set_index("time", inplace=True)

In [5]:
def resampler(df, sorting_column, freq):
    unique_ids = df[sorting_column].unique()
    final_df = pd.DataFrame()
    partial_list = []

    for i in range(len(unique_ids)):
        loadBar.load_bar(len(unique_ids),i+1)
        resample_partial = df[df[sorting_column] == unique_ids[i]].resample(freq).last()

        resample_partial = fill_with_proximity(resample_partial)
        partial_list.append(resample_partial)

    for chunk in partial_list:
        final_df = pd.concat([final_df,chunk])
    
    return final_df

print(index_data)
resampled_data_20min = resampler(index_data, "vesselId", "20min")

resampled_data_20min.to_csv("../../Project materials(1)/data_resampled_20min.csv")

                       cog   sog  rot  heading  navstat  latitude  longitude  \
time                                                                           
2024-01-01 00:00:25  284.0   0.7    0       88        0 -34.74370  -57.85130   
2024-01-01 00:00:36  109.6   0.0   -6      347        1   8.89440  -79.47939   
2024-01-01 00:01:45  111.0  11.0    0      112        0  39.19065  -76.47567   
2024-01-01 00:03:11   96.4   0.0    0      142        1 -34.41189  151.02067   
2024-01-01 00:03:51  214.0  19.7    0      215        0  35.88379   -5.91636   
...                    ...   ...  ...      ...      ...       ...        ...   
2024-05-07 23:59:07  359.1  13.4    0        1        0  52.19131   -5.82223   
2024-05-07 23:59:08   12.3  17.1    0       13        0  38.96142  -12.00502   
2024-05-07 23:59:08  269.8  14.9   -1      270        0  49.71372   -5.22042   
2024-05-07 23:59:08    8.0  18.7    0        6        0  38.27895   10.78280   
2024-05-07 23:59:08  336.0  14.3    5   

In [6]:
print(resampled_data_20min)

                       cog   sog  rot  heading  navstat   latitude  \
time                                                                 
2024-01-01 00:00:00  284.0   0.7  0.0     88.0      0.0 -34.743700   
2024-01-01 00:20:00  284.0   0.7  0.0     88.0      0.0 -34.627229   
2024-01-01 00:40:00  284.0   0.7  0.0     88.0      0.0 -34.510757   
2024-01-01 01:00:00  284.0   0.7  0.0     88.0      0.0 -34.394286   
2024-01-01 01:20:00  284.0   0.7  0.0     88.0      0.0 -34.277814   
...                    ...   ...  ...      ...      ...        ...   
2024-05-04 04:00:00   60.7  16.0  0.0     54.0      0.0  37.064840   
2024-05-04 04:20:00   58.2  15.7  0.0     51.0      0.0  37.110410   
2024-05-04 04:40:00   57.7  15.4  0.0     50.0      0.0  37.157450   
2024-05-04 05:00:00   58.7  15.1  0.0     48.0      0.0  37.204090   
2024-05-04 05:20:00   59.8  14.9  0.0     48.0      0.0  37.222310   

                      longitude                  vesselId  \
time                        

In [7]:
class MarkovSquares():

    def __init__(self, square_size):
        self.square_size = square_size
        self.normalized_markov_matrix = None
        self.direction_columns = ["SW", "S", "SE", "W", "C", "E", "NW", "N", "NE"]

    def markovSquares(self, df, sorting_column):
        unique_sorts = df[sorting_column].unique()

        markov_matrix = np.zeros((180//self.square_size,360//self.square_size,9))
        print("Generating Markov Matrix")
        for j, sorts in enumerate(unique_sorts):
            
            loadBar.load_bar(len(unique_sorts),j)
            sort_df = df[df[sorting_column] == sorts]

            #Iterate through all columns for input features
            for i in range(len(sort_df)-1):
                entry1 = sort_df.iloc[i]
                entry2 = sort_df.iloc[i+1]

                lat1 = entry1["latitude"]
                lon1 = entry1["longitude"]

                lat2 = entry2["latitude"]
                lon2 = entry2["longitude"]

                lat_diff = (lat1-lat2)//self.square_size
                lon_diff = (lon1-lon2)//self.square_size

                inner_idx=self.markov_index_inner(lat_diff, lon_diff)

                lat_idx=int(lat1//self.square_size)
                lon_idx=int(lon1//self.square_size)
            
                markov_matrix[lat_idx][lon_idx][inner_idx]+=1
            
        self.normalized_markov_matrix  = markov_matrix / np.sum(markov_matrix, axis=-1, keepdims=True)

    def add_as_columns(self, df):
        # Define column names for the new data
        column_names = self.direction_columns
        
        # Initialize a DataFrame to hold the results
        results_df = pd.DataFrame(columns=column_names, index=df.index)
        
        print("Adding Markov Squares as columns")
        i=0
        for idx, row in df.iterrows():
            i+=1
            loadBar.load_bar(len(df),i)
            # Extract latitude and longitude values
            latitude = row["latitude"]
            longitude = row["longitude"]
            
            # Check for NaN values in latitude and longitude
            if pd.notna(latitude) and pd.notna(longitude):
                # Get the processed values (assumes `get_markov_square` returns a list of 9 values)
                processed_values = self.get_markov_square(latitude, longitude)
                # Set the result in the corresponding row of results_df
                results_df.loc[idx] = processed_values
            else:
                # Set NaN for each new column if latitude or longitude is NaN
                results_df.loc[idx] = [np.nan] * 9

        # Concatenate results_df with the original df along the columns axis
        df = pd.concat([df, results_df], axis=1)
        
        return df


    def get_markov_square(self, lat, lon):
        lat_idx = int(lat//self.square_size)
        lon_idx = int(lon//self.square_size)
        return self.normalized_markov_matrix[lat_idx][lon_idx]

    def markov_index_inner(self, lat_diff, lon_diff):
        lat_index = 0 if lat_diff <= -1 else (1 if lat_diff == 0 else 2)
        lon_index = 0 if lon_diff <= -1 else (1 if lon_diff == 0 else 2)
        index = lat_index * 3 + lon_index
        return index

In [8]:
total_df = pd.read_csv("../../Project materials(1)/data_resampled_20min.csv")


# total_df = pd.read_csv(RESOURCE_FOLDER+"/resampled_data_h.csv")

total_df['etaParsed'] = pd.to_datetime(total_df['etaParsed'])
total_df["time"] = pd.to_datetime(total_df['time'])

start_date = pd.to_datetime('2024-01-01')

total_df["etaParsed"] = (total_df['etaParsed'] - start_date).dt.days

In [9]:
#markov = MarkovSquares(2)
#markov.markovSquares(total_df, "vesselId")
#np.save("../../Project materials(1)/markov_matrix.npy", markov.normalized_markov_matrix)

In [None]:
#np.save("../../Project materials(1)/markov_matrix.npy", markov.normalized_markov_matrix)
#markov = np.load("../../Project materials(1)/markov_matrix.npy")
#total_df = markov.add_as_columns(total_df)

AttributeError: 'numpy.ndarray' object has no attribute 'add_as_columns'

In [11]:

print(total_df.head())

                 time    cog  sog  rot  heading  navstat   latitude  \
0 2024-01-01 00:00:00  284.0  0.7  0.0     88.0      0.0 -34.743700   
1 2024-01-01 00:20:00  284.0  0.7  0.0     88.0      0.0 -34.627229   
2 2024-01-01 00:40:00  284.0  0.7  0.0     88.0      0.0 -34.510757   
3 2024-01-01 01:00:00  284.0  0.7  0.0     88.0      0.0 -34.394286   
4 2024-01-01 01:20:00  284.0  0.7  0.0     88.0      0.0 -34.277814   

   longitude                  vesselId                    portId  etaParsed  \
0 -57.851300  61e9f3a8b937134a3c4bfdf7  61d371c43aeaecc07011a37f          8   
1 -57.966135  61e9f3a8b937134a3c4bfdf7  61d371c43aeaecc07011a37f          8   
2 -58.080969  61e9f3a8b937134a3c4bfdf7  61d371c43aeaecc07011a37f          8   
3 -58.195804  61e9f3a8b937134a3c4bfdf7  61d371c43aeaecc07011a37f          8   
4 -58.310639  61e9f3a8b937134a3c4bfdf7  61d371c43aeaecc07011a37f          8   

  UN_LOCODE ISO  portLongitude  portLatitude  
0     CLSAI  CL     -71.618889      -33.5875  
1   

In [12]:

time_diffs = total_df["time"].diff()
time_interval = time_diffs.dropna().iloc[0]
time_interval = int(time_interval.total_seconds()/(60*20))



total_df.set_index("time", inplace=True)

In [None]:
total_df.to_csv("../../Project materials(1)/data_resampled_20min_markov.csv")

NameError: name 'total_df' is not defined

In [13]:
print(total_df.columns)

Index(['cog', 'sog', 'rot', 'heading', 'navstat', 'latitude', 'longitude',
       'vesselId', 'portId', 'etaParsed', 'UN_LOCODE', 'ISO', 'portLongitude',
       'portLatitude'],
      dtype='object')


In [None]:
total_df = pd.read_csv("../../Project materials(1)/data_resampled_20min_markov.csv")
total_df["time"] = pd.to_datetime(total_df['time'])
total_df.set_index("time", inplace=True)

In [5]:
print(len(total_df))

print(total_df.head())

5899721
                       cog  sog  rot  heading  navstat   latitude  longitude  \
time                                                                           
2024-01-01 00:00:00  284.0  0.7  0.0     88.0      0.0 -34.743700 -57.851300   
2024-01-01 00:20:00  284.0  0.7  0.0     88.0      0.0 -34.627229 -57.966135   
2024-01-01 00:40:00  284.0  0.7  0.0     88.0      0.0 -34.510757 -58.080969   
2024-01-01 01:00:00  284.0  0.7  0.0     88.0      0.0 -34.394286 -58.195804   
2024-01-01 01:20:00  284.0  0.7  0.0     88.0      0.0 -34.277814 -58.310639   

                                     vesselId                    portId  \
time                                                                      
2024-01-01 00:00:00  61e9f3a8b937134a3c4bfdf7  61d371c43aeaecc07011a37f   
2024-01-01 00:20:00  61e9f3a8b937134a3c4bfdf7  61d371c43aeaecc07011a37f   
2024-01-01 00:40:00  61e9f3a8b937134a3c4bfdf7  61d371c43aeaecc07011a37f   
2024-01-01 01:00:00  61e9f3a8b937134a3c4bfdf7  61d371c43

In [7]:
STEPSIZES = [1] #3, 6, 18, 36, 72, 144, 216, 288, 360]
OUTPUT_WINDOW = 1
INPUT_WINDOW = 4
OUTPUT_FORECAST = ["latitude", "longitude", "cog", "sog", "rot", "heading"]

In [14]:
#Make time series into supervised problem

# 1 = 20 minutes
# 3 = 1 hour
# 18 = 6 hours
# 72 = 24 hours
# 144 = 2 days
# 216 = 3 days
# 288 = 4 days
# 360 = 5 days



def make_supervised(df, forecast_columns, sorting_column, input_window=1, output_window=1):
    """
    Converts a multivariate time series dataframe into a supervised learning problem.
    
    Parameters:
    df (pd.DataFrame): The original dataframe with time series data.
    forecast_columns (list): A list of column names to forecast.
    input_window (int): The number of past observations to use as features.
    output_window (int): The number of steps to forecast into the future.
    
    Returns:
    pd.DataFrame: A new dataframe with supervised learning format.
    """
    

    df_new = pd.DataFrame()
    #Put in a for loop here where you iterate over all IDs, to make sure things get correct
    unique_sorts = df[sorting_column].unique()

    forbidden_cols = ["vesselId", "UN_LOCODE", "ISO", "portId", "etaParsed"]
    
    #Iterate through all IDs
    print("Creating supervised data")
    for i, sorts in enumerate(unique_sorts):
        
        loadBar.load_bar(len(unique_sorts),i+1)
        df_supervised = pd.DataFrame()
        sort_df = df[df[sorting_column] == sorts]

        #Iterate through all columns for input features
        for col in sort_df.columns: 
            if col in forbidden_cols:
                    continue
            for i in range(input_window, 0, -1):
                df_supervised[f"{col}_t-{i}"] = sort_df[col].shift(i)
            
            df_supervised[f"{col}_t"] = sort_df[col]
            

        # Create columns for forecast (target) with forward shift
        for col in forecast_columns:
            for j in range(output_window, 0,-1):
                df_supervised[f"{col}_t+{j}"] = sort_df[col].shift(-j)

        df_supervised = df_supervised.dropna()
        
        df_new = pd.concat([df_new, df_supervised])
    
    return df_new

# total_df = pd.read_csv("../../build_resources/data_resampled_20min_markov.csv")



In [35]:
total_df = make_supervised(total_df, OUTPUT_FORECAST, "vesselId", input_window=INPUT_WINDOW, output_window=OUTPUT_WINDOW)

Creating supervised data
[--------------------] 0.72% complete

KeyboardInterrupt: 

In [18]:
print(len(total_df))
print(total_df.head())

5896285
                     cog_t-4  cog_t-3  cog_t-2  cog_t-1  cog_t  sog_t-4  \
time                                                                      
2024-01-01 01:20:00    284.0    284.0    284.0    284.0  284.0      0.7   
2024-01-01 01:40:00    284.0    284.0    284.0    284.0  284.0      0.7   
2024-01-01 02:00:00    284.0    284.0    284.0    284.0  284.0      0.7   
2024-01-01 02:20:00    284.0    284.0    284.0    284.0  284.0      0.7   
2024-01-01 02:40:00    284.0    284.0    284.0    284.0  284.0      0.7   

                     sog_t-3  sog_t-2  sog_t-1  sog_t  ...  portLatitude_t-3  \
time                                                   ...                     
2024-01-01 01:20:00      0.7      0.7      0.7    0.7  ...          -33.5875   
2024-01-01 01:40:00      0.7      0.7      0.7    0.7  ...          -33.5875   
2024-01-01 02:00:00      0.7      0.7      0.7    0.7  ...          -33.5875   
2024-01-01 02:20:00      0.7      0.7      0.7    0.7  ...        

In [3]:

# Display the first few rows of the dataframe
# total_df.to_csv("../../Project materials(1)/data_resampled_20min_super.csv")
total_df = pd.read_csv("../../Project materials(1)/data_resampled_20min_super.csv")
total_df["time"] = pd.to_datetime(total_df['time'])
total_df.set_index("time", inplace=True)


In [4]:


#Sorting columns
def sort_columns(df):
    
    # Extract suffixes and assign _t as _t0
    columns_with_suffix = []
    for col in df.columns:
        match = re.search(r"_t([+-]?\d*)$", col)
        # If there's no number after _t, treat it as _t0
        suffix = int(match.group(1)) if match.group(1) else 0
        columns_with_suffix.append((col, suffix))
    
    # Sort by suffix value (ascending)
    sorted_t_columns = [col for col, _ in sorted(columns_with_suffix, key=lambda x: x[1])]
    
    # Reorder dataframe columns
    return df[sorted_t_columns]

total_df = total_df.dropna()
print(len(total_df))
total_df = total_df.sort_index(ascending = True)
total_df=sort_columns(total_df)

print(total_df)



5896285
                     cog_t-4  sog_t-4  rot_t-4  heading_t-4  navstat_t-4  \
time                                                                       
2024-01-01 01:20:00    284.0      0.7      0.0         88.0          0.0   
2024-01-01 01:20:00     11.5      0.0      0.0        352.0          5.0   
2024-01-01 01:20:00    320.0      1.2      0.0        134.0          0.0   
2024-01-01 01:20:00    162.8      9.9      0.0        164.0          0.0   
2024-01-01 01:20:00    271.5      0.0      0.0        242.0          5.0   
...                      ...      ...      ...          ...          ...   
2024-05-07 23:20:00    352.5      0.0      0.0        174.0          5.0   
2024-05-07 23:20:00    224.6      0.2      0.0        282.0          1.0   
2024-05-07 23:20:00    296.3     14.7      3.0        298.0          0.0   
2024-05-07 23:20:00    113.6      0.0      0.0        113.0          5.0   
2024-05-07 23:20:00    342.0      7.8      0.0        336.0          0.0   

   

In [5]:
print(total_df.head()) 

                     cog_t-4  sog_t-4  rot_t-4  heading_t-4  navstat_t-4  \
time                                                                       
2024-01-01 01:20:00    284.0      0.7      0.0         88.0          0.0   
2024-01-01 01:20:00     11.5      0.0      0.0        352.0          5.0   
2024-01-01 01:20:00    320.0      1.2      0.0        134.0          0.0   
2024-01-01 01:20:00    162.8      9.9      0.0        164.0          0.0   
2024-01-01 01:20:00    271.5      0.0      0.0        242.0          5.0   

                     latitude_t-4  longitude_t-4  portLongitude_t-4  \
time                                                                  
2024-01-01 01:20:00     -34.74370      -57.85130         -71.618889   
2024-01-01 01:20:00      44.40593        8.88505           8.916389   
2024-01-01 01:20:00      52.32413        2.10535           1.150000   
2024-01-01 01:20:00      33.63707     -118.23330        -118.265278   
2024-01-01 01:20:00      53.57537        

In [8]:


def train_test_split(df, perc1, perc2, output_window):
    y_list = []
    for j in range(output_window):
        for col in OUTPUT_FORECAST:
            y_list.append(f"{col}_t+{j+1}")
    ys = df[y_list]
    Xs = df.drop(columns = y_list)

    X_train = Xs.iloc[:int(np.round(Xs.shape[0]*perc1)),:]
    y_train = ys.iloc[:int(np.round(Xs.shape[0]*perc1)),:]
    X_val = Xs.iloc[int(np.round(Xs.shape[0]*perc1)):int(np.round(Xs.shape[0]*perc2)),:]
    y_val = ys.iloc[int(np.round(Xs.shape[0]*perc1)):int(np.round(Xs.shape[0]*perc2)),:]
    X_test = Xs.iloc[int(np.round(Xs.shape[0]*perc2)):,:]
    y_test = ys.iloc[int(np.round(Xs.shape[0]*perc2)):,:]

    return X_train, y_train, X_val, y_val, X_test, y_test



X_train, y_train, X_val, y_val, X_test, y_test = train_test_split(total_df, 0.75, 0.85, OUTPUT_WINDOW)



In [9]:
def evaluate(stepsize, preds, y_val):
    print("/"+"-"*50+"\\")
    print("Evaluating model with stepsize", stepsize)

    results = {
        "MAE": mean_absolute_error(y_val, preds),
        "MSE": np.square(np.subtract(y_val,preds)).mean(),
        "R2 Score": r2_score(y_val, preds),
        "RMSE": np.sqrt(np.square(np.subtract(y_val,preds)).mean())
    }

    for metric, value in results.items():
        print(f"{metric}: {value}")
    print("\\"+"-"*50+"/")



In [None]:
#Tuning params
# We need to use XGB

# Define model with a high num_boost_round
model = xgb.XGBRFRegressor(
    objective="reg:squarederror",
    tree_method="hist",  # or "hist" if not using GPU
    n_jobs=-1
)

# Define parameter grid
param_grid = {
    "max_depth": [3, 5, 7],
    "learning_rate": [1],
    "subsample": [0.6, 0.8, 1.0],
    "colsample_bynode": [0.4, 0.6, 0.8],
    "num_parallel_tree": [50, 100, 200]
}

# Use TimeSeriesSplit for time series cross-validation
tscv = TimeSeriesSplit(n_splits=10)

# Set early stopping and validation set in fit parameters
fit_params = {
    "eval_set": [(X_val, y_val)],  # Validation set to monitor performance
    "verbose": 1
}

# RandomizedSearchCV with early stopping
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=20,  # Number of sampled parameter combinations
    scoring="neg_mean_squared_error",
    cv=tscv,
    verbose=1,
    random_state=42
)

# Fit with early stopping
random_search.fit(X_train, y_train, **fit_params)

# Output best parameters and number of boosting rounds
best_params = random_search.best_params_
best_num_boost_round = model.get_booster().best_iteration  # Retrieve best boosting rounds
print("Best parameters:", best_params)
print("Best num_boost_round:", best_num_boost_round)


Fitting 10 folds for each of 20 candidates, totalling 200 fits


ValueError: 
All the 200 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
200 fits failed with the following error:
Traceback (most recent call last):
  File "/home/susan-palencia/anaconda3/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/susan-palencia/anaconda3/lib/python3.12/site-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
           ^^^^^^^^^^^^^^
TypeError: XGBRFRegressor.fit() got an unexpected keyword argument 'early_stopping_rounds'


In [9]:

print(X_train)



dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)
dtest_X = xgb.DMatrix(X_test)

params = {"objective": "reg:squarederror",
            "max_depth": 5,
            "booster": "gbtree",
            "tree-method": "gpu_hist",
            "col_sample_bynode": 0.5,
            "num_parallel_tree": 100,
            "subsample": 0.8,
            "learning_rate": 1,
            #"n_estimators": 100,
            #"reg_alpha": 0.1,
            #"reg_lambda": 0.1,
            #"n_jobs": -1,
            "verbosity": 1
            }

num_boost_round = 10

early_stopping_rounds = 2

print(dtrain)

model = xgb.train(params, dtrain, num_boost_round, evals=[(dval, "validation")], early_stopping_rounds=early_stopping_rounds, verbose_eval=True)


preds = model.predict(dtest_X)



                     cog_t-4  sog_t-4  rot_t-4  heading_t-4  navstat_t-4  \
time                                                                       
2024-01-01 01:20:00    284.0      0.7      0.0         88.0          0.0   
2024-01-01 01:20:00     11.5      0.0      0.0        352.0          5.0   
2024-01-01 01:20:00    320.0      1.2      0.0        134.0          0.0   
2024-01-01 01:20:00    162.8      9.9      0.0        164.0          0.0   
2024-01-01 01:20:00    271.5      0.0      0.0        242.0          5.0   
...                      ...      ...      ...          ...          ...   
2024-04-04 10:20:00    276.0     16.6      0.0        276.0          0.0   
2024-04-04 10:20:00    322.1     15.6      0.0        326.0          0.0   
2024-04-04 10:20:00    217.7     14.4      0.0        216.0          0.0   
2024-04-04 10:20:00    204.6     18.3      0.0        205.0          0.0   
2024-04-04 10:20:00    346.9      0.0      0.0        317.0          5.0   

           

Parameters: { "col_sample_bynode", "tree-method" } are not used.



[0]	validation-rmse:18.25536
[1]	validation-rmse:18.00081
[2]	validation-rmse:17.84957
[3]	validation-rmse:17.75762
[4]	validation-rmse:17.66046
[5]	validation-rmse:17.59065
[6]	validation-rmse:17.51540
[7]	validation-rmse:17.46008
[8]	validation-rmse:17.42471
[9]	validation-rmse:17.39337


In [16]:
def closest_n_min_mark(timestamp, n=1):
    timestamp = pd.to_datetime(timestamp)
    minutes = timestamp.minute
    closest_mark = round(minutes / (n*20)) * n*20
    if closest_mark == 60:
        rounded_timestamp = timestamp.replace(minute=0, second=0, microsecond=0) + pd.Timedelta(hours=1)
    else:
        rounded_timestamp = timestamp.replace(minute=closest_mark, second=0, microsecond=0)
    
    return rounded_timestamp

In [11]:
def shift_to_back(process_df):      
    for _, col in enumerate(OUTPUT_FORECAST):

        max_suffix_neg = 0
        max_suffix_pos = 0
        
        # Identify existing suffixes in the process_df for the current column
        while f"{col}_t-{max_suffix_neg+1}" in process_df.columns:
            max_suffix_neg += 1
        while f"{col}_t+{max_suffix_pos+1}" in process_df.columns:
            max_suffix_pos += 1
        for shift in range(max_suffix_neg - 1, -max_suffix_pos, -1):  # Start from max_suffix-1 down to 0
            if shift == 0:
                # Set the new predicted value as the most recent
                process_df[f"{col}_t"] = process_df[f"{col}_t+1"]
            elif shift == 1:
                # Shift the column
                process_df[f"{col}_t-{shift}"] = process_df[f"{col}_t"]
            elif shift > 1:
                # Shift the column
                process_df[f"{col}_t-{shift}"] = process_df[f"{col}_t-{shift - 1}"]
            else:
                process_df[f"{col}_t+{-shift}"] = process_df[f"{col}_t+{-shift + 1}"]

        for shift in range(1, max_suffix_pos+1):
            process_df = process_df.drop(columns=[f"{col}_t+{shift}"])
    
    return process_df


In [12]:
def predict_far_future(model, features, test_df,  forecast_columns):
    
    X_test = features.copy().iloc[-1:]
    preds = pd.DataFrame(columns=["vesselId", "approximate_time"])
    
    # Determine the furthest time in 20-minute intervals
    furthest_time = closest_n_min_mark(test_df["time"].max())
    current_time = closest_n_min_mark(X_test.index.max())
    
    # Generate the future time steps at 20-minute intervals
    future_steps = pd.date_range(start=current_time, end=furthest_time, freq='20min')
    
    for future_time in future_steps:
        y_pred = model.predict(xgb.DMatrix(X_test))

        new_row = pd.DataFrame({
            "vesselId": [test_df["vesselId"].iloc[0]],
            "approximate_time": [future_time]
        })
        for idx, col in enumerate(forecast_columns):
            new_row[f"{col}"] = y_pred[0, idx]  # Use the predicted value
        #new_row = markov.add_as_columns(new_row)
        
        
        preds = pd.concat([preds, new_row], ignore_index=True)
        
        # Update X_test for the next iteration
        for idx, col in enumerate(forecast_columns):

            max_suffix = 0
            
            # Identify existing suffixes in the X_test for the current column
            while f"{col}_t-{max_suffix+1}" in X_test.columns:
                max_suffix += 1
            for shift in range(max_suffix - 1, -1, -1):  # Start from max_suffix-1 down to 0
                if shift == 0:
                    # Set the new predicted value as the most recent
                    X_test[f"{col}_t"] = y_pred[0, idx]
                elif shift == 1:
                    # Shift the column
                    X_test[f"{col}_t-{shift}"] = X_test[f"{col}_t"]
                else:
                    # Shift the column
                    X_test[f"{col}_t-{shift}"] = X_test[f"{col}_t-{shift - 1}"]
    
    return preds


csv_parser = CSVParser(folderpath="../../Project materials(1)")

test_df = csv_parser.retrieve_test_data()




In [18]:
def predict_times(model,total_df,test_df):
    unique_sorts = test_df["vesselId"].unique()
    preds_df = pd.DataFrame()
    result = pd.DataFrame()

    for sorts in unique_sorts:
        latest_features=total_df[total_df["vesselId"] == sorts]
        test_by_vessel_df = test_df[test_df["vesselId"] == sorts]
        latest_features = make_supervised(latest_features, OUTPUT_FORECAST, "vesselId" , INPUT_WINDOW, OUTPUT_WINDOW)
        latest_features = shift_to_back(latest_features)
        latest_features = sort_columns(latest_features)
        preds = predict_far_future(model, latest_features, test_by_vessel_df, OUTPUT_FORECAST)
        preds_df = pd.concat([preds_df, preds])
    
    for test in test_df.iterrows():
        test=pd.Series(test[1])
        new_row = pd.DataFrame()
        new_row=preds_df[
            (preds_df["vesselId"] == test["vesselId"]) & 
            (preds_df["approximate_time"] == closest_n_min_mark(test["time"]))
            ][["latitude", "longitude"]]
        new_row["ID"] = test["ID"]
        new_row["time"] = test["time"]
        
        result = pd.concat([result, new_row])
    result["latitude_predicted"] = result["latitude"]
    result["longitude_predicted"] = result["longitude"]

    return result[["ID","longitude_predicted","latitude_predicted"]]

print(test_df)
total_df = pd.read_csv("../../Project materials(1)/data_resampled_20min.csv")
total_df["time"] = pd.to_datetime(total_df['time'])
total_df.set_index("time", inplace=True)
print(total_df.head())
result_df = predict_times(model, total_df, test_df)
print(result_df)





          ID                  vesselId                time  scaling_factor
0          0  61e9f3aeb937134a3c4bfe3d 2024-05-08 00:03:16             0.3
1          1  61e9f473b937134a3c4c02df 2024-05-08 00:06:17             0.3
2          2  61e9f469b937134a3c4c029b 2024-05-08 00:10:02             0.3
3          3  61e9f45bb937134a3c4c0221 2024-05-08 00:10:34             0.3
4          4  61e9f38eb937134a3c4bfd8d 2024-05-08 00:12:27             0.3
...      ...                       ...                 ...             ...
51734  51734  61e9f3a8b937134a3c4bfdf3 2024-05-12 23:59:58             0.1
51735  51735  61e9f3b4b937134a3c4bfe77 2024-05-12 23:59:58             0.1
51736  51736  61e9f46cb937134a3c4c02b7 2024-05-12 23:59:58             0.1
51737  51737  61e9f465b937134a3c4c0269 2024-05-12 23:59:58             0.1
51738  51738  61e9f3adb937134a3c4bfe39 2024-05-12 23:59:58             0.1

[51739 rows x 4 columns]
                       cog  sog  rot  heading  navstat   latitude  longitu

  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


Creating supervised data


  preds = pd.concat([preds, new_row], ignore_index=True)


         ID  longitude_predicted  latitude_predicted
2         0           -81.872749           31.170771
2         1           120.029510           14.514480
2         2            10.880864           38.644531
2         3           173.261154          -43.644081
3         4            -6.459991           48.384121
...     ...                  ...                 ...
1088  51734           -79.743683           25.780249
1185  51735           141.545029           41.920494
1024  51736           141.545029           35.554764
375   51737            22.119131           59.696823
362   51738            10.098973           56.301670

[51739 rows x 3 columns]


In [20]:
#turn results into a csv file
result_df.to_csv("../../Project materials(1)/results.csv", index=False)

In [None]:
_ = plot_importance(model, height=0.9)

### First model:
Included navstat and etaParsed

Timewindow: (3,2)

MAE: 0.8521843262281953 

MSE: longitude_t+1    21.225563

latitude_t+1      1.993130

longitude_t+2    38.471488

latitude_t+2      3.840146

dtype: float64

R2 Score: 0.9958523607729776

RMSE: longitude_t+1    4.607121

latitude_t+1     1.411783

longitude_t+2    6.202539

latitude_t+2     1.959629

dtype: float64


### Second model:

Added cog, rot and heading to target features.

Timewindow: (3,2)

MAE: 7.198335594601071
MSE: latitude_t+1        1.980426
longitude_t+1      21.577318
cog_t+1          1820.208937
rot_t+1            92.532501
heading_t+1      1172.604934
latitude_t+2        3.813640
longitude_t+2      39.218475
cog_t+2          2370.325440
rot_t+2           107.991661
heading_t+2      1769.347459
dtype: float64
R2 Score: 0.8826565909012996
RMSE: latitude_t+1      1.407276
longitude_t+1     4.645139
cog_t+1          42.663907
rot_t+1           9.619382
heading_t+1      34.243320
latitude_t+2      1.952854
longitude_t+2     6.262466
cog_t+2          48.685988
rot_t+2          10.391904
heading_t+2      42.063612
dtype: float64
