In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

# Directory that receives the CSV written by this cell. Built with pathlib so
# the path separator is correct on every OS (a literal "\\" only works on Windows).
OUTPUT_DIR = Path('Generated_csv')
# Rows whose absolute Z-score exceeds this value are flagged as anomalies.
ZSCORE_THRESHOLD = 3

# Load AIS data with extracted features (e.g., from the previous feature extraction code)
vessel_data = pd.read_csv('vessel_data_with_features.csv')

# Calculate Z-scores for Speed Over Ground (SOG) and Course Over Ground (COG).
# Mean and standard deviation are taken over the whole column, i.e. over every
# row in the file at once.
# NOTE(review): COG is treated as a plain number here. If it is a compass angle
# that wraps at 360 degrees, values on either side of the wrap look far apart to
# a linear Z-score -- confirm whether that matters for this data.
vessel_data['sog_zscore'] = (vessel_data['SOG'] - vessel_data['SOG'].mean()) / vessel_data['SOG'].std()
vessel_data['cog_zscore'] = (vessel_data['COG'] - vessel_data['COG'].mean()) / vessel_data['COG'].std()

# Flag anomalies where the absolute Z-score is greater than the threshold.
# A NaN Z-score (missing value, or zero standard deviation) compares as False,
# so such rows are never flagged.
vessel_data['sog_anomaly'] = vessel_data['sog_zscore'].abs() > ZSCORE_THRESHOLD
vessel_data['cog_anomaly'] = vessel_data['cog_zscore'].abs() > ZSCORE_THRESHOLD

# Save results to CSV, creating the output directory first so the write does
# not fail on a fresh checkout.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
vessel_data.to_csv(OUTPUT_DIR / 'vessel_data_with_anomalies_zscore.csv', index=False)
print("Anomalies detected using Z-score analysis have been saved to 'vessel_data_with_anomalies_zscore.csv'")


Anomalies detected using Z-score analysis have been saved to 'vessel_data_with_anomalies_zscore.csv'


In [3]:
# Imported in this cell so the cell does not depend on an earlier cell having
# imported Path. `vessel_data` and `np` still come from the first cell.
from pathlib import Path

# Directory that receives the CSV written by this cell. Built with pathlib so
# the path separator is correct on every OS (a literal "\\" only works on Windows).
OUTPUT_DIR = Path('Generated_csv')
# Number of consecutive rows averaged by the rolling window.
MOVING_AVG_WINDOW = 5
# A row is anomalous when it deviates from the moving average by more than
# this many (whole-column) standard deviations.
MOVING_AVG_STD_FACTOR = 2

# Calculate moving averages for SOG and COG.
# The window covers the current row and the rows before it, so the first
# MOVING_AVG_WINDOW - 1 rows have no average (NaN).
vessel_data['sog_moving_avg'] = vessel_data['SOG'].rolling(window=MOVING_AVG_WINDOW).mean()
vessel_data['cog_moving_avg'] = vessel_data['COG'].rolling(window=MOVING_AVG_WINDOW).mean()

# Calculate anomalies based on deviations from the moving average.
# Rows with a NaN moving average compare as False and are never flagged.
# NOTE(review): COG is averaged and differenced as a plain number. If it is a
# compass angle that wraps at 360 degrees, a course oscillating around the wrap
# produces a misleading average -- confirm whether that matters for this data.
sog_limit = MOVING_AVG_STD_FACTOR * vessel_data['SOG'].std()
cog_limit = MOVING_AVG_STD_FACTOR * vessel_data['COG'].std()
vessel_data['sog_anomaly_moving_avg'] = np.abs(vessel_data['SOG'] - vessel_data['sog_moving_avg']) > sog_limit
vessel_data['cog_anomaly_moving_avg'] = np.abs(vessel_data['COG'] - vessel_data['cog_moving_avg']) > cog_limit

# Save results to CSV, creating the output directory first so the write does
# not fail on a fresh checkout.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
vessel_data.to_csv(OUTPUT_DIR / 'vessel_data_with_anomalies_moving_avg.csv', index=False)
print("Anomalies detected using moving average analysis have been saved to 'vessel_data_with_anomalies_moving_avg.csv'")


Anomalies detected using moving average analysis have been saved to 'vessel_data_with_anomalies_moving_avg.csv'


In [3]:
from pathlib import Path

import numpy as np
import pandas as pd
from pykalman import KalmanFilter

# Directory holding both the input features file and the CSV written by this
# cell. Built with pathlib so the path separator is correct on every OS
# (a literal "\\" only works on Windows).
DATA_DIR = Path('Generated_csv')
# A row is anomalous when SOG deviates from the Kalman estimate by more than
# this many (whole-column) standard deviations.
KALMAN_STD_FACTOR = 2

# Load AIS data with extracted features (e.g., from the previous feature extraction code)
vessel_data = pd.read_csv(DATA_DIR / 'vessel_data_with_features.csv')

# Example: Kalman Filter for Speed (SOG).
# NaN/inf readings are masked so pykalman treats them as missing observations
# instead of letting them propagate into every later state estimate.
sog_values = np.ma.masked_invalid(vessel_data['SOG'].to_numpy(dtype=float))

# Initialize Kalman Filter with a single observed dimension and a starting
# state mean of 0; all other parameters are left for EM to estimate.
kf = KalmanFilter(initial_state_mean=0, n_dim_obs=1)

# Fit the filter parameters with EM on the SOG series, then run the fitted
# filter over the same series to estimate speed.
state_means, state_covariances = kf.em(sog_values).filter(sog_values)

# Detect anomalies where actual SOG significantly deviates from the
# Kalman-filtered estimate. `state_means` is 2-D (one column per state
# dimension, a single one here), so take that column explicitly.
vessel_data['sog_kalman'] = state_means[:, 0]
sog_limit = KALMAN_STD_FACTOR * vessel_data['SOG'].std()
# Rows with a missing SOG compare as False and are never flagged.
vessel_data['sog_anomaly_kalman'] = np.abs(vessel_data['SOG'] - vessel_data['sog_kalman']) > sog_limit

# Save results to CSV
vessel_data.to_csv(DATA_DIR / 'vessel_data_with_anomalies_kalman.csv', index=False)
print("Anomalies detected using Kalman Filter have been saved to 'vessel_data_with_anomalies_kalman.csv'")


Anomalies detected using Kalman Filter have been saved to 'vessel_data_with_anomalies_kalman.csv'
