In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import binascii
from shapely import wkb
import pyproj
from pyproj import Transformer

def convert_wkb_to_coordinates(wkb_string):
    # Step 0: Check if the value is a float
    if isinstance(wkb_string, float):
        return None, None

    # Step 1: Decode the hexadecimal string to binary format
    binary_string = binascii.unhexlify(wkb_string)

    # Step 2: Binary format parsing
    geometry = wkb.loads(binary_string)

    # Step 3: Convert coordinates to latitude and longitude
    if geometry and hasattr(geometry, 'wkt'):
        # Convert to Well-Known Text (WKT) format
        wkt = geometry.wkt

        # Define coordinate system transformation
        transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326")

        # Perform a coordinate transformation
        transformed = transformer.transform(geometry.x, geometry.y)

        # Extract latitude and longitude
        latitude, longitude = transformed[0], transformed[1]

        return latitude, longitude
    else:
        return None, None

# Load data from CSV file
#D:\장우영\LOCALSEARCH\Ship_DA\DA\data\FAmerge_20230531_103345.csv
data = pd.read_csv('D:/장우영/LOCALSEARCH/Ship_DA/DA/data/FAmerge_20230531_103345.csv', encoding='ANSI')

# Filter data according to conditions in a specific column
filtered_data = data[data['sog'] > 3]  # Certain columns and conditions must be modified accordingly

data = filtered_data

# Apply coordinate transformation and update dataframe
data[['latitude', 'longitude']] = data['geom'].apply(convert_wkb_to_coordinates).apply(pd.Series)

# convert "insert_time" to a number
data['year'] = pd.to_datetime(data['insert_time']).dt.year
data['month'] = pd.to_datetime(data['insert_time']).dt.month
data['day'] = pd.to_datetime(data['insert_time']).dt.day
data['hour'] = pd.to_datetime(data['insert_time']).dt.hour
data['minute'] = pd.to_datetime(data['insert_time']).dt.minute
data['second'] = pd.to_datetime(data['insert_time']).dt.minute

# Character selection
X = data[["mmsi", "ship_type", "latitude","longitude", "cog", "sog", "year", "month", "day", "hour", "minute", "second", "풍향", "유향", "기온", "수온", "풍속", "유속", "기압", "습도"]]
# select target variable
y_latitude = data["latitude"]  # Latitude values from the original dataset
y_longitude = data["longitude"]  # Longitude value from the original dataset
y_cog = data["cog"]  # COG value from the original dataset
y_sog = data["sog"]  # SOG value from the original dataset

# split the data into training and test sets
X_train, X_test, y_latitude_train, y_latitude_test, y_longitude_train, y_longitude_test, y_cog_train, y_cog_test, y_sog_train, y_sog_test = train_test_split(
    X, y_latitude, y_longitude, y_cog, y_sog, test_size=0.2, random_state=42
)

# Train Latitude model and make predictions
model_latitude = xgb.XGBRegressor()
model_latitude.fit(X_train, y_latitude_train)
y_latitude_pred = model_latitude.predict(X_test)

# Longitude model training and prediction
model_longitude = xgb.XGBRegressor()
model_longitude.fit(X_train, y_longitude_train)
y_longitude_pred = model_longitude.predict(X_test)

# COG model training and prediction
model_cog = xgb.XGBRegressor()
model_cog.fit(X_train, y_cog_train)
y_cog_pred = model_cog.predict(X_test)

# Train SOG model and predict
model_sog = xgb.XGBRegressor()
model_sog.fit(X_train, y_sog_train)
y_sog_pred = model_sog.predict(X_test)

# Accuracy Prediction
mse_latitude = mean_squared_error(y_latitude_test, y_latitude_pred)
mse_longitude = mean_squared_error(y_longitude_test, y_longitude_pred)
mse_cog = mean_squared_error(y_cog_test, y_cog_pred)
mse_sog = mean_squared_error(y_sog_test, y_sog_pred)

# Print the predicted Latitude, Longitude, COG, and SOG values
print("Predicted Latitude:", y_latitude_pred)
print("Predicted Longitude:", y_longitude_pred)
print("Predicted COG:", y_cog_pred)
print("Predicted SOG:", y_sog_pred)

# Accuracy Prediction
print("MSE Latitude:", mse_latitude)
print("MSE Longitude:", mse_longitude)
print("MSE COG:", mse_cog)
print("MSE SOG:", mse_sog)


Predicted Latitude: [34.968105 35.088757 35.101173 ... 34.957752 35.14165  35.0395  ]
Predicted Longitude: [129.1891  129.08821 129.08289 ... 129.07344 129.3745  128.99721]
Predicted COG: [235.25908 306.01062  85.02329 ... 345.60358 217.78711 306.06586]
Predicted SOG: [10.8009615 15.501712   5.3006487 ... 10.097366  11.198612  10.0006695]
MSE Latitude: 1.4540076803250032e-06
MSE Longitude: 8.261425142975267e-06
MSE COG: 0.057729287652559115
MSE SOG: 0.0003252940461795145
