In [1]:
import pandas as pd

In [7]:
df = pd.read_csv('/content/modified_ais_data.csv',nrows=100000)

In [8]:
df.head()

Unnamed: 0,BaseDateTime,MMSI,IMO,LAT,LON,SOG,COG
0,2022-03-31T00:00:01,671226100,IMO9221322,25.77626,-80.2032,3.2,143.7
1,2022-03-31T00:00:06,367452810,IMO9602344,29.32824,-94.77391,2.6,319.2
2,2022-03-31T00:00:09,366919770,IMO9253583,48.74428,-122.49504,0.0,210.3
3,2022-03-31T00:00:00,311000966,IMO8916607,18.55833,-66.4791,17.5,274.5
4,2022-03-31T00:00:01,219028420,IMO9411305,28.66703,-93.59339,9.5,129.4


In [9]:
import math

def lat_lon_to_cartesian(lat, lon, R=6371):
    lat_rad = math.radians(lat)
    lon_rad = math.radians(lon)
    x = R * math.cos(lat_rad) * math.cos(lon_rad)
    y = R * math.cos(lat_rad) * math.sin(lon_rad)
    z = R * math.sin(lat_rad)
    return x, y, z

def cartesian_to_lat_lon(x, y, z, R=6371):
    lon = math.degrees(math.atan2(y, x))
    lat = math.degrees(math.asin(z / R))
    return lat, lon


In [10]:
df[['x', 'y', 'z']] = df.apply(lambda row: pd.Series(lat_lon_to_cartesian(row['LAT'], row['LON'])), axis=1)

In [11]:
df.head()

Unnamed: 0,BaseDateTime,MMSI,IMO,LAT,LON,SOG,COG,x,y,z
0,2022-03-31T00:00:01,671226100,IMO9221322,25.77626,-80.2032,3.2,143.7,976.189651,-5653.417795,2770.480464
1,2022-03-31T00:00:06,367452810,IMO9602344,29.32824,-94.77391,2.6,319.2,-462.261129,-5535.146877,3120.593645
2,2022-03-31T00:00:09,366919770,IMO9253583,48.74428,-122.49504,0.0,210.3,-2256.980451,-3543.426543,4789.552023
3,2022-03-31T00:00:00,311000966,IMO8916607,18.55833,-66.4791,17.5,274.5,2410.348516,-5537.896973,2027.697745
4,2022-03-31T00:00:01,219028420,IMO9411305,28.66703,-93.59339,9.5,129.4,-350.359001,-5579.067584,3056.287693


In [28]:
import numpy as np
def haversine(lat1, lon1, lat2, lon2, R=6371):
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    c = 2 * np.arcsin(np.sqrt(a))
    return R * c

In [29]:
df["BaseDateTime"] = pd.to_datetime(df["BaseDateTime"])

In [30]:
def calculate_distances(group):
    group = group.sort_values(by="BaseDateTime")
    group["next_lat"] = group["LAT"].shift(-1)
    group["next_long"] = group["LON"].shift(-1)
    group["distance_km"] = group.apply(
        lambda row: haversine(row["LAT"], row["LON"], row["next_lat"], row["next_long"])
        if not np.isnan(row["next_lat"])
        else 0,
        axis=1,
    )
    return group.drop(columns=["next_lat", "next_long"])

In [33]:
df = df.groupby("MMSI").apply(calculate_distances).reset_index(drop=True)
df.head()

  df = df.groupby("MMSI").apply(calculate_distances).reset_index(drop=True)


Unnamed: 0,BaseDateTime,MMSI,IMO,LAT,LON,SOG,COG,x,y,z,distance_km
0,2022-03-31 00:02:32,111,IMO0000001,27.35372,-94.62546,0.4,228.6,-456.32303,-5640.208831,2927.363085,0.00395
1,2022-03-31 00:05:35,111,IMO0000001,27.35372,-94.6255,0.6,219.8,-456.326968,-5640.208513,2927.363085,0.008126
2,2022-03-31 00:08:34,111,IMO0000001,27.35377,-94.62556,0.2,221.7,-456.332668,-5640.205489,2927.368023,0.003479
3,2022-03-31 00:11:31,111,IMO0000001,27.3538,-94.62557,0.3,105.0,-456.333529,-5640.203881,2927.370986,0.022308
4,2022-03-31 00:14:33,111,IMO0000001,27.35365,-94.62542,0.3,173.4,-456.319381,-5640.212715,2927.356172,0.002224


In [34]:
def calculate_durations(group):
    # Ensure BaseDateTime is a datetime type
    group['BaseDateTime'] = pd.to_datetime(group['BaseDateTime'])
    # Calculate the duration between consecutive points in seconds
    group['duration_seconds'] = group['BaseDateTime'].diff().dt.total_seconds()
    return group

In [35]:
df = df.groupby("MMSI", group_keys=False).apply(calculate_durations)
df = df.reset_index(drop=True)
df.head()

  df = df.groupby("MMSI", group_keys=False).apply(calculate_durations)


Unnamed: 0,BaseDateTime,MMSI,IMO,LAT,LON,SOG,COG,x,y,z,distance_km,duration_seconds
0,2022-03-31 00:02:32,111,IMO0000001,27.35372,-94.62546,0.4,228.6,-456.32303,-5640.208831,2927.363085,0.00395,
1,2022-03-31 00:05:35,111,IMO0000001,27.35372,-94.6255,0.6,219.8,-456.326968,-5640.208513,2927.363085,0.008126,183.0
2,2022-03-31 00:08:34,111,IMO0000001,27.35377,-94.62556,0.2,221.7,-456.332668,-5640.205489,2927.368023,0.003479,179.0
3,2022-03-31 00:11:31,111,IMO0000001,27.3538,-94.62557,0.3,105.0,-456.333529,-5640.203881,2927.370986,0.022308,177.0
4,2022-03-31 00:14:33,111,IMO0000001,27.35365,-94.62542,0.3,173.4,-456.319381,-5640.212715,2927.356172,0.002224,182.0


In [36]:
df_new = df[['x','y','z','distance_km','duration_seconds','SOG','COG']]

In [37]:
df_new.head()

Unnamed: 0,x,y,z,distance_km,duration_seconds,SOG,COG
0,-456.32303,-5640.208831,2927.363085,0.00395,,0.4,228.6
1,-456.326968,-5640.208513,2927.363085,0.008126,183.0,0.6,219.8
2,-456.332668,-5640.205489,2927.368023,0.003479,179.0,0.2,221.7
3,-456.333529,-5640.203881,2927.370986,0.022308,177.0,0.3,105.0
4,-456.319381,-5640.212715,2927.356172,0.002224,182.0,0.3,173.4


Velocity Components:

Convert speed over ground (SOG) to meters per second:
Speed
(
m/s
)
=
SOG (knots)
×
0.514444
Speed(m/s)=SOG (knots)×0.514444
Calculate the velocity components using COG (Course Over Ground in degrees):
𝑣
𝑥
=
Speed
×
cos
⁡
(
COG in radians
)
v
x
​
 =Speed×cos(COG in radians)
𝑣
𝑦
=
Speed
×
sin
⁡
(
COG in radians
)
v
y
​
 =Speed×sin(COG in radians)

The 2D position update formula calculates the new position
(
𝑥
next
,
𝑦
next
)
(x
next
​
 ,y
next
​
 ) of an object moving in a 2D plane, given its current position
(
𝑥
,
𝑦
)
(x,y), speed, direction, and time interval.

Formula:
𝑥
next
=
𝑥
+
𝑣
𝑥
⋅
Δ
𝑡
x
next
​
 =x+v
x
​
 ⋅Δt
𝑦
next
=
𝑦
+
𝑣
𝑦
⋅
Δ
𝑡
y
next
​
 =y+v
y
​
 ⋅Δt
Where:

𝑣
𝑥
v
x
​
  and
𝑣
𝑦
v
y
​
  are the velocity components in the
𝑥
x- and
𝑦
y-directions, respectively:

𝑣
𝑥
=
𝑣
⋅
cos
⁡
(
𝜃
)
v
x
​
 =v⋅cos(θ)
𝑣
𝑦
=
𝑣
⋅
sin
⁡
(
𝜃
)
v
y
​
 =v⋅sin(θ)
𝑣
v is the speed (in meters per second or any consistent unit).

𝜃
θ is the direction of movement in radians (e.g., the Course Over Ground (COG) in nautical contexts).

Δ
𝑡
Δt is the duration of movement (in seconds or any consistent unit).

Example:
If an object is moving with:

Current position
(
𝑥
,
𝑦
)
=
(
100
,
200
)
(x,y)=(100,200),
Speed
𝑣
=
5
v=5 m/s,
Direction
𝜃
=
4
5
∘
θ=45
∘
  (or
𝜋
/
4
π/4 radians),
Duration
Δ
𝑡
=
10
Δt=10 seconds,
We calculate:

𝑣
𝑥
=
5
⋅
cos
⁡
(
𝜋
/
4
)
=
5
⋅
0.707
≈
3.535
v
x
​
 =5⋅cos(π/4)=5⋅0.707≈3.535
𝑣
𝑦
=
5
⋅
sin
⁡
(
𝜋
/
4
)
=
5
⋅
0.707
≈
3.535
v
y
​
 =5⋅sin(π/4)=5⋅0.707≈3.535
Update positions:
𝑥
next
=
100
+
3.535
⋅
10
=
135.35
x
next
​
 =100+3.535⋅10=135.35
𝑦
next
=
200
+
3.535
⋅
10
=
235.35
y
next
​
 =200+3.535⋅10=235.35
The new position is approximately:

(
𝑥
next
,
𝑦
next
)
=
(
135.35
,
235.35
)
(x
next
​
 ,y
next
​
 )=(135.35,235.35)

In [40]:
def calculate_next_positions(df):
    # Define constants
    KNOTS_TO_METERS_PER_SEC = 0.514444

    # Initialize new columns
    df["lx"] = df["x"]  # Default to current x
    df["ly"] = df["y"]  # Default to current y
    df["lz"] = df["z"]  # Default to current z

    for i in range(len(df) - 1):  # Loop through all except the last row
        # Skip rows with NaN duration or speed
        if pd.isna(df.loc[i, "duration_seconds"]) or df.loc[i, "SOG"] == 0:
            continue

        # Convert SOG to meters per second
        speed = df.loc[i, "SOG"] * KNOTS_TO_METERS_PER_SEC

        # Convert COG to radians
        cog_radians = np.radians(df.loc[i, "COG"])

        # Compute velocity components
        vx = speed * np.cos(cog_radians)
        vy = speed * np.sin(cog_radians)

        # Calculate next positions
        df.loc[i + 1, "lx"] = df.loc[i, "x"] + vx * df.loc[i, "duration_seconds"]
        df.loc[i + 1, "ly"] = df.loc[i, "y"] + vy * df.loc[i, "duration_seconds"]
        df.loc[i + 1, "lz"] = df.loc[i, "z"]  # Assume z remains constant

    return df



In [41]:
pred_df = calculate_next_positions(df)
pred_df.head()

Unnamed: 0,BaseDateTime,MMSI,IMO,LAT,LON,SOG,COG,x,y,z,distance_km,duration_seconds,lx,ly,lz
0,2022-03-31 00:02:32,111,IMO0000001,27.35372,-94.62546,0.4,228.6,-456.32303,-5640.208831,2927.363085,0.00395,,-456.32303,-5640.208831,2927.363085
1,2022-03-31 00:05:35,111,IMO0000001,27.35372,-94.6255,0.6,219.8,-456.326968,-5640.208513,2927.363085,0.008126,183.0,-456.326968,-5640.208513,2927.363085
2,2022-03-31 00:08:34,111,IMO0000001,27.35377,-94.62556,0.2,221.7,-456.332668,-5640.205489,2927.368023,0.003479,179.0,-499.724194,-5676.365718,2927.363085
3,2022-03-31 00:11:31,111,IMO0000001,27.3538,-94.62557,0.3,105.0,-456.333529,-5640.203881,2927.370986,0.022308,177.0,-470.083575,-5652.457099,2927.368023
4,2022-03-31 00:14:33,111,IMO0000001,27.35365,-94.62542,0.3,173.4,-456.319381,-5640.212715,2927.356172,0.002224,182.0,-463.403683,-5613.817708,2927.370986
