In [1]:
import pandas as pd

# Read the raw AIS export into a DataFrame
file_path = 'aisdk-2024-11-12.csv'  # Update with your file path
df = pd.read_csv(file_path)

# Step 1: Keep only rows that have a usable destination and an ETA, i.e. drop
# every row whose 'Destination' is 'Unknown' OR whose 'ETA' is missing (NaN)
has_destination = df['Destination'].ne('Unknown')
has_eta = df['ETA'].notna()
df = df.loc[has_destination & has_eta]

In [3]:
# Step 2: Restrict the data to cargo vessels and tankers; every other
# 'Ship type' value is discarded
is_cargo_or_tanker = df['Ship type'].isin(['Cargo', 'Tanker'])
df = df.loc[is_cargo_or_tanker]

In [9]:
# Step 3: Drop repeated reports - for each ('IMO', '# Timestamp') pair only the
# first occurrence is kept.
# NOTE(review): rows sharing a placeholder IMO value at the same timestamp would
# also be collapsed here - confirm 'IMO' is always populated with a real number.
is_repeat = df.duplicated(subset=['IMO', '# Timestamp'], keep='first')
df = df.loc[~is_repeat]

In [11]:
# Discard rows whose 'Navigational status' is exactly the string 'Unknown'.
# NOTE(review): this is an exact match - confirm the spelling the dataset uses
# for an unknown status, otherwise nothing is filtered.
has_known_status = df['Navigational status'].ne('Unknown')
df = df.loc[has_known_status]

In [15]:
# Sanity check: report how many rows survived the cleaning steps and preview
# the first few of them
remaining_rows = len(df)
print("Number of rows after cleaning:", remaining_rows)
preview = df.head()
print(preview)

Number of rows after cleaning: 100000
                # Timestamp Type of mobile       MMSI   Latitude  Longitude  \
482390  12/11/2024 04:32:26        Class A  244830834  54.192375   9.531470   
701084  12/11/2024 06:34:33        Class A  538006764  55.596737  15.103172   
777103  12/11/2024 07:16:40        Class A  211686000  54.594628  12.781865   
453     12/11/2024 00:01:27        Class A  255915667  54.955152  13.491590   
92399   12/11/2024 00:55:16        Class A  256882000  55.414628  10.990253   

           Navigational status  ROT   SOG    COG  Heading  ... Length  \
482390  Under way using engine  0.0   7.0  249.8    249.0  ...  119.0   
701084  Under way using engine  0.7  13.4  241.9    243.0  ...  183.0   
777103  Under way using engine  0.0   8.9   74.0     73.0  ...   75.0   
453     Under way using engine  0.0  15.9  250.7    250.0  ...  170.0   
92399   Under way using engine  0.0  10.9  327.2    325.0  ...  205.0   

       Type of position fixing device Draught   

In [17]:
# Step 4: Write the cleaned frame to a new CSV file (the DataFrame index is
# not written) and confirm where it went
output_file_path = 'cleaned_data.csv'
df.to_csv(output_file_path, index=False)
print("Cleaned data saved to {}".format(output_file_path))

Cleaned data saved to cleaned_data.csv


In [None]:
import pandas as pd
import folium
from folium.plugins import FastMarkerCluster

# Load the cleaned CSV data
file_path = 'cleaned_data.csv'  # Update with your actual file path
df = pd.read_csv(file_path)

# Initialize the folium map centred on the data rather than on (0, 0), so the
# vessels are in view without manual panning/zooming. The median position is
# used so that a handful of outlying reports cannot drag the centre away.
positions = df[['Latitude', 'Longitude']].dropna()
if positions.empty:
    # No usable coordinates at all: fall back to the whole-world view.
    map_center, map_zoom = [0, 0], 2
else:
    map_center = positions.median().tolist()  # [latitude, longitude]
    map_zoom = 6  # regional starting view; adjust to taste
m = folium.Map(location=map_center, zoom_start=map_zoom)

# Separate the data into cargo and tanker ships
cargo_ships = df[df['Ship type'] == 'Cargo']
tanker_ships = df[df['Ship type'] == 'Tanker']

# Function to add one marker per ship position to an existing cluster/layer
def add_markers(data, color, cluster):
    """Add a coloured marker for every row of ``data`` to ``cluster``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'Latitude', 'Longitude' and 'Name'.
    color : str
        Colour passed to ``folium.Icon`` for every marker.
    cluster : object
        Target the markers are attached to via ``Marker.add_to``.

    Each marker's popup shows ``"Name: <Name column value>"``.
    """
    # Walk only the three columns that are needed; iterrows() would build a
    # full Series for every row just to read these three fields.
    for lat, lon, name in zip(data['Latitude'], data['Longitude'], data['Name']):
        folium.Marker(
            location=[lat, lon],
            popup=f"Name: {name}",
            icon=folium.Icon(color=color)
        ).add_to(cluster)

# Build one FastMarkerCluster per ship type by handing the plugin the raw
# [lat, lon, popup] rows plus a JavaScript callback. Creating an empty
# FastMarkerCluster and attaching one folium.Marker per row would bypass the
# plugin's data path and create a separate Python marker object for every row.
def _awesome_marker_callback(color):
    """Return the JS row callback that draws one marker in ``color``.

    The callback receives a ``[lat, lon, popup_text]`` row and returns a
    Leaflet marker with an AwesomeMarkers icon and the popup text bound to it.
    """
    return (
        "function (row) {"
        "var icon = L.AwesomeMarkers.icon({"
        "icon: 'info-sign', iconColor: 'white', "
        f"markerColor: '{color}', "
        "prefix: 'glyphicon', extraClasses: 'fa-rotate-0'});"
        "var marker = L.marker(new L.LatLng(row[0], row[1]), {icon: icon});"
        "marker.bindPopup(row[2]);"
        "return marker;"
        "}"
    )


def _add_fast_cluster(data, color):
    """Add every row of ``data`` to the map ``m`` as one FastMarkerCluster.

    ``data`` must provide 'Latitude', 'Longitude' and 'Name'; the popup text
    is ``"Name: <Name column value>"``. Returns the cluster added to the map.
    """
    rows = [
        [float(lat), float(lon), f"Name: {name}"]
        for lat, lon, name in zip(data['Latitude'], data['Longitude'], data['Name'])
    ]
    return FastMarkerCluster(rows, callback=_awesome_marker_callback(color)).add_to(m)


# Add FastMarkerCluster for cargo ships (blue color)
cargo_cluster = _add_fast_cluster(cargo_ships, 'blue')

# Add FastMarkerCluster for tanker ships (red color)
tanker_cluster = _add_fast_cluster(tanker_ships, 'red')

# Display the map
m