In [3]:
# ===========================================
# ILLEGAL FISHING AI DETECTOR - STEP 1: LOAD DATA
# ===========================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
import warnings
warnings.filterwarnings('ignore')

# Upload your data folder (drag the entire 'data' folder here)
from google.colab import files
print("üìÅ Drag your 'data' folder here:")
uploaded = files.upload()

# Load the 3 datasets
df_trajectories = pd.read_csv('data\128_fishing_trajs.csv')  # Main AIS data


print("‚úÖ DATA LOADED!")
print(f"Trajectories: {df_trajectories.shape}")

print("\nTrajectories columns:", df_trajectories.columns.tolist()[:10])


üìÅ Drag your 'data' folder here:


Saving 128_fishing_trajs.csv to 128_fishing_trajs.csv


FileNotFoundError: [Errno 2] No such file or directory: 'data\n8_fishing_trajs.csv'

In [4]:
df_trajectories = pd.read_csv('128_fishing_trajs.csv')


In [6]:
df_trajectories.columns


Index(['id', 't', 'longitude', 'latitude', 'x', 'y', 'signed_turn', 'bearing',
       'time_gap', 'distance_gap', 'euc_speed', 'distanceToShore', 'label'],
      dtype='object')

In [7]:
df_trajectories['label'].value_counts()


Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
02-fishing,764388
03-sailing,129645
01-sailing,120589
04-fishing,11133
05-sailing,524


In [8]:
# create new column
df_trajectories['fishing_binary'] = df_trajectories['label'].apply(
    lambda x: 1 if 'fishing' in x else 0
)

df_trajectories['fishing_binary'].value_counts()


Unnamed: 0_level_0,count
fishing_binary,Unnamed: 1_level_1
1,775521
0,250758


In [9]:
features = [
    'longitude',
    'latitude',
    'signed_turn',
    'bearing',
    'time_gap',
    'distance_gap',
    'euc_speed',
    'distanceToShore'
]

X = df_trajectories[features]
y = df_trajectories['fishing_binary']


In [10]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [11]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    n_estimators=120,
    max_depth=18,
    n_jobs=-1,
    random_state=42
)

model.fit(X_train, y_train)


In [12]:
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

y_pred = model.predict(X_test)


In [13]:
print("Accuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.9949721323615388


In [14]:
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.99      0.99      0.99     50152
           1       1.00      1.00      1.00    155104

    accuracy                           0.99    205256
   macro avg       0.99      0.99      0.99    205256
weighted avg       0.99      0.99      0.99    205256



In [15]:
print(confusion_matrix(y_test, y_pred))


[[ 49509    643]
 [   389 154715]]


In [16]:
import pandas as pd

sample = X_test.iloc[0:1]   # take one unseen vessel point

prediction = model.predict(sample)

print("Prediction:", "Fishing" if prediction[0]==1 else "Sailing")


Prediction: Fishing


In [17]:
# take one unseen ship point
sample = X_test.iloc[0:1]

# ML prediction
prediction = model.predict(sample)

# get distance from shore
distance = sample['distanceToShore'].values[0]

print("Distance from shore (meters):", distance)
print("Model says:", "Fishing" if prediction[0]==1 else "Sailing")

# rule engine
if prediction[0] == 1 and distance < 20000:
    print("‚ö†Ô∏è WARNING: Possible Illegal Fishing Detected")
else:
    print("No illegal activity")


Distance from shore (meters): 37651.997808592
Model says: Fishing
No illegal activity


In [18]:
alerts = []

# check first 500 unseen points
for i in range(500):

    sample = X_test.iloc[i:i+1]
    prediction = model.predict(sample)[0]
    distance = sample['distanceToShore'].values[0]

    # illegal fishing condition
    if prediction == 1 and distance < 20000:
        alerts.append(i)

print("Total suspicious detections:", len(alerts))


Total suspicious detections: 161


In [19]:
suspicious_vessels = df_trajectories.iloc[X_test.index[alerts]]

suspicious_vessels[['id','longitude','latitude','distanceToShore']].head()


Unnamed: 0,id,longitude,latitude,distanceToShore
29726,219001039-2,8.063177,55.595673,2115.984017
446324,219014851-2,8.036378,55.625473,4745.186264
6187,211477000-3,12.271288,56.253045,13829.793264
169234,219004128-3,11.886823,56.342547,19283.188728
442461,219014851-2,8.03856,55.633118,4888.415986


In [20]:
report = suspicious_vessels[['id','longitude','latitude','distanceToShore']]

report.to_csv("illegal_fishing_report.csv", index=False)

print("Illegal fishing report generated successfully!")


Illegal fishing report generated successfully!


In [21]:
!pip install folium




In [22]:
import folium

# create base map
m = folium.Map(location=[suspicious_vessels['latitude'].mean(),
                         suspicious_vessels['longitude'].mean()],
               zoom_start=5)

# add red markers for illegal fishing
for _, row in suspicious_vessels.iterrows():
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=3,
        color='red',
        fill=True
    ).add_to(m)

m


In [23]:
!pip install streamlit


Collecting streamlit
  Downloading streamlit-1.54.0-py3-none-any.whl.metadata (9.8 kB)
Collecting cachetools<7,>=5.5 (from streamlit)
  Downloading cachetools-6.2.6-py3-none-any.whl.metadata (5.6 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.54.0-py3-none-any.whl (9.1 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m9.1/9.1 MB[0m [31m50.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cachetools-6.2.6-py3-none-any.whl (11 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m64.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cachetools, pydeck, streamlit
  Attempting uninstall: cachetools
    Found existing in

In [24]:
import joblib
joblib.dump(model, "fishing_model.pkl")


['fishing_model.pkl']