# IUU Fishing Detection - Data Exploration

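This notebook explores the raw AIS (Automatic Identification System) data used for IUU (illegal, unreported and unregulated) fishing detection. It summarizes the dataset, plots the speed and course distributions, visualizes sample vessel trajectories (static and interactive), and looks at message volume by hour of day.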

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from pathlib import Path

# Notebook-wide plotting defaults: seaborn grid theme plus a default figure size
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## Load Data

In [None]:
# Read the raw AIS export; the 'timestamp' column is parsed into datetimes on load
df = pd.read_csv(
    '../data/raw/ais_data.csv',
    parse_dates=['timestamp'],
)

# Quick size / coverage summary: row count, distinct MMSI values, time span
print(f"Total records: {len(df)}")
print(f"Total vessels: {df['MMSI'].nunique()}")
print(f"Time range: {df['timestamp'].min()} to {df['timestamp'].max()}")

# Last expression of the cell: show the first rows as a rich table
df.head()

## Data Statistics

In [None]:
# Basic statistics: summary table for df computed by DataFrame.describe().
# As the last expression of the cell, the resulting table is what the notebook displays.
df.describe()

In [None]:
# Speed and course distributions, drawn side by side.
# Explicit Axes handles are used instead of the pyplot state machine so each
# panel is configured through its own object.
fig, (ax_speed, ax_course) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: histogram of the SOG column
ax_speed.hist(df['SOG'], bins=50, edgecolor='black')
ax_speed.set_xlabel('Speed Over Ground (knots)')
ax_speed.set_ylabel('Frequency')
ax_speed.set_title('Speed Distribution')

# Right panel: histogram of the COG column.
# The range is pinned to the full compass so the 36 bins are exactly 10 degrees
# wide and aligned to 0, 10, 20, ...; without it the bin edges follow the
# data's own min/max and are not comparable between datasets.
# NOTE(review): assumes COG is reported within [0, 360] degrees - values
# outside that range would be left out of the histogram; confirm against the data.
ax_course.hist(df['COG'], bins=36, range=(0, 360), edgecolor='black')
ax_course.set_xticks(range(0, 361, 45))
ax_course.set_xlabel('Course Over Ground (degrees)')
ax_course.set_ylabel('Frequency')
ax_course.set_title('Course Distribution')

fig.tight_layout()
plt.show()

## Vessel Trajectories

In [None]:
# Static plot of the tracks of the first five distinct MMSI values in the data
sample_vessels = df['MMSI'].unique()[:5]

fig, ax = plt.subplots(figsize=(12, 8))
for mmsi in sample_vessels:
    # One vessel's records, ordered in time so the line follows its path
    track = df.loc[df['MMSI'] == mmsi].sort_values('timestamp')
    ax.plot(
        track['lon'],
        track['lat'],
        marker='o',
        markersize=2,
        label=f'MMSI: {mmsi}',
    )

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Sample Vessel Trajectories')
ax.legend()
ax.grid(True)
plt.show()

## Interactive Map

In [None]:
# Interactive map of the sample trajectories, centred on the mean position
# of all records in df.
center_lat = df['lat'].mean()
center_lon = df['lon'].mean()

m = folium.Map(location=[center_lat, center_lon], zoom_start=6)

# One colour per vessel (cycled if there are more vessels than colours) so
# overlapping tracks can be told apart on the map.
track_colors = ['blue', 'red', 'green', 'purple', 'orange']

# Add sample trajectories (sample_vessels is defined in the trajectory cell above)
for idx, mmsi in enumerate(sample_vessels):
    vessel_data = df[df['MMSI'] == mmsi].sort_values('timestamp')
    # folium validates the locations it is given: drop rows with missing
    # coordinates and skip vessels left with no points, so a single bad
    # record cannot abort the whole map.
    points = vessel_data[['lat', 'lon']].dropna().values.tolist()
    if not points:
        continue
    folium.PolyLine(
        points,
        color=track_colors[idx % len(track_colors)],
        weight=2,
        opacity=0.7,
        tooltip=f'MMSI: {mmsi}',  # hover label identifying the vessel
    ).add_to(m)

# Last expression of the cell: renders the map inline
m

## Temporal Analysis

In [None]:
# AIS message volume by hour of day.
# Adds an 'hour' column to df (hour component of each timestamp), then counts rows per hour.
df['hour'] = df['timestamp'].dt.hour
hourly_counts = df.groupby('hour').size()

fig, ax = plt.subplots(figsize=(12, 4))
ax.bar(hourly_counts.index, hourly_counts.values)
ax.set_xlabel('Hour of Day')
ax.set_ylabel('Number of Messages')
ax.set_title('AIS Messages by Hour')
ax.grid(True, axis='y')  # horizontal guide lines only
plt.show()