In [3]:
# ------------------- IMPORT LIBRARIES -------------------
import pandas as pd
import numpy as np
from geopy.distance import geodesic
from tqdm import tqdm
import os

In [4]:
# ------------------- SETTINGS -------------------
data_path = "DATA/[P1] AIS Data/nari_dynamic.csv"  # AIS data file
master_csv = "near_collision_master.csv"           # Master CSV to store results
distance_threshold = 1852                          # meters (1 nautical mile)
chunk_size = 100_000                               # number of rows per chunk

In [5]:
# ------------------- FUNCTION TO CALCULATE DISTANCE -------------------
def compute_distance(row1, row2):
    """Return distance (meters) between two points (lat, lon)."""
    return geodesic((row1["lat"], row1["lon"]), (row2["lat"], row2["lon"])).meters


In [6]:
# ------------------- INITIALIZE MASTER CSV -------------------
master_csv = "near_collision_master.csv"
file_exists = os.path.exists(master_csv)  # check if file already exists

# ------------------- PROCESS DATA IN CHUNKS -------------------
chunk_counter = 0

for chunk in pd.read_csv(data_path, chunksize=chunk_size):
    chunk_counter += 1
    print(f"\nProcessing chunk {chunk_counter} with {len(chunk)} rows")

    # Rename column if needed
    if "sourcemmsi" in chunk.columns:
        chunk = chunk.rename(columns={"sourcemmsi": "mmsi"})

    # Drop missing crucial values
    chunk = chunk.dropna(subset=["mmsi", "lat", "lon", "t"])

    # Convert timestamp and sort
    chunk["datetime"] = pd.to_datetime(chunk["t"], unit="s")
    chunk = chunk.sort_values("datetime")

    # ------------------- FILTER MOVING SHIPS -------------------
    chunk = chunk[chunk["speedoverground"] > 0.5]  # only ships moving > 0.5 knots

    # ------------------- GROUP BY 1-MINUTE TIME WINDOWS -------------------
    chunk["time_window"] = chunk["datetime"].dt.floor("1min")
    grouped = chunk.groupby("time_window")

    # ------------------- NEAR-COLLISION DETECTION -------------------
    near_collisions = []

    for time_window, group in tqdm(grouped, desc=f"Chunk {chunk_counter}"):
        ships = group.to_dict("records")
        n = len(ships)
        
        seen_pairs = set()  # reset for each minute

        for i in range(n):
            for j in range(i + 1, n):
                ship1, ship2 = ships[i], ships[j]

                # Skip self-collision
                if ship1["mmsi"] == ship2["mmsi"]:
                    continue

                # Avoid duplicate pairs
                pair_id = tuple(sorted([ship1["mmsi"], ship2["mmsi"]]))
                if pair_id in seen_pairs:
                    continue
                seen_pairs.add(pair_id)

                # Compute distance
                dist = compute_distance(ship1, ship2)
                if dist < distance_threshold:
                    near_collisions.append({
                        "time_window": time_window,
                        "mmsi_1": ship1["mmsi"],
                        "mmsi_2": ship2["mmsi"],
                        "lat_1": ship1["lat"], "lon_1": ship1["lon"],
                        "lat_2": ship2["lat"], "lon_2": ship2["lon"],
                        "distance_m": dist,
                        "speed_1": ship1["speedoverground"], "speed_2": ship2["speedoverground"],
                        "course_1": ship1["courseoverground"], "course_2": ship2["courseoverground"]
                    })

    # ------------------- APPEND RESULTS TO MASTER CSV -------------------
    collisions_df = pd.DataFrame(near_collisions)
    print(f"Near-collision events found in chunk {chunk_counter}: {len(collisions_df)}")

    # Append to CSV with header only if file does not exist yet
    collisions_df.to_csv(master_csv, index=False, mode="a", header=not file_exists)

    # After first write, file now exists
    file_exists = True



Processing chunk 1 with 100000 rows


Chunk 1: 100%|██████████| 1929/1929 [00:10<00:00, 191.46it/s]


Near-collision events found in chunk 1: 1648

Processing chunk 2 with 100000 rows


Chunk 2: 100%|██████████| 1728/1728 [00:05<00:00, 300.09it/s]


Near-collision events found in chunk 2: 922

Processing chunk 3 with 100000 rows


Chunk 3: 100%|██████████| 1747/1747 [00:03<00:00, 546.30it/s]


Near-collision events found in chunk 3: 182

Processing chunk 4 with 100000 rows


Chunk 4: 100%|██████████| 1699/1699 [00:03<00:00, 439.29it/s]


Near-collision events found in chunk 4: 643

Processing chunk 5 with 100000 rows


Chunk 5: 100%|██████████| 1360/1360 [00:03<00:00, 344.31it/s]


Near-collision events found in chunk 5: 618

Processing chunk 6 with 100000 rows


Chunk 6: 100%|██████████| 1014/1014 [00:11<00:00, 85.59it/s]


Near-collision events found in chunk 6: 2387

Processing chunk 7 with 100000 rows


Chunk 7: 100%|██████████| 1110/1110 [00:07<00:00, 149.06it/s]


Near-collision events found in chunk 7: 858

Processing chunk 8 with 100000 rows


Chunk 8: 100%|██████████| 1368/1368 [00:05<00:00, 232.55it/s]


Near-collision events found in chunk 8: 916

Processing chunk 9 with 100000 rows


Chunk 9: 100%|██████████| 2102/2102 [00:04<00:00, 474.89it/s]


Near-collision events found in chunk 9: 542

Processing chunk 10 with 100000 rows


Chunk 10: 100%|██████████| 1860/1860 [00:07<00:00, 233.29it/s]


Near-collision events found in chunk 10: 894

Processing chunk 11 with 100000 rows


Chunk 11: 100%|██████████| 1595/1595 [00:06<00:00, 251.77it/s]


Near-collision events found in chunk 11: 1318

Processing chunk 12 with 100000 rows


Chunk 12: 100%|██████████| 1613/1613 [00:06<00:00, 258.81it/s]


Near-collision events found in chunk 12: 804

Processing chunk 13 with 100000 rows


Chunk 13: 100%|██████████| 1530/1530 [00:03<00:00, 398.50it/s]


Near-collision events found in chunk 13: 602

Processing chunk 14 with 100000 rows


Chunk 14: 100%|██████████| 1638/1638 [00:05<00:00, 324.60it/s]


Near-collision events found in chunk 14: 921

Processing chunk 15 with 100000 rows


Chunk 15: 100%|██████████| 1664/1664 [00:03<00:00, 443.62it/s]


Near-collision events found in chunk 15: 293

Processing chunk 16 with 100000 rows


Chunk 16: 100%|██████████| 1580/1580 [00:03<00:00, 436.97it/s]


Near-collision events found in chunk 16: 660

Processing chunk 17 with 100000 rows


Chunk 17: 100%|██████████| 1624/1624 [00:04<00:00, 405.56it/s]


Near-collision events found in chunk 17: 753

Processing chunk 18 with 100000 rows


Chunk 18: 100%|██████████| 1445/1445 [00:09<00:00, 150.43it/s]


Near-collision events found in chunk 18: 607

Processing chunk 19 with 100000 rows


Chunk 19: 100%|██████████| 1721/1721 [00:07<00:00, 238.62it/s]


Near-collision events found in chunk 19: 924

Processing chunk 20 with 100000 rows


Chunk 20: 100%|██████████| 2113/2113 [00:03<00:00, 547.00it/s]


Near-collision events found in chunk 20: 136

Processing chunk 21 with 100000 rows


Chunk 21: 100%|██████████| 1782/1782 [00:05<00:00, 318.22it/s]


Near-collision events found in chunk 21: 292

Processing chunk 22 with 100000 rows


Chunk 22: 100%|██████████| 1247/1247 [00:04<00:00, 274.06it/s]


Near-collision events found in chunk 22: 391

Processing chunk 23 with 100000 rows


Chunk 23: 100%|██████████| 1382/1382 [00:05<00:00, 238.40it/s]


Near-collision events found in chunk 23: 577

Processing chunk 24 with 100000 rows


Chunk 24: 100%|██████████| 1262/1262 [00:03<00:00, 370.73it/s]


Near-collision events found in chunk 24: 489

Processing chunk 25 with 100000 rows


Chunk 25: 100%|██████████| 1775/1775 [00:04<00:00, 420.64it/s]


Near-collision events found in chunk 25: 453

Processing chunk 26 with 100000 rows


Chunk 26: 100%|██████████| 1914/1914 [00:05<00:00, 345.74it/s]


Near-collision events found in chunk 26: 262

Processing chunk 27 with 100000 rows


Chunk 27: 100%|██████████| 1539/1539 [00:11<00:00, 128.68it/s]


Near-collision events found in chunk 27: 619

Processing chunk 28 with 100000 rows


Chunk 28: 100%|██████████| 1669/1669 [00:04<00:00, 336.03it/s]


Near-collision events found in chunk 28: 505

Processing chunk 29 with 100000 rows


Chunk 29: 100%|██████████| 1495/1495 [00:06<00:00, 238.98it/s]


Near-collision events found in chunk 29: 636

Processing chunk 30 with 100000 rows


Chunk 30: 100%|██████████| 1425/1425 [00:05<00:00, 243.08it/s]


Near-collision events found in chunk 30: 649

Processing chunk 31 with 100000 rows


Chunk 31: 100%|██████████| 1738/1738 [00:05<00:00, 330.05it/s]


Near-collision events found in chunk 31: 763

Processing chunk 32 with 100000 rows


Chunk 32: 100%|██████████| 1726/1726 [00:04<00:00, 369.99it/s]


Near-collision events found in chunk 32: 399

Processing chunk 33 with 100000 rows


Chunk 33: 100%|██████████| 1464/1464 [00:11<00:00, 127.72it/s]


Near-collision events found in chunk 33: 730

Processing chunk 34 with 100000 rows


Chunk 34: 100%|██████████| 1757/1757 [00:08<00:00, 212.04it/s]


Near-collision events found in chunk 34: 531

Processing chunk 35 with 100000 rows


Chunk 35: 100%|██████████| 1642/1642 [00:07<00:00, 209.96it/s]


Near-collision events found in chunk 35: 970

Processing chunk 36 with 100000 rows


Chunk 36: 100%|██████████| 1816/1816 [00:07<00:00, 227.47it/s]


Near-collision events found in chunk 36: 1359

Processing chunk 37 with 100000 rows


Chunk 37: 100%|██████████| 1949/1949 [00:16<00:00, 116.04it/s]


Near-collision events found in chunk 37: 540

Processing chunk 38 with 100000 rows


Chunk 38: 100%|██████████| 1932/1932 [00:11<00:00, 170.49it/s]


Near-collision events found in chunk 38: 363

Processing chunk 39 with 100000 rows


Chunk 39: 100%|██████████| 2030/2030 [00:03<00:00, 533.97it/s]


Near-collision events found in chunk 39: 404

Processing chunk 40 with 100000 rows


Chunk 40: 100%|██████████| 1427/1427 [00:05<00:00, 250.90it/s]


Near-collision events found in chunk 40: 665

Processing chunk 41 with 100000 rows


Chunk 41: 100%|██████████| 1035/1035 [00:11<00:00, 89.09it/s] 


Near-collision events found in chunk 41: 1575

Processing chunk 42 with 100000 rows


Chunk 42: 100%|██████████| 1340/1340 [00:02<00:00, 456.36it/s]


Near-collision events found in chunk 42: 621

Processing chunk 43 with 100000 rows


Chunk 43: 100%|██████████| 1339/1339 [00:02<00:00, 648.00it/s]


Near-collision events found in chunk 43: 229

Processing chunk 44 with 100000 rows


Chunk 44: 100%|██████████| 1102/1102 [00:05<00:00, 199.93it/s]


Near-collision events found in chunk 44: 405

Processing chunk 45 with 100000 rows


Chunk 45: 100%|██████████| 1294/1294 [00:08<00:00, 160.23it/s]


Near-collision events found in chunk 45: 1657

Processing chunk 46 with 100000 rows


Chunk 46: 100%|██████████| 1578/1578 [00:06<00:00, 235.88it/s]


Near-collision events found in chunk 46: 627

Processing chunk 47 with 100000 rows


Chunk 47: 100%|██████████| 1463/1463 [00:08<00:00, 181.25it/s]


Near-collision events found in chunk 47: 1297

Processing chunk 48 with 100000 rows


Chunk 48: 100%|██████████| 1496/1496 [00:12<00:00, 116.13it/s]


Near-collision events found in chunk 48: 1089

Processing chunk 49 with 100000 rows


Chunk 49: 100%|██████████| 1522/1522 [00:06<00:00, 253.55it/s]


Near-collision events found in chunk 49: 607

Processing chunk 50 with 100000 rows


Chunk 50: 100%|██████████| 1700/1700 [00:02<00:00, 781.34it/s]


Near-collision events found in chunk 50: 37

Processing chunk 51 with 100000 rows


Chunk 51: 100%|██████████| 1591/1591 [00:04<00:00, 333.08it/s]


Near-collision events found in chunk 51: 492

Processing chunk 52 with 100000 rows


Chunk 52: 100%|██████████| 1554/1554 [00:07<00:00, 207.21it/s]


Near-collision events found in chunk 52: 564

Processing chunk 53 with 100000 rows


Chunk 53: 100%|██████████| 1586/1586 [00:15<00:00, 104.53it/s]


Near-collision events found in chunk 53: 779

Processing chunk 54 with 100000 rows


Chunk 54: 100%|██████████| 1740/1740 [00:09<00:00, 191.36it/s]


Near-collision events found in chunk 54: 1144

Processing chunk 55 with 100000 rows


Chunk 55: 100%|██████████| 2191/2191 [00:05<00:00, 432.78it/s]


Near-collision events found in chunk 55: 400

Processing chunk 56 with 100000 rows


Chunk 56: 100%|██████████| 1796/1796 [00:06<00:00, 263.05it/s]


Near-collision events found in chunk 56: 327

Processing chunk 57 with 100000 rows


Chunk 57: 100%|██████████| 1753/1753 [00:06<00:00, 270.82it/s]


Near-collision events found in chunk 57: 875

Processing chunk 58 with 100000 rows


Chunk 58: 100%|██████████| 1690/1690 [00:08<00:00, 193.84it/s]


Near-collision events found in chunk 58: 1036

Processing chunk 59 with 100000 rows


Chunk 59: 100%|██████████| 1708/1708 [00:06<00:00, 278.99it/s]


Near-collision events found in chunk 59: 688

Processing chunk 60 with 100000 rows


Chunk 60: 100%|██████████| 2403/2403 [00:04<00:00, 482.13it/s]


Near-collision events found in chunk 60: 300

Processing chunk 61 with 100000 rows


Chunk 61: 100%|██████████| 2159/2159 [00:06<00:00, 346.77it/s]


Near-collision events found in chunk 61: 689

Processing chunk 62 with 100000 rows


Chunk 62: 100%|██████████| 2201/2201 [00:10<00:00, 200.38it/s]


Near-collision events found in chunk 62: 408

Processing chunk 63 with 100000 rows


Chunk 63: 100%|██████████| 1339/1339 [00:48<00:00, 27.48it/s] 


Near-collision events found in chunk 63: 423

Processing chunk 64 with 100000 rows


Chunk 64: 100%|██████████| 2317/2317 [00:11<00:00, 197.72it/s]


Near-collision events found in chunk 64: 616

Processing chunk 65 with 100000 rows


Chunk 65: 100%|██████████| 2381/2381 [00:06<00:00, 372.50it/s]


Near-collision events found in chunk 65: 378

Processing chunk 66 with 100000 rows


Chunk 66: 100%|██████████| 2037/2037 [00:06<00:00, 336.88it/s]


Near-collision events found in chunk 66: 800

Processing chunk 67 with 100000 rows


Chunk 67: 100%|██████████| 1810/1810 [00:05<00:00, 330.97it/s]


Near-collision events found in chunk 67: 579

Processing chunk 68 with 100000 rows


Chunk 68: 100%|██████████| 2088/2088 [00:02<00:00, 1004.71it/s]


Near-collision events found in chunk 68: 22

Processing chunk 69 with 100000 rows


Chunk 69: 100%|██████████| 1232/1232 [01:13<00:00, 16.78it/s] 


Near-collision events found in chunk 69: 235

Processing chunk 70 with 100000 rows


Chunk 70: 100%|██████████| 2095/2095 [00:04<00:00, 492.00it/s]


Near-collision events found in chunk 70: 572

Processing chunk 71 with 100000 rows


Chunk 71: 100%|██████████| 1837/1837 [00:05<00:00, 310.78it/s]


Near-collision events found in chunk 71: 866

Processing chunk 72 with 100000 rows


Chunk 72: 100%|██████████| 2129/2129 [00:03<00:00, 601.93it/s]


Near-collision events found in chunk 72: 416

Processing chunk 73 with 100000 rows


Chunk 73: 100%|██████████| 2140/2140 [00:02<00:00, 752.02it/s] 


Near-collision events found in chunk 73: 381

Processing chunk 74 with 100000 rows


Chunk 74: 100%|██████████| 2031/2031 [00:06<00:00, 306.84it/s]


Near-collision events found in chunk 74: 460

Processing chunk 75 with 100000 rows


Chunk 75: 100%|██████████| 1936/1936 [00:07<00:00, 267.23it/s]


Near-collision events found in chunk 75: 1137

Processing chunk 76 with 100000 rows


Chunk 76: 100%|██████████| 1897/1897 [00:04<00:00, 473.25it/s]


Near-collision events found in chunk 76: 400

Processing chunk 77 with 100000 rows


Chunk 77: 100%|██████████| 2324/2324 [00:04<00:00, 538.52it/s] 


Near-collision events found in chunk 77: 431

Processing chunk 78 with 100000 rows


Chunk 78: 100%|██████████| 1770/1770 [00:06<00:00, 254.76it/s]


Near-collision events found in chunk 78: 1159

Processing chunk 79 with 100000 rows


Chunk 79: 100%|██████████| 1523/1523 [00:05<00:00, 291.98it/s]


Near-collision events found in chunk 79: 680

Processing chunk 80 with 100000 rows


Chunk 80: 100%|██████████| 1320/1320 [00:03<00:00, 416.85it/s]


Near-collision events found in chunk 80: 308

Processing chunk 81 with 100000 rows


Chunk 81: 100%|██████████| 1683/1683 [00:04<00:00, 355.69it/s]


Near-collision events found in chunk 81: 1076

Processing chunk 82 with 100000 rows


Chunk 82: 100%|██████████| 1576/1576 [00:03<00:00, 480.53it/s]


Near-collision events found in chunk 82: 1432

Processing chunk 83 with 100000 rows


Chunk 83: 100%|██████████| 1046/1046 [00:10<00:00, 100.69it/s]


Near-collision events found in chunk 83: 2115

Processing chunk 84 with 100000 rows


Chunk 84: 100%|██████████| 1200/1200 [00:09<00:00, 122.62it/s]


Near-collision events found in chunk 84: 2065

Processing chunk 85 with 100000 rows


Chunk 85: 100%|██████████| 1227/1227 [00:08<00:00, 148.94it/s]


Near-collision events found in chunk 85: 1496

Processing chunk 86 with 100000 rows


Chunk 86: 100%|██████████| 947/947 [00:19<00:00, 48.68it/s] 


Near-collision events found in chunk 86: 2217

Processing chunk 87 with 100000 rows


Chunk 87: 100%|██████████| 1025/1025 [00:09<00:00, 109.17it/s]


Near-collision events found in chunk 87: 2089

Processing chunk 88 with 100000 rows


Chunk 88: 100%|██████████| 1211/1211 [00:06<00:00, 173.21it/s]


Near-collision events found in chunk 88: 903

Processing chunk 89 with 100000 rows


Chunk 89: 100%|██████████| 1330/1330 [00:05<00:00, 248.51it/s]


Near-collision events found in chunk 89: 290

Processing chunk 90 with 100000 rows


Chunk 90: 100%|██████████| 1340/1340 [00:09<00:00, 148.81it/s]


Near-collision events found in chunk 90: 269

Processing chunk 91 with 100000 rows


Chunk 91: 100%|██████████| 776/776 [01:05<00:00, 11.78it/s] 


Near-collision events found in chunk 91: 2318

Processing chunk 92 with 100000 rows


Chunk 92: 100%|██████████| 1148/1148 [00:16<00:00, 69.86it/s] 


Near-collision events found in chunk 92: 3088

Processing chunk 93 with 100000 rows


Chunk 93: 100%|██████████| 1395/1395 [00:05<00:00, 268.66it/s]


Near-collision events found in chunk 93: 415

Processing chunk 94 with 100000 rows


Chunk 94: 100%|██████████| 1160/1160 [00:06<00:00, 189.22it/s]


Near-collision events found in chunk 94: 231

Processing chunk 95 with 100000 rows


Chunk 95: 100%|██████████| 1127/1127 [00:11<00:00, 94.86it/s] 


Near-collision events found in chunk 95: 3151

Processing chunk 96 with 100000 rows


Chunk 96: 100%|██████████| 1390/1390 [00:13<00:00, 100.91it/s]


Near-collision events found in chunk 96: 2237

Processing chunk 97 with 100000 rows


Chunk 97: 100%|██████████| 1777/1777 [00:02<00:00, 622.32it/s]


Near-collision events found in chunk 97: 284

Processing chunk 98 with 100000 rows


Chunk 98: 100%|██████████| 1543/1543 [00:05<00:00, 277.01it/s]


Near-collision events found in chunk 98: 458

Processing chunk 99 with 100000 rows


Chunk 99: 100%|██████████| 1650/1650 [00:16<00:00, 102.32it/s]


Near-collision events found in chunk 99: 3409

Processing chunk 100 with 100000 rows


Chunk 100: 100%|██████████| 1444/1444 [00:11<00:00, 120.35it/s]


Near-collision events found in chunk 100: 2444

Processing chunk 101 with 100000 rows


Chunk 101: 100%|██████████| 1364/1364 [00:09<00:00, 140.15it/s]


Near-collision events found in chunk 101: 2464

Processing chunk 102 with 100000 rows


Chunk 102: 100%|██████████| 1261/1261 [00:07<00:00, 159.13it/s]


Near-collision events found in chunk 102: 1520

Processing chunk 103 with 100000 rows


Chunk 103: 100%|██████████| 1524/1524 [00:06<00:00, 223.71it/s]


Near-collision events found in chunk 103: 1193

Processing chunk 104 with 100000 rows


Chunk 104: 100%|██████████| 1738/1738 [00:01<00:00, 900.24it/s]


Near-collision events found in chunk 104: 66

Processing chunk 105 with 100000 rows


Chunk 105: 100%|██████████| 1756/1756 [00:02<00:00, 602.66it/s]


Near-collision events found in chunk 105: 483

Processing chunk 106 with 100000 rows


Chunk 106: 100%|██████████| 1883/1883 [00:03<00:00, 520.91it/s] 


Near-collision events found in chunk 106: 387

Processing chunk 107 with 100000 rows


Chunk 107: 100%|██████████| 1689/1689 [00:10<00:00, 160.15it/s]


Near-collision events found in chunk 107: 1790

Processing chunk 108 with 100000 rows


Chunk 108: 100%|██████████| 2219/2219 [00:04<00:00, 477.66it/s]


Near-collision events found in chunk 108: 1015

Processing chunk 109 with 100000 rows


Chunk 109: 100%|██████████| 1841/1841 [00:03<00:00, 491.78it/s]


Near-collision events found in chunk 109: 531

Processing chunk 110 with 100000 rows


Chunk 110: 100%|██████████| 1468/1468 [00:06<00:00, 239.87it/s]


Near-collision events found in chunk 110: 657

Processing chunk 111 with 100000 rows


Chunk 111: 100%|██████████| 1299/1299 [00:08<00:00, 149.00it/s]


Near-collision events found in chunk 111: 1457

Processing chunk 112 with 100000 rows


Chunk 112: 100%|██████████| 1274/1274 [00:05<00:00, 224.61it/s]


Near-collision events found in chunk 112: 845

Processing chunk 113 with 100000 rows


Chunk 113: 100%|██████████| 1200/1200 [00:07<00:00, 171.06it/s]


Near-collision events found in chunk 113: 909

Processing chunk 114 with 100000 rows


Chunk 114: 100%|██████████| 1649/1649 [00:05<00:00, 300.88it/s]


Near-collision events found in chunk 114: 1023

Processing chunk 115 with 100000 rows


Chunk 115: 100%|██████████| 2074/2074 [00:05<00:00, 393.07it/s]


Near-collision events found in chunk 115: 331

Processing chunk 116 with 100000 rows


Chunk 116: 100%|██████████| 1619/1619 [00:12<00:00, 131.47it/s]


Near-collision events found in chunk 116: 1445

Processing chunk 117 with 100000 rows


Chunk 117: 100%|██████████| 1602/1602 [00:09<00:00, 163.81it/s]


Near-collision events found in chunk 117: 2409

Processing chunk 118 with 100000 rows


Chunk 118: 100%|██████████| 1566/1566 [00:09<00:00, 161.17it/s]


Near-collision events found in chunk 118: 1339

Processing chunk 119 with 100000 rows


Chunk 119: 100%|██████████| 1598/1598 [00:12<00:00, 131.19it/s]


Near-collision events found in chunk 119: 1462

Processing chunk 120 with 100000 rows


Chunk 120: 100%|██████████| 1824/1824 [00:10<00:00, 167.08it/s]


Near-collision events found in chunk 120: 1795

Processing chunk 121 with 100000 rows


Chunk 121: 100%|██████████| 2149/2149 [00:04<00:00, 449.90it/s]


Near-collision events found in chunk 121: 295

Processing chunk 122 with 100000 rows


Chunk 122: 100%|██████████| 1699/1699 [00:07<00:00, 220.98it/s]


Near-collision events found in chunk 122: 909

Processing chunk 123 with 100000 rows


Chunk 123: 100%|██████████| 1328/1328 [00:16<00:00, 78.50it/s] 


Near-collision events found in chunk 123: 2113

Processing chunk 124 with 100000 rows


Chunk 124: 100%|██████████| 1521/1521 [00:13<00:00, 114.49it/s]


Near-collision events found in chunk 124: 1965

Processing chunk 125 with 100000 rows


Chunk 125: 100%|██████████| 2063/2063 [00:07<00:00, 272.32it/s]


Near-collision events found in chunk 125: 920

Processing chunk 126 with 100000 rows


Chunk 126: 100%|██████████| 1505/1505 [00:08<00:00, 184.78it/s]


Near-collision events found in chunk 126: 1846

Processing chunk 127 with 100000 rows


Chunk 127: 100%|██████████| 1645/1645 [00:12<00:00, 128.69it/s]


Near-collision events found in chunk 127: 1760

Processing chunk 128 with 100000 rows


Chunk 128: 100%|██████████| 1987/1987 [00:08<00:00, 245.53it/s]


Near-collision events found in chunk 128: 1374

Processing chunk 129 with 100000 rows


Chunk 129: 100%|██████████| 1717/1717 [00:05<00:00, 310.34it/s]


Near-collision events found in chunk 129: 828

Processing chunk 130 with 100000 rows


Chunk 130: 100%|██████████| 2285/2285 [00:05<00:00, 436.68it/s]


Near-collision events found in chunk 130: 801

Processing chunk 131 with 100000 rows


Chunk 131: 100%|██████████| 1634/1634 [00:14<00:00, 115.12it/s]


Near-collision events found in chunk 131: 3031

Processing chunk 132 with 100000 rows


Chunk 132: 100%|██████████| 1763/1763 [00:10<00:00, 174.50it/s]


Near-collision events found in chunk 132: 1743

Processing chunk 133 with 100000 rows


Chunk 133: 100%|██████████| 1745/1745 [00:04<00:00, 393.76it/s]


Near-collision events found in chunk 133: 460

Processing chunk 134 with 100000 rows


Chunk 134: 100%|██████████| 1628/1628 [00:09<00:00, 165.66it/s]


Near-collision events found in chunk 134: 2503

Processing chunk 135 with 100000 rows


Chunk 135: 100%|██████████| 2590/2590 [00:06<00:00, 374.59it/s]


Near-collision events found in chunk 135: 758

Processing chunk 136 with 100000 rows


Chunk 136: 100%|██████████| 2155/2155 [00:09<00:00, 230.95it/s]


Near-collision events found in chunk 136: 2257

Processing chunk 137 with 100000 rows


Chunk 137: 100%|██████████| 1699/1699 [00:17<00:00, 99.11it/s] 


Near-collision events found in chunk 137: 3879

Processing chunk 138 with 100000 rows


Chunk 138: 100%|██████████| 1587/1587 [00:11<00:00, 144.15it/s]


Near-collision events found in chunk 138: 1971

Processing chunk 139 with 100000 rows


Chunk 139: 100%|██████████| 1964/1964 [00:09<00:00, 210.97it/s]


Near-collision events found in chunk 139: 1534

Processing chunk 140 with 100000 rows


Chunk 140: 100%|██████████| 2542/2542 [00:06<00:00, 399.87it/s]


Near-collision events found in chunk 140: 859

Processing chunk 141 with 100000 rows


Chunk 141: 100%|██████████| 2056/2056 [00:08<00:00, 240.66it/s]


Near-collision events found in chunk 141: 2959

Processing chunk 142 with 100000 rows


Chunk 142: 100%|██████████| 1577/1577 [00:13<00:00, 120.77it/s]


Near-collision events found in chunk 142: 2142

Processing chunk 143 with 100000 rows


Chunk 143: 100%|██████████| 1665/1665 [00:10<00:00, 152.30it/s]


Near-collision events found in chunk 143: 1561

Processing chunk 144 with 100000 rows


Chunk 144: 100%|██████████| 1864/1864 [00:08<00:00, 229.20it/s]


Near-collision events found in chunk 144: 2059

Processing chunk 145 with 100000 rows


Chunk 145: 100%|██████████| 1951/1951 [00:04<00:00, 416.88it/s]


Near-collision events found in chunk 145: 733

Processing chunk 146 with 100000 rows


Chunk 146: 100%|██████████| 2338/2338 [00:04<00:00, 487.03it/s]


Near-collision events found in chunk 146: 926

Processing chunk 147 with 100000 rows


Chunk 147: 100%|██████████| 2603/2603 [00:08<00:00, 289.74it/s] 


Near-collision events found in chunk 147: 1131

Processing chunk 148 with 100000 rows


Chunk 148: 100%|██████████| 1690/1690 [00:18<00:00, 89.78it/s] 


Near-collision events found in chunk 148: 5409

Processing chunk 149 with 100000 rows


Chunk 149: 100%|██████████| 3419/3419 [00:11<00:00, 286.89it/s]


Near-collision events found in chunk 149: 1890

Processing chunk 150 with 100000 rows


Chunk 150: 100%|██████████| 5033/5033 [00:09<00:00, 546.27it/s]


Near-collision events found in chunk 150: 283

Processing chunk 151 with 100000 rows


Chunk 151: 100%|██████████| 5071/5071 [00:08<00:00, 594.06it/s]


Near-collision events found in chunk 151: 486

Processing chunk 152 with 100000 rows


Chunk 152: 100%|██████████| 5793/5793 [00:11<00:00, 504.07it/s] 


Near-collision events found in chunk 152: 321

Processing chunk 153 with 100000 rows


Chunk 153: 100%|██████████| 5508/5508 [00:07<00:00, 699.04it/s]


Near-collision events found in chunk 153: 285

Processing chunk 154 with 100000 rows


Chunk 154: 100%|██████████| 5839/5839 [00:12<00:00, 481.07it/s]


Near-collision events found in chunk 154: 189

Processing chunk 155 with 100000 rows


Chunk 155: 100%|██████████| 5151/5151 [00:13<00:00, 386.70it/s]


Near-collision events found in chunk 155: 142

Processing chunk 156 with 100000 rows


Chunk 156: 100%|██████████| 5471/5471 [00:12<00:00, 427.97it/s]


Near-collision events found in chunk 156: 226

Processing chunk 157 with 100000 rows


Chunk 157: 100%|██████████| 4500/4500 [00:07<00:00, 566.00it/s] 


Near-collision events found in chunk 157: 697

Processing chunk 158 with 100000 rows


Chunk 158: 100%|██████████| 5287/5287 [00:08<00:00, 656.04it/s] 


Near-collision events found in chunk 158: 248

Processing chunk 159 with 100000 rows


Chunk 159: 100%|██████████| 5341/5341 [00:14<00:00, 366.15it/s] 


Near-collision events found in chunk 159: 204

Processing chunk 160 with 100000 rows


Chunk 160: 100%|██████████| 5007/5007 [00:09<00:00, 505.95it/s]


Near-collision events found in chunk 160: 166

Processing chunk 161 with 100000 rows


Chunk 161: 100%|██████████| 5598/5598 [00:13<00:00, 428.01it/s]


Near-collision events found in chunk 161: 144

Processing chunk 162 with 100000 rows


Chunk 162: 100%|██████████| 4408/4408 [00:20<00:00, 212.40it/s]


Near-collision events found in chunk 162: 84

Processing chunk 163 with 100000 rows


Chunk 163: 100%|██████████| 4918/4918 [00:06<00:00, 733.39it/s] 


Near-collision events found in chunk 163: 57

Processing chunk 164 with 100000 rows


Chunk 164: 100%|██████████| 4695/4695 [00:12<00:00, 363.96it/s] 


Near-collision events found in chunk 164: 93

Processing chunk 165 with 100000 rows


Chunk 165: 100%|██████████| 4568/4568 [00:05<00:00, 823.35it/s] 


Near-collision events found in chunk 165: 100

Processing chunk 166 with 100000 rows


Chunk 166: 100%|██████████| 3858/3858 [00:07<00:00, 524.21it/s]


Near-collision events found in chunk 166: 211

Processing chunk 167 with 100000 rows


Chunk 167: 100%|██████████| 5376/5376 [00:07<00:00, 737.52it/s] 


Near-collision events found in chunk 167: 486

Processing chunk 168 with 100000 rows


Chunk 168: 100%|██████████| 5391/5391 [00:07<00:00, 754.23it/s] 


Near-collision events found in chunk 168: 544

Processing chunk 169 with 100000 rows


Chunk 169: 100%|██████████| 4916/4916 [00:14<00:00, 330.79it/s]


Near-collision events found in chunk 169: 546

Processing chunk 170 with 100000 rows


Chunk 170: 100%|██████████| 5458/5458 [00:22<00:00, 244.13it/s]


Near-collision events found in chunk 170: 683

Processing chunk 171 with 100000 rows


Chunk 171: 100%|██████████| 4265/4265 [00:19<00:00, 215.39it/s]


Near-collision events found in chunk 171: 818

Processing chunk 172 with 100000 rows


Chunk 172: 100%|██████████| 5761/5761 [00:14<00:00, 389.64it/s]


Near-collision events found in chunk 172: 243

Processing chunk 173 with 100000 rows


Chunk 173: 100%|██████████| 5261/5261 [00:14<00:00, 371.68it/s]


Near-collision events found in chunk 173: 353

Processing chunk 174 with 100000 rows


Chunk 174: 100%|██████████| 5296/5296 [00:10<00:00, 486.79it/s]


Near-collision events found in chunk 174: 471

Processing chunk 175 with 100000 rows


Chunk 175: 100%|██████████| 5339/5339 [00:07<00:00, 675.58it/s]


Near-collision events found in chunk 175: 328

Processing chunk 176 with 100000 rows


Chunk 176: 100%|██████████| 4502/4502 [00:09<00:00, 477.29it/s]


Near-collision events found in chunk 176: 379

Processing chunk 177 with 100000 rows


Chunk 177: 100%|██████████| 5283/5283 [00:09<00:00, 580.23it/s]


Near-collision events found in chunk 177: 262

Processing chunk 178 with 100000 rows


Chunk 178: 100%|██████████| 6001/6001 [00:14<00:00, 409.74it/s]


Near-collision events found in chunk 178: 755

Processing chunk 179 with 100000 rows


Chunk 179: 100%|██████████| 5331/5331 [00:16<00:00, 318.82it/s]


Near-collision events found in chunk 179: 1191

Processing chunk 180 with 100000 rows


Chunk 180: 100%|██████████| 5664/5664 [00:10<00:00, 544.29it/s]


Near-collision events found in chunk 180: 867

Processing chunk 181 with 100000 rows


Chunk 181: 100%|██████████| 6712/6712 [00:15<00:00, 443.23it/s]


Near-collision events found in chunk 181: 993

Processing chunk 182 with 100000 rows


Chunk 182: 100%|██████████| 12136/12136 [00:36<00:00, 329.79it/s]


Near-collision events found in chunk 182: 6693

Processing chunk 183 with 100000 rows


Chunk 183: 100%|██████████| 12341/12341 [00:35<00:00, 343.08it/s]


Near-collision events found in chunk 183: 27830

Processing chunk 184 with 100000 rows


Chunk 184: 100%|██████████| 8587/8587 [00:54<00:00, 158.00it/s] 


Near-collision events found in chunk 184: 56007

Processing chunk 185 with 100000 rows


Chunk 185: 100%|██████████| 6103/6103 [01:08<00:00, 89.28it/s]  


Near-collision events found in chunk 185: 64463

Processing chunk 186 with 100000 rows


Chunk 186: 100%|██████████| 7213/7213 [00:53<00:00, 134.27it/s] 


Near-collision events found in chunk 186: 44133

Processing chunk 187 with 100000 rows


Chunk 187: 100%|██████████| 11051/11051 [00:29<00:00, 377.42it/s]


Near-collision events found in chunk 187: 22779

Processing chunk 188 with 100000 rows


Chunk 188: 100%|██████████| 10118/10118 [00:38<00:00, 261.74it/s]


Near-collision events found in chunk 188: 32948

Processing chunk 189 with 100000 rows


Chunk 189: 100%|██████████| 7564/7564 [00:38<00:00, 198.33it/s] 


Near-collision events found in chunk 189: 48600

Processing chunk 190 with 100000 rows


Chunk 190: 100%|██████████| 10222/10222 [00:40<00:00, 254.75it/s]


Near-collision events found in chunk 190: 50229

Processing chunk 191 with 35630 rows


Chunk 191: 100%|██████████| 4166/4166 [00:05<00:00, 793.49it/s] 

Near-collision events found in chunk 191: 1134





In [7]:
master_csv = "near_collision_master.csv"

# Load the master CSV
df_master = pd.read_csv(master_csv)

# Check total number of near-collision events
print("Total near-collision events found:", len(df_master))
print(df_master.head())


Total near-collision events found: 677044
           time_window     mmsi_1     mmsi_2      lat_1     lon_1      lat_2  \
0  2015-09-30 22:18:00  228854000  228931000  48.118610 -4.415707  48.121580   
1  2015-09-30 22:19:00  228133000  228854000  48.117535 -4.424553  48.118410   
2  2015-09-30 22:20:00  228854000  228133000  48.118250 -4.424275  48.117233   
3  2015-09-30 22:21:00  228133000  228854000  48.116974 -4.433182  48.118060   
4  2015-09-30 22:24:00  228854000  228133000  48.117516 -4.441568  48.116100   

      lon_2  distance_m  speed_1  speed_2  course_1  course_2  
0 -4.419288  424.450297     10.5      8.9     274.6      78.5  
1 -4.420028  350.704813     10.6     10.4     263.1     269.3  
2 -4.429567  409.924222     10.4     10.7     261.4     264.9  
3 -4.428580  363.300903     10.7     10.4     264.9     268.4  
4 -4.446808  420.718312     10.8     10.7     261.5     264.3  
