In [None]:
!pip install pymongo[srv] pandas prettytable nest_asyncio websockets

import asyncio
import nest_asyncio
from pymongo import MongoClient
from prettytable import PrettyTable
import pandas as pd
import random
from datetime import datetime, timezone, timedelta

# Allow nested asyncio loops (required for Google Colab)
nest_asyncio.apply()

# MongoDB Atlas setup
# NOTE(review): the fallback URI below embeds a username and password in source.
# Prefer supplying the connection string through the MONGODB_URI environment
# variable and rotating the embedded credentials.
import os

uri = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://22CS8071:123%40Acharjee@acharjee.hkfbc.mongodb.net/?retryWrites=true&w=majority&appName=acharjee",
)

# Initialize MongoDB client
try:
    client = MongoClient(uri)
    client.admin.command('ping')
    print("Successfully connected to MongoDB!")
except Exception as e:
    print(f"Failed to connect to MongoDB: {e}")
    # Stop here: every statement below needs a working `client`, and carrying on
    # would only fail later with a less helpful error (e.g. NameError on `client`).
    raise

# Database and collection holding the raw ship documents
db = client['ais_database_71_11']
ships_collection = db["ships"]

def fetch_current_data(ship_id=None):
    """
    Print the current data of all ships, or of one specific ship, as a table.

    :param ship_id: ShipId to look up. ``None`` (the default) selects every
        document in ``ships_collection``.
    :raises KeyError: if a document lacks ShipId, Latitude, Longitude, SOG, COG,
        Heading or Timestamp (the remaining columns fall back to "N/A").
    """
    query = {} if ship_id is None else {"ShipId": ship_id}
    ships = ships_collection.find(query)

    # Columns that must be present vs. columns that are shown as "N/A" when absent
    required_fields = ["ShipId", "Latitude", "Longitude", "SOG", "COG", "Heading"]
    optional_fields = ["RateOfTurn", "NavigationalStatus", "PositionAccuracy", "RAIM", "CommunicationState"]

    # Create a table for current data
    current_table = PrettyTable()
    current_table.field_names = required_fields + optional_fields + ["Timestamp"]

    for ship in ships:
        row = [ship[field] for field in required_fields]
        row.extend(ship.get(field, "N/A") for field in optional_fields)
        row.append(ship["Timestamp"])
        current_table.add_row(row)

    # Use the same `is None` test as the query above, so a falsy id (e.g. 0)
    # that was used as a filter is not announced as "all ships".
    if ship_id is not None:
        print(f"Current Data for ShipId {ship_id}:")
    else:
        print("Current Data of All Ships:")
    print(current_table)

def fetch_history_data(ship_id):
    """
    Print every entry of one ship's ``History`` array as a table.

    :param ship_id: ShipId of the document to look up in ``ships_collection``.

    Prints a message and returns early when the ship is not found or when it
    has no history entries.
    """
    ship_doc = ships_collection.find_one({"ShipId": ship_id})
    if not ship_doc:
        print(f"No data found for ShipId {ship_id}.")
        return

    entries = ship_doc.get("History", [])
    if len(entries) == 0:
        print(f"No historical data available for ShipId {ship_id}.")
        return

    # Mandatory columns are indexed directly; optional ones default to "N/A".
    mandatory = ("Latitude", "Longitude", "SOG", "COG", "Heading")
    optional = ("RateOfTurn", "NavigationalStatus", "PositionAccuracy", "RAIM", "CommunicationState")

    history_table = PrettyTable()
    history_table.field_names = [*mandatory, *optional, "Timestamp"]

    for entry in entries:
        cells = [entry[column] for column in mandatory]
        cells += [entry.get(column, "N/A") for column in optional]
        cells.append(entry["Timestamp"])
        history_table.add_row(cells)

    print(f"Historical Data for ShipId {ship_id}:")
    print(history_table)





def fetch_changes_in_parameters(ship_id):
    """
    Print how a ship's parameters changed between its history records.

    Two tables are printed:

    * the difference between the two most recent history records, and
    * the per-second rate of change for every pair of consecutive records
      that are at least 120 seconds apart (newest pair first).

    History timestamps may be ``datetime`` objects, ISO-8601 strings, or epoch
    seconds (numeric or numeric string). Naive datetimes are treated as UTC.
    Records whose timestamp cannot be interpreted are reported and left out,
    so they can no longer break the chronological sort.

    :param ship_id: ShipId of the document to look up in ``ships_collection``.
    """
    ship = ships_collection.find_one({"ShipId": ship_id})

    if not ship:
        print(f"No data found for ShipId {ship_id}.")
        return

    history = ship.get("History", [])

    if len(history) < 2:
        print(f"Not enough historical data to calculate changes for ShipId {ship_id}.")
        return

    # Convert timestamps to timezone-aware datetime objects, keeping only the
    # records for which that succeeds.
    usable_history = []
    for record in history:
        timestamp = record.get('Timestamp')
        if isinstance(timestamp, datetime):
            parsed = timestamp
        elif isinstance(timestamp, (int, float)):
            parsed = datetime.fromtimestamp(timestamp, tz=timezone.utc)
        elif isinstance(timestamp, str):
            try:
                parsed = datetime.fromisoformat(timestamp)
            except ValueError:
                try:
                    parsed = datetime.fromtimestamp(float(timestamp), tz=timezone.utc)
                except (ValueError, OverflowError, OSError):
                    print(f"Cannot parse timestamp: {timestamp}")
                    continue
        else:
            print(f"Unexpected timestamp format: {timestamp}")
            continue

        # Naive and aware datetimes cannot be compared or subtracted, so make
        # everything aware (naive values are assumed to be UTC).
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        record['Timestamp'] = parsed
        usable_history.append(record)

    history = usable_history
    if len(history) < 2:
        print(f"Not enough historical data to calculate changes for ShipId {ship_id}.")
        return

    # Sort history by timestamp
    history.sort(key=lambda x: x['Timestamp'])

    # Create a table for parameter changes
    change_table = PrettyTable()
    change_table.field_names = ["Parameter", "Change from Previous to Current Value"]

    # Calculate changes between the two most recent history records
    current_record = history[-1]
    previous_record = history[-2]
    parameters = ["Latitude", "Longitude", "SOG", "COG", "Heading", "RateOfTurn", "NavigationalStatus", "PositionAccuracy", "RAIM", "CommunicationState"]

    for param in parameters:
        prev_value = previous_record.get(param, "N/A")
        curr_value = current_record.get(param, "N/A")
        if prev_value != "N/A" and curr_value != "N/A":
            try:
                change = curr_value - prev_value
                change_table.add_row([param, f"{change:.2f}"])
            except TypeError:
                # Non-numeric values (e.g. strings or None) cannot be subtracted
                change_table.add_row([param, "Error"])
        else:
            change_table.add_row([param, "N/A"])

    print(f"Changes in Parameters from Previous to Current Data for ShipId {ship_id}:")
    print(change_table)

    # Create a table for rate of change
    rate_table = PrettyTable()
    rate_table.field_names = ["Parameter", "Time Difference (s)", "Rate of Change (per second)"]

    # Walk consecutive record pairs from the newest pair to the oldest
    for i in range(len(history) - 1, 0, -1):
        prev_record = history[i - 1]
        curr_record = history[i]
        time_diff = (curr_record['Timestamp'] - prev_record['Timestamp']).total_seconds()

        # Only pairs at least 2 minutes apart are reported; this also rules out
        # a zero divisor below.
        if time_diff < 120:
            continue

        for param in parameters:
            prev_value = prev_record.get(param, "N/A")
            curr_value = curr_record.get(param, "N/A")
            if prev_value != "N/A" and curr_value != "N/A":
                try:
                    change = curr_value - prev_value
                    rate_of_change = change / time_diff
                    rate_table.add_row([param, f"{time_diff:.2f}", f"{rate_of_change:.2f}"])
                except TypeError:
                    rate_table.add_row([param, f"{time_diff:.2f}", "Error"])

    print(f"Rate of Change for ShipId {ship_id}:")
    print(rate_table)


def fetch_interval_data(ship_id, current_data):
    """
    Print, for one ship, the most recent history record in each age bracket.

    Ages are measured back from ``current_data["Timestamp"]``:

    - Data >=5 but <10 minutes old
    - Data >=10 but <15 minutes old
    - Data >=15 minutes old

    A bracket without any matching record is printed as a row of "N/A".

    :param ship_id: ShipId of the document to look up in ``ships_collection``.
    :param current_data: mapping whose ``"Timestamp"`` entry is the reference
        ``datetime``; a naive value is treated as UTC.

    NOTE(review): history timestamps are assumed to already be ``datetime``
    objects (their ``tzinfo`` attribute is read) - confirm against the writer.
    """
    ship = ships_collection.find_one({"ShipId": ship_id})

    if not ship:
        print(f"No data found for ShipId {ship_id}.")
        return

    history = ship.get("History", [])

    if len(history) < 1:
        print(f"No historical data available for ShipId {ship_id}.")
        return

    # Reference time, made offset-aware so it can be compared with the history
    current_time = current_data["Timestamp"]
    current_time = current_time if current_time.tzinfo else current_time.replace(tzinfo=timezone.utc)

    # Make every history timestamp offset-aware BEFORE sorting: a mix of naive
    # and aware datetimes cannot be ordered.
    for record in history:
        timestamp = record["Timestamp"]
        if timestamp.tzinfo is None:
            record["Timestamp"] = timestamp.replace(tzinfo=timezone.utc)

    # Newest first, so the first match in a bracket is its most recent record
    history.sort(key=lambda x: x['Timestamp'], reverse=True)

    # Each bracket is (oldest bound - exclusive, newest bound - inclusive).
    # A record that is exactly 5 minutes old has timestamp == current_time - 5 min,
    # hence the newest bound is inclusive and the oldest bound exclusive.
    five_minutes_ago = current_time - timedelta(minutes=5)
    ten_minutes_ago = current_time - timedelta(minutes=10)
    fifteen_minutes_ago = current_time - timedelta(minutes=15)
    intervals = {
        "5-10 Minutes Ago": (ten_minutes_ago, five_minutes_ago),
        "10-15 Minutes Ago": (fifteen_minutes_ago, ten_minutes_ago),
        "15+ Minutes Ago": (None, fifteen_minutes_ago)
    }

    # Create a table for interval data
    interval_table = PrettyTable()
    interval_table.field_names = ["Interval", "Latitude", "Longitude", "SOG", "COG", "Heading", "RateOfTurn", "NavigationalStatus", "PositionAccuracy", "RAIM", "CommunicationState", "Timestamp"]

    # Fetch data for each interval
    for interval_name, (oldest_exclusive, newest_inclusive) in intervals.items():
        interval_data = None
        for record in history:
            record_time = record["Timestamp"]
            if record_time > newest_inclusive:
                continue  # too recent for this bracket
            if oldest_exclusive is None or record_time > oldest_exclusive:
                interval_data = record
            # History is newest-first: once a record is old enough, it is either
            # the match or already past the bracket, so stop scanning.
            break

        if interval_data:
            interval_table.add_row([
                interval_name,
                interval_data["Latitude"], interval_data["Longitude"], interval_data["SOG"],
                interval_data["COG"], interval_data["Heading"], interval_data.get("RateOfTurn", "N/A"),
                interval_data.get("NavigationalStatus", "N/A"), interval_data.get("PositionAccuracy", "N/A"),
                interval_data.get("RAIM", "N/A"), interval_data.get("CommunicationState", "N/A"),
                interval_data["Timestamp"]
            ])
        else:
            interval_table.add_row([interval_name] + ["N/A"] * 11)

    print(f"Data for ShipId {ship_id} at Various Time Intervals:")
    print(interval_table)


def fetch_random_ship_data():
    """
    Pick a random ship that has history and print all of its reports.

    The reports are, in order: current data, historical data, changes in
    parameters, and interval data - each produced by the corresponding
    ``fetch_*`` function. Returns early (after printing a message) when no
    ship has history, when the selected ship cannot be re-read, or when its
    top-level ``Timestamp`` cannot be interpreted.
    """
    # Collect the ids of all ships with a non-empty History. Only ShipId is
    # projected so the (potentially large) History arrays are not downloaded
    # just to build the candidate list.
    ships_with_history = ships_collection.find(
        {"History": {"$exists": True, "$not": {"$size": 0}}},
        {"ShipId": 1, "_id": 0},
    )
    ship_ids_with_history = [ship["ShipId"] for ship in ships_with_history]

    if not ship_ids_with_history:
        print("No ships with historical data found in the database.")
        return

    # Select a random ship ID
    selected_ship_id = random.choice(ship_ids_with_history)
    print(f"Selected ShipId: {selected_ship_id}")

    # Fetch current data for the selected ship
    current_data_query = ships_collection.find_one({"ShipId": selected_ship_id})
    if not current_data_query:
        print(f"No current data found for ShipId {selected_ship_id}.")
        return

    # Convert the current timestamp to a datetime object
    timestamp = current_data_query.get("Timestamp")
    if isinstance(timestamp, str):
        try:
            current_timestamp = datetime.fromisoformat(timestamp)
        except ValueError:
            print(f"Cannot parse ISO format timestamp: {timestamp}")
            return
    elif isinstance(timestamp, (int, float)):
        # Epoch seconds -> aware UTC datetime (utcfromtimestamp is deprecated)
        current_timestamp = datetime.fromtimestamp(timestamp, tz=timezone.utc)
    elif isinstance(timestamp, datetime):
        current_timestamp = timestamp
    else:
        print(f"Unexpected timestamp format: {timestamp}")
        return

    # Ensure the timestamp is offset-aware (naive values are treated as UTC)
    if current_timestamp.tzinfo is None:
        current_timestamp = current_timestamp.replace(tzinfo=timezone.utc)

    current_data = {"Timestamp": current_timestamp}

    # Fetch and print current data for the selected ship
    fetch_current_data(selected_ship_id)

    # Fetch and print historical data for the selected ship
    print(f"\nFetching historical data for ShipId {selected_ship_id}:")
    fetch_history_data(selected_ship_id)

    # Fetch and print changes in parameters for the selected ship
    print(f"\nFetching changes in parameters for ShipId {selected_ship_id}:")
    fetch_changes_in_parameters(selected_ship_id)

    # Fetch and print interval data for the selected ship
    print(f"\nFetching interval data for ShipId {selected_ship_id}:")
    fetch_interval_data(selected_ship_id, current_data)

# Run the report for one randomly selected ship when the cell is executed
fetch_random_ship_data()


Collecting websockets
  Downloading websockets-13.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting pymongo[srv]
  Downloading pymongo-4.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
[0mCollecting dnspython<3.0.0,>=1.16.0 (from pymongo[srv])
  Downloading dnspython-2.6.1-py3-none-any.whl.metadata (5.8 kB)
Downloading websockets-13.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (157 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m157.3/157.3 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dnspython-2.6.1-py3-none-any.whl (307 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.7/307.7 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pymongo-4.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

THE CODE BELOW PERFORMS THE FOLLOWING OPERATIONS:
1. IT TAKES AS INPUT THE DATA (CURRENT AND HISTORICAL) OF A SHIP, IDENTIFIED BY ITS UNIQUE SHIP ID.
2. IT CORRECTS THAT DATA USING:

     a. KALMAN FILTER

     b. LINEAR INTERPOLATION - TO MAKE THE JOURNEY PATH SMOOTHER:
                                IF THE TIME DIFFERENCE BETWEEN TWO DATA POINTS EXCEEDS 15
                                MINUTES, INTERPOLATE BETWEEN THE TWO POINTS

     c. GEOPY FOR LATITUDE AND LONGITUDE CORRECTION

     d. CUTS THE DATA OF THOSE SHIPS WHOSE NUMBER OF HISTORY RECORDS IS LESS THAN 25

In [None]:
!pip install filterpy

Collecting filterpy
  Downloading filterpy-1.4.5.zip (177 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/178.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m174.1/178.0 kB[0m [31m6.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.0/178.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: filterpy
  Building wheel for filterpy (setup.py) ... [?25l[?25hdone
  Created wheel for filterpy: filename=filterpy-1.4.5-py3-none-any.whl size=110459 sha256=73cee04bb52cd195b1d9e71a6acbe43819d7844514a6473994a0574b03ff6164
  Stored in directory: /root/.cache/pip/wheels/0f/0c/ea/218f266af4ad626897562199fbbcba521b8497303200186102
Successfully built filterpy
Installing collected packages: filterpy
Successfully installed filterpy-1.4.5


In [None]:
from pymongo import MongoClient
import random
from datetime import datetime, timezone, timedelta
from geopy.distance import geodesic
import numpy as np
from scipy.interpolate import lagrange
from filterpy.kalman import KalmanFilter
import nest_asyncio

# Allow nested asyncio loops (required for Google Colab)
nest_asyncio.apply()

# MongoDB Atlas setup
# NOTE(review): the fallback URI below embeds a username and password in source.
# Prefer supplying the connection string through the MONGODB_URI environment
# variable and rotating the embedded credentials.
import os

uri = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://22CS8071:123%40Acharjee@acharjee.hkfbc.mongodb.net/?retryWrites=true&w=majority&appName=acharjee",
)

# Initialize MongoDB client
try:
    client = MongoClient(uri)
    client.admin.command('ping')
    print("Successfully connected to MongoDB!")
except Exception as e:
    print(f"Failed to connect to MongoDB: {e}")
    # Stop here: every statement below needs a working `client`, and carrying on
    # would only fail later with a less helpful error (e.g. NameError on `client`).
    raise

# Database and collection holding the raw ship documents
db = client['ais_database_71_11']
ships_collection = db["ships"]

# Processed data goes to a separate collection ("ships_1") of the SAME database
processed_db = client['ais_database_71_11']
processed_ships_collection = processed_db["ships_1"]

# Function Definitions (map_match, correct_coordinate, calculate_initial_compass_bearing, delete_invalid_data, cut_data, kalman_filter_smoothing, interpolate_trajectory)

def map_match(ship_data, max_distance_threshold=0.5):
    """
    Pull back points that jump too far from the previously accepted point.

    Each record is compared with the record accepted just before it (after any
    correction applied to that one). When the geodesic distance between them
    exceeds ``max_distance_threshold`` kilometres, the record's Latitude and
    Longitude are overwritten IN PLACE with the result of
    ``correct_coordinate``.

    :param ship_data: sequence of dicts with 'Latitude' and 'Longitude' keys.
    :param max_distance_threshold: largest accepted step, in kilometres.
    :return: list containing the same record objects, in the same order.
    """
    accepted = []
    for index, point in enumerate(ship_data):
        lat, lon = point['Latitude'], point['Longitude']

        # Every point after the first is checked against the last accepted one
        if accepted:
            anchor = accepted[-1]
            anchor_lat, anchor_lon = anchor['Latitude'], anchor['Longitude']
            step_km = geodesic((anchor_lat, anchor_lon), (lat, lon)).kilometers

            if step_km > max_distance_threshold:
                replacement = correct_coordinate(anchor_lat, anchor_lon, lat, lon)
                if not replacement:
                    print(f"Skipping correction for point at index {index} due to significant deviation.")
                else:
                    point['Latitude'], point['Longitude'] = replacement
                    print(f"Corrected point at index {index}: {replacement}")

        accepted.append(point)
    return accepted

from geopy.distance import geodesic

def correct_coordinate(prev_lat, prev_lon, cur_lat, cur_lon, correction_factor=0.1):
    """
    Replace a point by one closer to its predecessor, on the same bearing.

    The returned point lies on the initial bearing from the previous point to
    the current point, at ``correction_factor`` times the original geodesic
    distance from the previous point.

    :param prev_lat: Latitude of the previous point.
    :param prev_lon: Longitude of the previous point.
    :param cur_lat: Current latitude that needs correction.
    :param cur_lon: Current longitude that needs correction.
    :param correction_factor: Fraction of the original distance to keep.
    :return: ``(latitude, longitude)`` of the corrected point, or ``None`` when
        the two points are not a positive distance apart.
    """
    origin = (prev_lat, prev_lon)
    target = (cur_lat, cur_lon)
    separation_km = geodesic(origin, target).kilometers

    # Nothing to correct when the points coincide
    if not separation_km > 0:
        return None

    # Direction of travel from the previous point towards the current one
    heading = calculate_initial_compass_bearing(origin, target)

    # Walk only a fraction of the original distance along that direction
    shortened_km = separation_km * correction_factor
    moved = geodesic(kilometers=shortened_km).destination(origin, heading)
    return moved.latitude, moved.longitude


def calculate_initial_compass_bearing(pointA, pointB):
    """
    Return the initial compass bearing from ``pointA`` to ``pointB``.

    :param pointA: tuple of (latitude, longitude) in degrees for the start.
    :param pointB: tuple of (latitude, longitude) in degrees for the end.
    :return: bearing in degrees, normalised to the range [0, 360).
    :raises TypeError: if either argument is not a tuple (tuple subclasses such
        as named tuples are accepted).
    """
    # isinstance (rather than an exact type comparison) so that tuple
    # subclasses, e.g. namedtuples, are accepted as well.
    if not isinstance(pointA, tuple) or not isinstance(pointB, tuple):
        raise TypeError("Only tuples are supported as arguments")

    lat1 = np.radians(pointA[0])
    lat2 = np.radians(pointB[0])
    diffLong = np.radians(pointB[1] - pointA[1])

    x = np.sin(diffLong) * np.cos(lat2)
    y = np.cos(lat1) * np.sin(lat2) - (np.sin(lat1) * np.cos(lat2) * np.cos(diffLong))

    # arctan2 yields (-180, 180] degrees; shift into the compass range [0, 360)
    initial_bearing = np.degrees(np.arctan2(x, y))
    return (initial_bearing + 360) % 360

def calculate_turn_angle(pointA, pointB, pointC):
    """
    Return how sharply the path A -> B -> C turns at B.

    :param pointA: Tuple of (Latitude, Longitude) for the first point.
    :param pointB: Tuple of (Latitude, Longitude) for the second point.
    :param pointC: Tuple of (Latitude, Longitude) for the third point.
    :return: Absolute difference, in degrees, between the bearing of segment AB
        and the bearing of segment BC, folded into the range 0 to 180.
    """
    heading_in = calculate_initial_compass_bearing(pointA, pointB)
    heading_out = calculate_initial_compass_bearing(pointB, pointC)

    delta = abs(heading_out - heading_in)

    # A difference above 180 degrees is the same turn measured the long way round
    return 360 - delta if delta > 180 else delta


def _filter_history_points(history, max_speed_threshold, max_turn_angle):
    """Deduplicate and time-sort *history*, then drop implausible interior points."""
    # Exact duplicates collapse onto one key (the last occurrence wins)
    unique_points = list({frozenset(item.items()): item for item in history}.values())
    unique_points.sort(key=lambda x: x['Timestamp'])

    # With fewer than three points there is no interior point to test. Returning
    # here also avoids emitting a single-point history twice (once as the first
    # point and once as the last).
    if len(unique_points) < 3:
        return unique_points

    cleaned_history = [unique_points[0]]
    for current_point, next_point in zip(unique_points[1:-1], unique_points[2:]):
        # Compare against the last point that was KEPT, not the raw predecessor
        prev_point = cleaned_history[-1]
        prev_coords = (prev_point['Latitude'], prev_point['Longitude'])
        current_coords = (current_point['Latitude'], current_point['Longitude'])
        next_coords = (next_point['Latitude'], next_point['Longitude'])

        # Nautical miles per hour, i.e. knots
        distance1 = geodesic(prev_coords, current_coords).nautical
        distance2 = geodesic(current_coords, next_coords).nautical
        time_diff1 = (current_point['Timestamp'] - prev_point['Timestamp']).total_seconds() / 3600
        time_diff2 = (next_point['Timestamp'] - current_point['Timestamp']).total_seconds() / 3600
        # A non-positive time difference is treated as speed 0 (never rejected on speed)
        speed1 = distance1 / time_diff1 if time_diff1 > 0 else 0
        speed2 = distance2 / time_diff2 if time_diff2 > 0 else 0

        angle = calculate_turn_angle(prev_coords, current_coords, next_coords)
        if speed1 <= max_speed_threshold and speed2 <= max_speed_threshold and angle <= max_turn_angle:
            cleaned_history.append(current_point)

    # The final point is always kept
    cleaned_history.append(unique_points[-1])
    return cleaned_history


def delete_invalid_data(ship_data, max_speed_threshold=5, max_turn_angle=75):
    """
    Drop invalid ship records and implausible points from their histories.

    A record is discarded when its 'ShipId' is missing or its string form is
    not exactly nine characters long. For the remaining records a non-empty
    'History' is replaced IN PLACE by a cleaned copy: exact duplicates are
    removed, the points are sorted by 'Timestamp', and an interior point is
    kept only if the speed into it, the speed out of it, and the turn angle at
    it are all within the thresholds. The first and last points are always kept.

    :param ship_data: iterable of dicts with 'ShipId' and optionally 'History'.
    :param max_speed_threshold: maximum speed, in nautical miles per hour.
    :param max_turn_angle: maximum turn angle at a point, in degrees.
    :return: list of the records that passed the ShipId check.
    :raises TypeError: if a history entry holds an unhashable value (entries
        are deduplicated via ``frozenset(item.items())``).
    """
    valid_data = []
    for record in ship_data:
        mmsi = record.get('ShipId')
        if mmsi is None or len(str(mmsi)) != 9:
            continue
        if record.get('History'):
            record['History'] = _filter_history_points(
                record['History'], max_speed_threshold, max_turn_angle
            )
        valid_data.append(record)
    return valid_data

def cut_data(ship_data, time_gap_threshold=15, distance_threshold=2):
    """
    Split each ship's history into voyages at large time or distance gaps.

    Consecutive history points stay in the same voyage unless they are more
    than ``time_gap_threshold`` minutes apart or more than
    ``distance_threshold`` kilometres apart; in that case a new voyage starts
    at the later point. The first history point opens the first voyage, so no
    point is lost and a single-point history yields one single-point voyage.

    The result is stored on the record itself under 'Voyages' (the record is
    modified in place). Records without a non-empty 'History' are passed
    through untouched.

    NOTE(review): the history is processed in the order given; it is not
    sorted here - confirm callers pass it in chronological order.

    :param ship_data: iterable of dicts, each optionally holding a 'History'
        list of dicts with 'Timestamp', 'Latitude' and 'Longitude'.
    :param time_gap_threshold: maximum gap inside a voyage, in minutes.
    :param distance_threshold: maximum step inside a voyage, in kilometres.
    :return: list of the same record objects, in input order.
    """
    segmented_records = []
    for record in ship_data:
        history = record.get('History')
        if not history:
            segmented_records.append(record)
            continue

        voyages = []
        current_voyage = [history[0]]
        for prev_point, current_point in zip(history, history[1:]):
            time_diff = current_point['Timestamp'] - prev_point['Timestamp']
            time_diff_minutes = time_diff.total_seconds() / 60
            prev_coords = (prev_point['Latitude'], prev_point['Longitude'])
            current_coords = (current_point['Latitude'], current_point['Longitude'])
            distance = geodesic(prev_coords, current_coords).kilometers

            if time_diff_minutes > time_gap_threshold or distance > distance_threshold:
                # Gap detected: close the running voyage and start a new one
                voyages.append(current_voyage)
                current_voyage = [current_point]
            else:
                current_voyage.append(current_point)

        voyages.append(current_voyage)
        record['Voyages'] = voyages
        segmented_records.append(record)
    return segmented_records

def kalman_filter_smoothing(history):
    """
    Smooth the Latitude/Longitude of a track with a Kalman filter.

    The filter state is ``[latitude, latitude rate, longitude, longitude rate]``
    with a transition that advances each position by its rate once per record
    (``dt = 1`` per step - the actual time between records is not used). Only
    latitude and longitude are measured.

    Each record's 'Latitude' and 'Longitude' are overwritten IN PLACE with the
    filtered estimate, stored as plain Python floats.

    :param history: list of dicts with 'Latitude' and 'Longitude' keys.
    :return: list of the same record objects, in input order; an empty list
        for empty input.
    """
    # Nothing to smooth - and no first record to seed the filter state from
    if not history:
        return []

    kf = KalmanFilter(dim_x=4, dim_z=2)
    dt = 1
    # State transition: position += rate * dt, rate unchanged
    kf.F = np.array([[1, dt, 0, 0], [0, 1, 0, 0], [0, 0, 1, dt], [0, 0, 0, 1]])
    # Measurement picks latitude (index 0) and longitude (index 2) out of the state
    kf.H = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])
    # Large initial covariance: the starting rates (0) are only a guess
    kf.P *= 1000.
    kf.Q = np.array([[1, 0, 0, 0], [0, 0.1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0.1]])
    kf.R = np.array([[5, 0], [0, 5]])

    # Seed the state with the first observed position and zero rates
    first_record = history[0]
    kf.x = np.array([first_record['Latitude'], 0, first_record['Longitude'], 0])

    smoothed_data = []
    for record in history:
        measurement = np.array([record['Latitude'], record['Longitude']])
        kf.predict()
        kf.update(measurement)
        # Store builtin floats rather than numpy scalars in the record
        record['Latitude'], record['Longitude'] = float(kf.x[0]), float(kf.x[2])
        smoothed_data.append(record)
    return smoothed_data

def interpolate_trajectory(history, max_time_gap=15):
    """
    Insert linearly interpolated points between close consecutive records.

    The history is sorted by 'Timestamp' in place. For every pair of
    consecutive records whose gap is greater than 0 and at most
    ``max_time_gap`` minutes, ``int(gap_minutes - 1)`` evenly spaced points are
    inserted between them. Inserted points carry only 'Timestamp', 'Latitude'
    and 'Longitude'.

    Interpolated timestamps are computed with ``datetime``/``timedelta``
    arithmetic, so they keep the same timezone-awareness as the input records
    (naive stays naive, aware stays aware) and do not depend on the machine's
    local timezone.

    :param history: list of dicts with 'Timestamp' (datetime), 'Latitude' and
        'Longitude'.
    :param max_time_gap: largest gap, in minutes, that is filled in.
    :return: new list with originals and interpolated points in time order;
        the input list itself (unsorted, unchanged) when it has fewer than
        three records.
    """
    if len(history) < 3:
        return history

    history.sort(key=lambda x: x['Timestamp'])

    interpolated_history = []
    for current_point, next_point in zip(history, history[1:]):
        interpolated_history.append(current_point)

        gap = next_point['Timestamp'] - current_point['Timestamp']
        gap_minutes = gap.total_seconds() / 60
        if not 0 < gap_minutes <= max_time_gap:
            continue

        # Roughly one extra point per whole minute strictly inside the gap
        num_points_to_interpolate = int(gap_minutes - 1)
        segments = num_points_to_interpolate + 1
        lat_span = next_point['Latitude'] - current_point['Latitude']
        lon_span = next_point['Longitude'] - current_point['Longitude']

        for j in range(1, num_points_to_interpolate + 1):
            fraction = j / segments
            interpolated_history.append({
                'Timestamp': current_point['Timestamp'] + gap * fraction,
                'Latitude': current_point['Latitude'] + lat_span * fraction,
                'Longitude': current_point['Longitude'] + lon_span * fraction
            })

    interpolated_history.append(history[-1])
    return interpolated_history

def preprocess_ais_data():
    """
    Clean every ship's history and store the resulting voyages.

    For each document of ``ships_collection`` with a non-empty 'History' the
    history is passed through ``map_match``, ``delete_invalid_data`` and
    ``cut_data``; every resulting voyage is then smoothed with
    ``kalman_filter_smoothing`` and densified with ``interpolate_trajectory``.
    One document ``{'ShipId': ..., 'Voyages': <points of that voyage>}`` is
    inserted into ``processed_ships_collection`` per voyage.

    Ships rejected by ``delete_invalid_data`` (it drops records whose ShipId is
    missing or not nine characters long) are skipped with a message instead of
    aborting the whole run.
    """
    ships = list(ships_collection.find())
    processed_ships = []

    for ship in ships:
        history = ship.get('History')
        if not history:
            continue

        ship_id = ship.get('ShipId')
        history = map_match(history)

        cleaned = delete_invalid_data([{'ShipId': ship_id, 'History': history}])
        if not cleaned:
            # delete_invalid_data returned nothing, so there is no [0] to read
            print(f"Skipping ShipId {ship_id}: rejected by delete_invalid_data.")
            continue

        voyages = cut_data([{'ShipId': ship_id, 'History': cleaned[0]['History']}])[0]['Voyages']
        for voyage in voyages:
            voyage = kalman_filter_smoothing(voyage)
            voyage = interpolate_trajectory(voyage)
            # NOTE: despite the plural key, each document holds ONE voyage
            processed_ships.append({
                'ShipId': ship_id,
                'Voyages': voyage
            })

    if processed_ships:
        processed_ships_collection.insert_many(processed_ships)
        print(f"Inserted {len(processed_ships)} processed records into the new database.")
    else:
        print("No data to insert into the new database.")

# Run the preprocessing function when the cell is executed: reads every ship
# document and inserts the processed voyages into the "ships_1" collection
preprocess_ais_data()


Successfully connected to MongoDB!
Corrected point at index 4: (29.73866753675624, -95.15515889577007)
Corrected point at index 5: (29.738391238010976, -95.15427815994566)
Corrected point at index 6: (29.73813815308941, -95.15340292791373)
Corrected point at index 7: (29.73789406248118, -95.15242782136852)
Corrected point at index 8: (29.73764050347757, -95.1513912302162)
Corrected point at index 9: (29.737358006107872, -95.15022866677842)
Corrected point at index 8: (29.72040057246403, -93.98614404707692)
Corrected point at index 9: (29.706082885410364, -94.0801702199496)
Corrected point at index 6: (29.821649221721074, -95.07749087415749)
Corrected point at index 7: (29.818555747192683, -95.07602223267443)
Corrected point at index 8: (29.81573190374566, -95.07471936245334)
Corrected point at index 9: (29.812987928272612, -95.07369952084962)
Corrected point at index 4: (28.391668502277305, -87.9959124402059)
Corrected point at index 5: (28.391166034025083, -87.99617225461613)
Correcte

In [None]:
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure
import json

# MongoDB Atlas setup
# NOTE(review): the fallback URI below embeds a username and password in source.
# Prefer supplying the connection string through the MONGODB_URI environment
# variable and rotating the embedded credentials.
import os

uri = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://22CS8071:123%40Acharjee@acharjee.hkfbc.mongodb.net/?retryWrites=true&w=majority&appName=acharjee",
)

# Initialize MongoDB client
try:
    client = MongoClient(uri)
    client.admin.command('ping')
    print("Successfully connected to MongoDB!")
except ConnectionFailure as e:
    print(f"Failed to connect to MongoDB: {e}")
    # Stop here: the change-stream watcher below cannot work without a
    # reachable server, so there is no point in continuing.
    raise

# Raw ship documents and the collection that receives the processed voyages
# (both live in the same database)
db = client['ais_database_71_11']
ships_collection = db["ships"]
processed_db = client['ais_database_71_11']
processed_ships_collection = processed_db["ships_1"]

def process_change(change):
    """
    Re-process one ship after a change-stream event and store the result.

    The event's 'fullDocument' is run through ``map_match``,
    ``delete_invalid_data`` and ``cut_data``; each voyage is smoothed with
    ``kalman_filter_smoothing``, densified with ``interpolate_trajectory`` and
    upserted into ``processed_ships_collection`` under the ship's ShipId.

    Events without a 'fullDocument' key are reported. Events whose document is
    empty/null or has no history are ignored, and ships rejected by
    ``delete_invalid_data`` are skipped with a message instead of raising.

    :param change: change-stream event (a mapping) from ``ships_collection``.
    """
    if 'fullDocument' not in change:
        print("Change event does not contain 'fullDocument'")
        return

    document = change['fullDocument']
    # The looked-up document can be null; treat that like "nothing to process"
    if not document or not document.get('History'):
        return

    ship_id = document.get('ShipId')
    history = map_match(document['History'])

    cleaned = delete_invalid_data([{'ShipId': ship_id, 'History': history}])
    if not cleaned:
        # delete_invalid_data drops records whose ShipId is missing or not nine
        # characters long, so there is no [0] to read
        print(f"Skipping ShipId {ship_id}: rejected by delete_invalid_data.")
        return

    voyages = cut_data([{'ShipId': ship_id, 'History': cleaned[0]['History']}])[0]['Voyages']
    for voyage in voyages:
        voyage = kalman_filter_smoothing(voyage)
        voyage = interpolate_trajectory(voyage)
        # NOTE(review): every iteration overwrites the same 'Voyages' field of
        # the same document, so only the LAST voyage survives. Left unchanged
        # because the intended stored shape is unclear - confirm with readers
        # of "ships_1" before changing it.
        processed_ships_collection.update_one(
            {'ShipId': ship_id},
            {'$set': {'Voyages': voyage}},
            upsert=True
        )
    print(f"Processed and updated ShipId: {ship_id}")

# Watch the ships collection for inserts and updates. full_document='updateLookup'
# asks the server to attach the current document to update events, so no extra
# pipeline stage is needed to expose 'fullDocument' (the former $addFields stage
# only copied the field onto itself).
pipeline = [
    {'$match': {'operationType': {'$in': ['insert', 'update']}}}
]

# The loop below blocks until the stream is closed or the cell is interrupted
with ships_collection.watch(pipeline, full_document='updateLookup') as stream:
    print("Watching for changes...")
    for change in stream:
        process_change(change)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Corrected point at index 5: (28.937178398724676, -95.32321908972641)
Corrected point at index 6: (28.937099846106364, -95.32395929801258)
Corrected point at index 7: (28.937107082028533, -95.3246682006169)
Corrected point at index 8: (28.937237361124062, -95.32534538127365)
Corrected point at index 9: (28.93748521591568, -95.32600263924108)
Processed and updated ShipId: 367311490
Processed and updated ShipId: 367505070
Processed and updated ShipId: 367340430
Processed and updated ShipId: 367137180
Processed and updated ShipId: 367188610
Processed and updated ShipId: 367478440
Processed and updated ShipId: 368283350
Corrected point at index 5: (29.72732744657074, -95.21050430803577)
Corrected point at index 6: (29.72815654587409, -95.20997446657361)
Corrected point at index 7: (29.729115568708405, -95.20930222492908)
Corrected point at index 8: (29.730163926929798, -95.20847142901545)
Corrected point at index 9: (29.731262