In [None]:
import ast
import folium
import geopandas as gpd
import json
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import regex as re
import requests
import seaborn as sns
import time

from folium.plugins import HeatMap
from pyproj import Transformer
from shapely.geometry import Point, LineString, mapping
from shapely.ops import transform
from sklearn.cluster import DBSCAN
from tqdm import tqdm

In [None]:
# Import the raw data from CSV into `df`.
# NOTE(review): `path` is not defined in any cell visible in this notebook section;
# it must point to the input CSV before this cell runs -- confirm where it is set.
# Later cells read the columns 'MMSI', 'DATE TIME (UTC)', 'LATITUDE' and 'LONGITUDE'.
df = pd.read_csv(path)


In [None]:
# Function that sorts the data by MMSI and timestamp so that each vessel's messages form a trajectory. It also calculates the time interval between two consecutive rows of the same MMSI and keeps the rows followed by a gap of two hours or more.

def find_large_intervals(df, min_gap=pd.Timedelta(hours=2)):
    """Return the rows that are followed by a long silence of the same vessel.

    The rows are ordered by 'MMSI' and 'DATE TIME (UTC)'; for every row the
    absolute time gap to the *next* row of the same MMSI is stored in a new
    'TimeDiff' column, and only rows whose gap is at least ``min_gap`` are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'MMSI' and 'DATE TIME (UTC)'.
    min_gap : pandas.Timedelta, datetime.timedelta or str, default 2 hours
        Minimum gap for a row to be kept. Anything accepted by
        ``pd.Timedelta`` works (e.g. ``'30min'``).

    Returns
    -------
    pandas.DataFrame
        The selected rows (original index labels preserved), sorted by MMSI and
        timestamp, with the extra 'TimeDiff' column.

    Notes
    -----
    The 'DATE TIME (UTC)' column of the frame passed in is converted to
    datetime in place, exactly as before, so the caller's frame is modified.
    The last row of every MMSI has no successor, its 'TimeDiff' is NaT and it
    is therefore never returned.
    """
    # Kept as an in-place conversion on purpose: other cells may rely on the
    # caller's frame holding real datetimes after this call.
    df['DATE TIME (UTC)'] = pd.to_datetime(df['DATE TIME (UTC)'])
    ordered = df.sort_values(by=['MMSI', 'DATE TIME (UTC)'])

    # diff(-1) is "this row minus the next row" within each MMSI, hence the abs().
    # The result is already a timedelta column, no further conversion is needed.
    ordered['TimeDiff'] = ordered.groupby('MMSI')['DATE TIME (UTC)'].diff(-1).abs()

    # Comparisons against NaT are False, so rows without a successor drop out here.
    return ordered[ordered['TimeDiff'] >= pd.Timedelta(min_gap)]

In [None]:
# Call find_large_intervals on the raw data: `intervals` holds the rows whose
# 'TimeDiff' (gap to the next message of the same MMSI) is at least two hours.
# Side effect: the call also converts df['DATE TIME (UTC)'] to datetime in place.
intervals = find_large_intervals(df)

In [None]:
# Remove rows with a missing 'TimeDiff' from the intervals dataframe.
# This is a safeguard: find_large_intervals only keeps rows whose 'TimeDiff' passes
# a >= comparison, which a missing value (NaT) never does.
cleaned_intervals = intervals.dropna(subset=['TimeDiff'])

In [None]:
# Remove noise near the Danish straits: vessels leaving the Baltic Sea have their
# last AIS signal recorded there, which looks like a very long "going dark" gap.
# A threshold of ten days is used to make sure the vessel really left and later
# re-entered the Baltic Sea.
threshold = pd.Timedelta(days=10)

# Rows inside the area (longitude <= 13, latitude >= 54) whose gap reaches the threshold
condition = (
    (cleaned_intervals['LONGITUDE'] <= 13)
    & (cleaned_intervals['LATITUDE'] >= 54)
    & (cleaned_intervals['TimeDiff'] >= threshold)
)

# Keep everything else. The explicit .copy() makes the result an independent frame:
# later cells add columns to it, which on a boolean-mask slice raises pandas'
# SettingWithCopyWarning and is not guaranteed to behave consistently.
large_intervals_cleaned = cleaned_intervals.loc[~condition].copy()

# SUBMARINE CABLE CHECK OF GOING DARK #

In [None]:
# This function opens a text file and returns its content as a dictionary. The files loaded here are .txt files written in Python dictionary syntax (without the surrounding braces), mapping each cable name to its list of coordinates.

def load_dict_from_file(file_path):
    """Parse a UTF-8 text file holding ``"key": value`` entries into a Python object.

    The file is expected to contain the *inside* of a dictionary literal; the
    surrounding braces are added here before the text is evaluated with
    ``ast.literal_eval`` (literals only, no code execution).
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()

    # Normalise `"key"   :` to `"key":` before evaluating the literal.
    quoted_key = re.compile(r'"([^"]+)"\s*:')
    normalised = quoted_key.sub(r'"\1":', raw_text)

    literal_source = "".join(("{", normalised, "}"))
    return ast.literal_eval(literal_source)

# Read the three infrastructure datasets (telecom cables, power cables, gas
# pipelines) into dictionaries, in that order.
telco_cables_dict, pow_cables_dict, gas_pipe_dict = (
    load_dict_from_file(dataset_file)
    for dataset_file in ('telco_cables.txt', 'power_cables.txt', 'gas_pipe.txt')
)


In [None]:
# Draw every telecom cable, power cable and gas pipeline on a folium map together
# with its 10 km buffer, then flag (and plot) every point of large_intervals_cleaned
# that lies inside one of those buffers.
#
# The same metric buffer polygon is used both for drawing and for the containment
# test, and it is built once per cable instead of once per cable for every row.

BUFFER_METERS = 10000  # buffer radius around every cable / pipeline: 10 km

# The buffer must be computed in a CRS whose units are real metres. Web Mercator
# (EPSG:3857) stretches distances by 1/cos(latitude) -- about x1.7 at 54N and x2.5
# at 66N -- so a "10000 m" buffer there covers only some 4-6 km on the ground.
# ETRS89-LAEA Europe (EPSG:3035) stays close to true scale over the Baltic Sea
# (error on the order of 1 %).
METRIC_CRS = "EPSG:3035"

# Transformers converting lon/lat values to the metric system and vice versa
project_to_meters = Transformer.from_crs("EPSG:4326", METRIC_CRS, always_xy=True).transform
project_to_degrees = Transformer.from_crs(METRIC_CRS, "EPSG:4326", always_xy=True).transform

# One entry per infrastructure type, listed in drawing order:
# source dictionary, flag column, line colour, buffer colour, marker colour.
cable_layers = {
    'telco': {'cables': telco_cables_dict, 'flag': 'dark_near_telco_cable',
              'line': 'red', 'buffer': 'blue', 'marker': 'purple'},
    'power': {'cables': pow_cables_dict, 'flag': 'dark_near_power_cable',
              'line': 'orange', 'buffer': 'yellow', 'marker': 'orange'},
    'gas': {'cables': gas_pipe_dict, 'flag': 'dark_near_gas_pipe',
            'line': 'black', 'buffer': 'green', 'marker': 'blue'},
}

# Work on an independent copy so that adding columns never writes into a slice of
# another dataframe (SettingWithCopyWarning) and re-running the cell starts clean.
large_intervals_cleaned = large_intervals_cleaned.copy()

# Flag columns: 0 = point not near this infrastructure type, 1 = point near it
for layer in cable_layers.values():
    large_intervals_cleaned[layer['flag']] = 0


def _add_layer_to_map(fmap, cables, line_color, buffer_color):
    """Draw one infrastructure layer and return its buffers in metric coordinates.

    ``cables`` maps a name to a list of (lon, lat) points. Every line is drawn
    on ``fmap`` with its BUFFER_METERS buffer; the returned list holds one
    buffer polygon per line, expressed in METRIC_CRS.
    """
    def buffer_style(_feature):
        return {
            'fillColor': buffer_color,
            'color': buffer_color,
            'weight': 1,
            'fillOpacity': 0.3
        }

    metric_buffers = []
    for cable_name, cable_points in cables.items():
        # Project to metres, buffer there, and keep the metric polygon for the checks
        cable_line_proj = transform(project_to_meters, LineString(cable_points))
        buffer_proj = cable_line_proj.buffer(BUFFER_METERS)
        metric_buffers.append(buffer_proj)

        # Line of the cable (folium expects (lat, lon))
        folium.PolyLine(
            locations=[(pt[1], pt[0]) for pt in cable_points],
            color=line_color,
            weight=3,
            opacity=0.8,
            tooltip=cable_name
        ).add_to(fmap)

        # Buffer area, reprojected to lat/lon for display
        folium.GeoJson(
            mapping(transform(project_to_degrees, buffer_proj)),
            style_function=buffer_style
        ).add_to(fmap)
    return metric_buffers


# Map created and centered on the first point of the first telecom cable
first_cable_points = list(telco_cables_dict.values())[0]
center_point = first_cable_points[0]
m = folium.Map(location=[center_point[1], center_point[0]], zoom_start=6)

for layer in cable_layers.values():
    layer['buffers'] = _add_layer_to_map(m, layer['cables'], layer['line'], layer['buffer'])

# Check every point that has coordinates against the buffers of each layer.
# Layers are tested in the order telco, gas, power so markers stack as before.
# A point inside several buffers of the same layer gets a single marker.
located = large_intervals_cleaned.dropna(subset=['LATITUDE', 'LONGITUDE'])
marker_layers = [cable_layers[key] for key in ('telco', 'gas', 'power')]

for idx, lon, lat in tqdm(
    zip(located.index, located['LONGITUDE'], located['LATITUDE']),
    total=len(located),
):
    # Same CRS as the buffers, so "inside" really means "within BUFFER_METERS"
    point_proj = Point(*project_to_meters(lon, lat))

    for layer in marker_layers:
        if any(buffer_proj.contains(point_proj) for buffer_proj in layer['buffers']):
            large_intervals_cleaned.at[idx, layer['flag']] = 1
            folium.CircleMarker(
                location=(lat, lon),
                radius=1,
                color=layer['marker'],
                fill=True,
                fill_color=layer['marker'],
                fill_opacity=0.8
            ).add_to(m)

# Save map
m.save("all_cables_large_intervals_map.html")
