In [None]:
import pandas as pd
import geopandas as gpd
import geodatasets
import matplotlib.pyplot as plt
import numpy as np
import movingpandas as mpd
import shapely as shp
import hvplot.pandas 
import matplotlib.pyplot as plt
import math
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
from urllib.request import urlretrieve
from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
from holoviews import opts, dim
from os.path import exists
import contextily as ctx
import geoplot
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import HDBSCAN
from sklearn.cluster import DBSCAN
from tqdm import tqdm
import folium
import geoviews as gv
import geoviews.feature as gf
import holoviews as hv
from geoviews import opts
from cartopy import crs
gv.extension('bokeh', 'matplotlib')
from scipy.spatial import ConvexHull, convex_hull_plot_2d
plot_defaults = {'linewidth':5, 'capstyle':'round', 'figsize':(20,10), 'legend':True}
opts.defaults(opts.Overlay(active_tools=['wheel_zoom'], frame_width=500, frame_height=400))
hvplot_defaults = {'tiles':None, 'cmap':'Viridis', 'colorbar':True}
%matplotlib inline

In [None]:
import zipfile
import pandas as pd
import os
import shutil


def estrai_dataframe_da_zip(zip_path, lat_min, lat_max, lon_min, lon_max):
    """
    Estrae un DataFrame da una cartella zip di AIS data con un filtro basato su latitudine e longitudine.

    Parameters:
    - zip_path (str): Il percorso del file zip contenente i dati AIS.
    - lat_min, lat_max, lon_min, lon_max (float): I valori minimi e massimi per la latitudine e la longitudine.

    Returns:
    - pandas.DataFrame: Il DataFrame filtrato.
    """

    # Estrai il contenuto dell'archivio zip in una cartella temporanea
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        temp_dir = 'temp_extracted_folder'
        zip_ref.extractall(temp_dir)

    # Leggi i file CSV nella cartella temporanea e filtra i dati in base ai valori di latitudine e longitudine
    dataframes = []
    for root, dirs, files in os.walk(temp_dir):
        for file in files:
            if file.endswith('.csv'):
                file_path = os.path.join(root, file)
                df = pd.read_csv(file_path)

                # Applica il filtro sulla regione geografica
                df_filtered = df[(df['LAT'] >= lat_min) & (df['LAT'] <= lat_max) &
                                  (df['LON'] >= lon_min) & (df['LON'] <= lon_max)]

                dataframes.append(df_filtered)

    # Concatena tutti i DataFrame filtrati in uno
    final_df = pd.concat(dataframes, ignore_index=True)

    # Rimuovi la cartella temporanea
    shutil.rmtree(temp_dir)

    return final_df


In [21]:
import zipfile
import pandas as pd
import os
import shutil

def estrai_dataframe_da_cartella(cartella, lat_min, lat_max, lon_min, lon_max):
    """
    Estrae un DataFrame da una cartella contenente file zip di AIS data con un filtro basato su latitudine e longitudine.

    Parameters:
    - cartella (str): Il percorso della cartella contenente i file zip AIS.
    - lat_min, lat_max, lon_min, lon_max (float): I valori minimi e massimi per la latitudine e la longitudine.

    Returns:
    - pandas.DataFrame: Il DataFrame concatenato filtrato da tutti i file zip.
    """

    dataframes = []

    # Scansiona tutti i file nella cartella
    for root, dirs, files in os.walk(cartella):
        for file in files:
            if file.endswith('.zip'):
                zip_path = os.path.join(root, file)
                temp_df = estrai_dataframe_da_zip(zip_path, lat_min, lat_max, lon_min, lon_max)
                dataframes.append(temp_df)

    # Concatena tutti i DataFrame filtrati in uno
    final_df = pd.concat(dataframes, ignore_index=True)

    return final_df

In [22]:
# Esempio di utilizzo
cartella_dati = "../NOAA_Office for Coastal Management_AIS/Dataset/AIS_DATA_gennaio_2021"
min_lat, max_lat, min_lon, max_lon = 18, 22.5, -160, -150  # Imposta i valori della tua regione


df_filtered_combined = estrai_dataframe_da_cartella(cartella_dati, min_lat, max_lat, min_lon, max_lon)

In [27]:
print('numero dati: '+ str(len(df_filtered_combined)) +  '  periodo temporale  ' + str(df_filtered_combined.iloc[0]['BaseDateTime'])+' //  '+ str(df_filtered_combined.iloc[len(df_filtered_combined)-1]['BaseDateTime']))
df_filtered_combined.head()

numero dati: 1887647  periodo temporale  2021-01-01T00:17:29 //  2021-01-31T23:59:12


Unnamed: 0,MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,VesselName,IMO,CallSign,VesselType,Status,Length,Width,Draft,Cargo,TranscieverClass
0,0,2021-01-01T00:17:29,21.31638,-157.87736,0.0,218.4,511.0,RAINBOWS,IMO0000000,WDK6055,37.0,,0.0,0.0,2.7,30.0,B
1,367055690,2021-01-01T00:17:12,21.28671,-157.84403,0.0,262.0,511.0,AMERICAN EMERALD,IMO8842296,WDC6392,31.0,0.0,19.0,7.0,,39.0,A
2,367128430,2021-01-01T00:17:26,21.26552,-157.88073,8.0,176.5,511.0,SEA MOON I,IMO0000000,WDI3527,30.0,,22.0,6.0,,,B
3,366999665,2021-01-01T00:17:13,21.30943,-157.87424,0.0,119.5,511.0,CG KITTIWAKE,,NTNL,90.0,0.0,26.0,5.0,,,A
4,367389160,2021-01-01T00:17:14,21.31032,-157.8693,0.1,346.2,337.0,PI ILANI,IMO9369186,WDE7117,31.0,0.0,23.0,10.0,,,A


In [25]:
#mappa geografica con folium hawaii 21, -158

map = folium.Map(location=[21, -158], tiles="OpenStreetMap", zoom_start=9)
for i in range(0,len(df_filtered_combined),10000):
   folium.Marker(
      location=[df_filtered_combined.iloc[i]['LAT'], df_filtered_combined.iloc[i]['LON']]
   ).add_to(map)
map 