In [None]:
import datetime
import math
import os
import sys


In [None]:
# strptime pattern used to parse the BaseDateTime strings (e.g. 2022-08-15T00:00:00).
datetimeFormat = "%Y-%m-%dT%H:%M:%S"
# Width of one time bucket in seconds: 30 minutes of 60 seconds each.
timeBucketSeconds = 30 * 60
# Multiplier handed to deResoluteFloat; 10 keeps one decimal place of lat/lon.
deResoluteFactor = 10

In [None]:
# Relative path of the AIS CSV file that readinSourceFile loads.
sourceFile = "../../ais/AIS_2022_08_15.csv"

In [None]:
def readinSourceFile(sf):
    """Load an AIS CSV file and group its position reports by vessel.

    The first line is treated as the header. Every later line is split on
    commas; column 0 is used as the vessel key (MMSI), column 1 as the
    timestamp string, and columns 2 and 3 as latitude and longitude.

    Lines with fewer than four fields (blank or truncated lines) cannot
    supply a position and are skipped instead of raising IndexError; they are
    still included in the returned line count.

    Args:
        sf: Path of the CSV file to read.

    Returns:
        A tuple ``(header, rv, globalMinDT, globalMaxDT, c)`` where
        ``header`` is the list of header fields (None for an empty file),
        ``rv`` maps each MMSI string to a list of
        ``{"BaseDateTime", "LAT", "LON"}`` dicts in file order,
        ``globalMinDT`` / ``globalMaxDT`` are the smallest and largest
        timestamp strings seen (None when there are no data rows), and
        ``c`` is the total number of lines read, header included.

    Raises:
        ValueError: If a latitude or longitude field is not a valid float.
    """
    rv = {}
    header = None
    c = 0
    globalMinDT = None
    globalMaxDT = None
    with open(sf) as fIn:
        for line in fIn:
            lineBits = line.strip().split(",")
            lineBitsLen = len(lineBits)
            if lineBitsLen != 17:
                # Diagnostic only: report any line whose field count is not 17.
                print(lineBitsLen)
            if c == 0:
                header = lineBits
            elif lineBitsLen >= 4:
                mmsi = str(lineBits[0])
                baseDateTime = lineBits[1]
                item = {
                    "BaseDateTime": baseDateTime,
                    "LAT": float(lineBits[2]),
                    "LON": float(lineBits[3]),
                }
                rv.setdefault(mmsi, []).append(item)
                # Timestamps are compared as strings; this orders correctly
                # because the format runs from most to least significant field.
                if globalMinDT is None or baseDateTime < globalMinDT:
                    globalMinDT = baseDateTime
                if globalMaxDT is None or baseDateTime > globalMaxDT:
                    globalMaxDT = baseDateTime
            c = c + 1
    return (header, rv, globalMinDT, globalMaxDT, c)

In [None]:
def filterAISData(aisData, minCount):
    """Sort, de-duplicate and prune the per-vessel report lists.

    Each vessel's reports are sorted by their "BaseDateTime" value, reports
    repeating the timestamp of the report kept just before them are dropped,
    and the vessel is kept only when at least ``minCount`` reports remain
    (many vessels only have one or two reports).

    Args:
        aisData: Mapping of vessel key to a list of report dicts.
        minCount: Minimum number of distinct-timestamp reports required.

    Returns:
        A new dict of vessel key to the sorted, de-duplicated report list.
    """
    kept = {}
    for vesselKey, reports in aisData.items():
        uniqueReports = []
        previousStamp = None
        for report in sorted(reports, key=lambda r: r['BaseDateTime']):
            stamp = report["BaseDateTime"]
            if stamp == previousStamp:
                # Same timestamp as the report already kept: skip the repeat.
                continue
            uniqueReports.append(report)
            previousStamp = stamp
        if len(uniqueReports) < minCount:
            continue
        kept[vesselKey] = uniqueReports
    return kept

In [None]:
#deprecated
def findClosestNeighborsToDateTime(dtTarget, aisShipData):
    """Find the reports that bracket ``dtTarget`` most tightly (deprecated).

    Scans every report, parsing its "BaseDateTime" string with
    ``datetimeFormat``, and remembers the latest report at or before the
    target and the earliest report at or after it. When several reports share
    the winning timestamp, the one appearing last in ``aisShipData`` is kept.

    Args:
        dtTarget: datetime to bracket.
        aisShipData: Iterable of report dicts with a "BaseDateTime" key.

    Returns:
        A tuple ``(itemNearMin, itemNearMax)``; either element is None when
        no report lies on that side of the target.
    """
    itemNearMin = None
    itemNearMax = None
    itemNearMinDTO = None
    itemNearMaxDTO = None

    for x in aisShipData:
        dto = datetime.datetime.strptime(x["BaseDateTime"], datetimeFormat)
        if dto <= dtTarget and (itemNearMin is None or dto >= itemNearMinDTO):
            itemNearMin = x
            itemNearMinDTO = dto
        if dto >= dtTarget and (itemNearMax is None or dto <= itemNearMaxDTO):
            itemNearMax = x
            itemNearMaxDTO = dto
    return (itemNearMin, itemNearMax)

In [None]:
def interpolatePointFromNeighbors(dtTarget, prevPoint, nextPoint):
    """Linearly interpolate a position at ``dtTarget`` between two reports.

    Args:
        dtTarget: datetime at which the position is wanted.
        prevPoint: Report dict ("BaseDateTime", "LAT", "LON") for the start.
        nextPoint: Report dict ("BaseDateTime", "LAT", "LON") for the end.

    Returns:
        A ``(lat, lon)`` tuple of floats. When both reports carry the same
        timestamp string, the position of ``prevPoint`` is returned.
    """
    stampBefore = prevPoint["BaseDateTime"]
    latBefore = float(prevPoint["LAT"])
    lonBefore = float(prevPoint["LON"])
    stampAfter = nextPoint["BaseDateTime"]
    latAfter = float(nextPoint["LAT"])
    lonAfter = float(nextPoint["LON"])
    timeBefore = datetime.datetime.strptime(stampBefore, datetimeFormat)
    timeAfter = datetime.datetime.strptime(stampAfter, datetimeFormat)

    if stampBefore != stampAfter:
        # Fraction of the interval that has elapsed at dtTarget.
        fraction = (dtTarget - timeBefore) / (timeAfter - timeBefore)
        latAtTarget = latBefore + (latAfter - latBefore) * fraction
        lonAtTarget = lonBefore + (lonAfter - lonBefore) * fraction
        return (latAtTarget, lonAtTarget)

    # Identical timestamps: nothing to interpolate (and no division by zero).
    return (latBefore, lonBefore)
    

In [None]:
def deResoluteFloat(f, p):
    """Reduce the resolution of ``f`` to steps of ``1/p``.

    The value is scaled by ``p``, truncated toward zero with ``int``, and
    scaled back, e.g. ``deResoluteFloat(12.345, 10)`` gives ``12.3``.
    """
    scaledAndTruncated = int(f * p)
    return float(scaledAndTruncated / p)

In [None]:
# Read the source CSV: header fields, per-vessel reports, the smallest and
# largest timestamp strings, and the number of lines read.
header, aisData, globalMinDT, globalMaxDT, totalLines = readinSourceFile(sourceFile)

In [None]:
# Keep only vessels that still have at least 3 reports after de-duplication.
aisDataFiltered = filterAISData(aisData, minCount=3)

In [None]:
# Drop this name's reference to the unfiltered data; aisDataFiltered is used from here on.
aisData = None

In [None]:
# Parse the smallest and largest timestamp strings returned by readinSourceFile
# into datetime objects; these are passed to createPaths for every vessel.
minDTO = datetime.datetime.strptime(globalMinDT, datetimeFormat)
maxDTO = datetime.datetime.strptime(globalMaxDT, datetimeFormat)

In [None]:
def createPaths(aisShipData, globalMinDT, globalMaxDT):
    """Resample one vessel's reports onto the shared time-bucket grid.

    The grid consists of the instants ``globalMinDT + k * timeBucketSeconds``.
    Every grid instant up to the vessel's last report gets an entry; instants
    that fall between the vessel's first and last report carry a linearly
    interpolated position reduced in resolution with ``deResoluteFloat``,
    all others carry ``None`` for both "lat" and "lon".

    Interpolated entries are keyed on the shared grid (not on offsets from
    the vessel's own first report) so that paths of different vessels use the
    same keys and can be compared bucket by bucket.

    Args:
        aisShipData: List of report dicts ("BaseDateTime", "LAT", "LON"),
            sorted by ascending timestamp.
        globalMinDT: datetime that anchors the grid.
        globalMaxDT: Accepted for interface compatibility but not used.
            NOTE(review): the placeholder grid stops at this vessel's last
            report rather than at globalMaxDT - confirm that is intended.

    Returns:
        Dict mapping each grid datetime to ``{"dt", "lat", "lon"}``. An empty
        report list yields an empty dict; a single report yields placeholder
        entries only, since there is nothing to interpolate between.
    """
    pathpoints = {}
    if not aisShipData:
        return pathpoints

    step = datetime.timedelta(0, timeBucketSeconds)
    # Parse every timestamp once instead of re-parsing inside the loops.
    stamps = [
        datetime.datetime.strptime(item["BaseDateTime"], datetimeFormat)
        for item in aisShipData
    ]
    minDT = stamps[0]
    maxDT = stamps[-1]

    # Placeholder entries for every grid instant up to the last report.
    dtX = globalMinDT
    while dtX <= maxDT:
        pathpoints[dtX] = {"dt": dtX, "lat": None, "lon": None}
        dtX = dtX + step

    aisShipDataLen = len(aisShipData)
    if aisShipDataLen < 2:
        return pathpoints

    # First grid instant at or after the vessel's first report. The negated
    # floor division of the negative difference is a ceiling division.
    dtX = globalMinDT
    if minDT > globalMinDT:
        bucketsToSkip = -((globalMinDT - minDT) // step)
        dtX = globalMinDT + bucketsToSkip * step

    idxS = 1  # index of the report at or after dtX; idxS - 1 is the one before
    while dtX <= maxDT:
        # Advance to the first report that is not earlier than dtX, never
        # stepping past the final report.
        while idxS < aisShipDataLen - 1 and stamps[idxS] < dtX:
            idxS = idxS + 1
        lastShipItem = aisShipData[idxS - 1]
        currentShipItem = aisShipData[idxS]
        if stamps[idxS - 1] <= dtX <= stamps[idxS]:
            (interp_point_lat, interp_point_long) = interpolatePointFromNeighbors(
                dtX, lastShipItem, currentShipItem
            )
            deres_lat = deResoluteFloat(interp_point_lat, deResoluteFactor)
            deres_lon = deResoluteFloat(interp_point_long, deResoluteFactor)
            pathpoints[dtX] = {"dt": dtX, "lat": deres_lat, "lon": deres_lon}
        dtX = dtX + step

    return pathpoints

In [None]:
# Build the resampled path of every remaining vessel, printing the running
# vessel count once per 1000 vessels as a progress indicator.
aisPaths = {}
c = 0

for mmsi, mmsiData in aisDataFiltered.items():
    paths = createPaths(mmsiData, minDTO, maxDTO)
    aisPaths[mmsi] = paths
    if c % 1000 == 0:
        print(c)
    c += 1


In [None]:
def wrapDistances(d1, d2):
    """Shift one of two coordinates by 360 when they straddle the wrap-around.

    If one value is below -90 and the other above 90, the value above 90 has
    360 subtracted so that the pair's difference is the short way round.
    Otherwise both values are returned unchanged.

    Returns:
        The ``(d1, d2)`` pair, possibly with one value shifted.
    """
    if d1 < -90 and d2 > 90:
        return (d1, d2 - 360)
    if d2 < -90 and d1 > 90:
        return (d1 - 360, d2)
    return (d1, d2)

In [None]:
def computeDistanceKM(lat1, lon1, lat2, lon2):
    """Return the haversine distance in km from (lat1, lon1) to (lat2, lon2).

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The distance in kilometres on a sphere of radius 6371 km.
    """
    R = 6371  # sphere radius in km
    sin, cos, radians = math.sin, math.cos, math.radians
    # NOTE(review): wrapDistances only alters values beyond +/-90, so for
    # latitudes inside [-90, 90] the first call leaves the pair unchanged.
    (lat1, lat2) = wrapDistances(lat1, lat2)
    (lon1, lon2) = wrapDistances(lon1, lon2)
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    # Floating-point rounding can leave `a` marginally outside [0, 1] (for
    # example near-antipodal points), which would make sqrt(1 - a) raise
    # ValueError; clamp it back into the valid range.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

In [None]:
def comparePaths(pathA, pathB):
    """Count the keys at which both paths hold exactly the same position.

    A key counts when it is present in both paths, neither entry has a
    ``None`` "lat", and the "lat" and "lon" values are equal.

    Args:
        pathA: Dict of key to ``{"lat", "lon", ...}`` entries.
        pathB: Dict of the same shape.

    Returns:
        The number of matching keys.
    """
    matches = 0
    for stamp, pointA in pathA.items():
        latA = pointA["lat"]
        lonA = pointA["lon"]
        if latA is None or stamp not in pathB:
            continue
        pointB = pathB[stamp]
        latB = pointB["lat"]
        lonB = pointB["lon"]
        if latB is None:
            continue
        if latA == latB and lonA == lonB:
            matches += 1
    return matches
                   

In [None]:
def showSimilarPaths(pathA, pathB):
    """Print both positions for every key at which both paths have one.

    For each key of ``pathA`` that also exists in ``pathB`` and whose "lat"
    is not None in either path, one line is printed with the key's ISO
    timestamp followed by the lat/lon pair of each path.
    """
    for stamp, pointA in pathA.items():
        if stamp not in pathB:
            continue
        pointB = pathB[stamp]
        if pointA["lat"] is None or pointB["lat"] is None:
            continue
        print(stamp.isoformat(), "(", pointA["lat"], pointA["lon"], "), (", pointB["lat"], pointB["lon"], ")")
        

In [None]:
# Compare every ordered pair of distinct vessels and report the pairs whose
# paths coincide in more than 10 time buckets, followed by the shared buckets.
for mmsi, pathA in aisPaths.items():
    for mmsiB, pathB in aisPaths.items():
        if mmsi == mmsiB:
            continue
        sims = comparePaths(pathA, pathB)
        if sims <= 10:
            continue
        print(mmsi, mmsiB, sims)
        showSimilarPaths(pathA, pathB)
