In [1]:
from intervaltree import IntervalTree
from shapely.geometry import Polygon
import numpy as np
import pandas as pd
import geopandas
import numpy as np
from shapely import wkt
import pandas as pd
import numpy as np 
from intervaltree import IntervalTree
from datetime import datetime, timedelta

In [2]:
# find trajectory pair (ti, gi) - (temporal extent, spatial extent)
class Trajectory:
    def __init__(self, traj_id, tgpairs):
    
        # we can put any kind of object, such as points, linestrings, polygons....
        self.traj_id = traj_id
        self.tgpairs = tgpairs[tgpairs['id']==self.traj_id]
                
    def __str__(self):
        return f"{self.traj_id}" # replace as needed
    
    def get_trajprs(self):
        
        self.trajectory_pairs = []
        for i in range(len(self.tgpairs)):
            spatial_ele = self.tgpairs['geom'].iloc[i]
            self.trajectory_pairs.append([ self.tgpairs['ts'].iloc[i], spatial_ele]) # list of temporal and spatial extent
        return self.trajectory_pairs
        
# t1 = Trajectory(2898, tdf)
# print(t1)
# print(t1.get_trajprs()[0:5])

In [46]:
class GIT:
        
    def __init__(self, df, del_x=10, del_y=10):
        
        self.trajDF = df
        self.delta_x, self.delta_y = del_x, del_y
        self.sf_xmin = int(self.trajDF.bounds['minx'].min())-self.delta_x
        self.sf_ymin = int(self.trajDF.bounds['miny'].min())-self.delta_y
        self.sf_xmax = int(self.trajDF.bounds['maxx'].max())+self.delta_x
        self.sf_ymax = int(self.trajDF.bounds['maxy'].max())+self.delta_y
        self.GridTree = {}
        self.grid = {}
        
        # define grid cells
        for x in range(self.sf_xmin, self.sf_xmax, self.delta_x):
            for y in range(self.sf_ymin, self.sf_ymax, self.delta_y):
                cell_idex = (int(x/self.delta_x+1), int(y/self.delta_y+1))
                spatial_info = Polygon([(x, y), (x+self.delta_x, y), (x+self.delta_x, y+self.delta_y), (x, y+self.delta_y)])
                                
                #define empty interval tree in each grid cell
                self.GridTree[cell_idex] = IntervalTree()
                self.grid[cell_idex] = spatial_info
            
    def __str__(self):
        return 'G-IT Index SF:({sf_xmin}, {sf_ymin}):({sf_xmax}, {sf_ymax}), deltaX:{delta_x}, deltaY:{delta_y}'.format(
            sf_xmin=self.sf_xmin,
            sf_ymin=self.sf_ymin,
            sf_xmax=self.sf_xmin,
            sf_ymax=self.sf_ymax,
            delta_x=self.delta_x,
            delta_y=self.delta_y)  # replace as needed


    def FindIntsect(self,plygs, grid):
        # find grid cells which intersect with one object at a specific time
        # polygons from trajectory obejct have time and spatial elements
        # grid object has grid index and polygons

        polygon = plygs[1]
        intersecting_cells = []

        # check each grid cell with polygon, grid[:,1]=polygon coordinate
        for cell_index, cell_loc in self.grid.items():
            if cell_loc.intersects(polygon):
                intersecting_cells.append([cell_index, cell_loc]) #grid[i,0]: grid index
        interset_t = [plygs[0], intersecting_cells]
        return interset_t
    
    def insert(self, trajID):
        
        self.trajID = trajID
        t1 = Trajectory(self.trajID, self.trajDF)
        self.trajprs = t1.get_trajprs()
        
        traj_cell = []
        for time in range(len(self.trajprs)):
            int_result = self.FindIntsect(self.trajprs[time], self.grid) #polygon at t0.
            traj_cell.append(int_result)
        
        # define time interval for each grid cell
        total_grid_set = pd.DataFrame([])
        for ti in range(len(traj_cell)):
            IntSecCells = np.array(traj_cell[ti][1], dtype='object')[:,0]
            IntSecCells = pd.DataFrame(IntSecCells)
            total_grid_set = pd.concat([total_grid_set, IntSecCells], ignore_index=True, axis=1) #spatial info at t(i)

        total_interval = np.array(traj_cell, dtype='object')[:,0]
        # find unique grid cell
        check = total_grid_set.to_numpy().flatten()

        # get rid of nan values
        result = []
        for i in check:
            if i == i:
                result.append(i)
                
        # find unique value in the result list, which contain grid cells info intersecting within whole life span
        unique_list = []       
        for i in result:
            if i not in unique_list:
                unique_list.append(i)
        
        # find time interval such as (ti, tj)
        for each_cell in unique_list:
            
            cell_col = []
            for num_col in range(total_grid_set.shape[1]):
                if each_cell in list(total_grid_set.iloc[:,num_col].values):
                    cell_col.append(num_col)
            start_index = cell_col[0]
            end_inedx = cell_col[-1]
            
            # Add time interval for each grid cell 
            start_time = total_interval[start_index]
            end_time = total_interval[end_inedx]
            if start_time == end_time:
                self.GridTree[each_cell][start_time : start_time + timedelta(seconds=1)] = self.trajID
            else:
                self.GridTree[each_cell][start_time : end_time] = self.trajID
            
        print(f'Id:{self.trajID} is completed!')
        return self.GridTree
    
    def delete_by_id(self, trjId):
        
        self.id = trjId
        # 1. delete trajectory in the table (pandas dataframe)
        self.trajDF = self.trajDF.drop(self.trajDF[self.trajDF['id'] == self.id].index)
        
        # 2. delete trajectory in the Grid tree
        for cell_id, tree in self.GridTree.items():
            delete_items = [interval for interval in tree if interval.data == self.id]
            for item in delete_items:
                tree.remove(item)
        return self.GridTree

    def t_window(self, time=('2012-11-24 16:50:00', '2012-11-25 16:50:00')):
        from datetime import datetime 
        try:        
            start_time = datetime.strptime(time[0], '%Y-%m-%d %H:%M:%S')
            end_time = datetime.strptime(time[1], '%Y-%m-%d %H:%M:%S')

            traj_id = []
            for cell_id, tree in self.GridTree.items():
                intervalset = sorted(tree[start_time:end_time])
                for interval in intervalset:
                    traj_id.append(interval.data)

            # return only unique trajectory id
            traj_id_unique = []       
            for i in traj_id:
                if i not in traj_id_unique:
                    traj_id_unique.append(i)

            print('Intersecting trajectory ID:', traj_id_unique)
            return traj_id_unique
        
        except: 
            print('The date string does not match the expected format')

    def sp_window(self, p1 = (0,0), p2 = (0,0)):
        
        # find cells that contain the query box
        xmin = int(p1[0]/self.delta_x+1)
        xmax = int(p2[0]/self.delta_x+1)
        ymin = int(p1[1]/self.delta_y+1)
        ymax = int(p2[1]/self.delta_y+1)
        
        cell_list = []
        for cell_x in range(xmin, xmax+1):
            for cell_y in range(ymin, ymax+1):
                cell_list.append((cell_x, cell_y))
        
        # find trajectory which overlab with query box. 
        traj_id = []
        for cell in cell_list:
            for tree in self.GridTree[cell]:
                traj_id.append(tree.data)
        
        # return only unique trajectory id
        traj_id_unique = []       
        for i in traj_id:
            if i not in traj_id_unique:
                traj_id_unique.append(i)
        
        print('Intersecting trajectory ID:', traj_id_unique)
        return traj_id_unique
    
    def st_window(self, p1=(0,0), p2=(0,0), time=('2012-11-24 16:50:00', '2012-11-25 16:50:00')):
        
        from datetime import datetime
        # find cells that contain the query box
        xmin = int(p1[0]/self.delta_x+1)
        xmax = int(p2[0]/self.delta_x+1)
        ymin = int(p1[1]/self.delta_y+1)
        ymax = int(p2[1]/self.delta_y+1)
        
        cell_list = []
        for cell_x in range(xmin, xmax+1):
            for cell_y in range(ymin, ymax+1):
                cell_list.append((cell_x, cell_y))
               
        try:        
            start_time = datetime.strptime(time[0], '%Y-%m-%d %H:%M:%S')
            end_time = datetime.strptime(time[1], '%Y-%m-%d %H:%M:%S')

            traj_id = []
            for cell in cell_list:
                intervalset = sorted(self.GridTree[cell][start_time:end_time])
                for interval in intervalset:
                        traj_id.append(interval.data)

            # return only unique trajectory id
            traj_id_unique = []       
            for i in traj_id:
                if i not in traj_id_unique:
                    traj_id_unique.append(i)
                    
            print('Intersecting trajectory ID:', traj_id_unique)
            return traj_id_unique
        
        except: 
            print('The date string does not match the expected format')

In [48]:
def read_toy_dataset(path, column_names):
    traj_df = pd.read_csv(filepath_or_buffer=path, compression='gzip', header=None, sep='\t', names=column_names)
    traj_df['ts'] = pd.to_datetime(traj_df['ts'], unit='ms')
    traj_df['geom'] = traj_df['geom'].apply(wkt.loads)
    traj_df = geopandas.GeoDataFrame(traj_df, geometry='geom')
    return traj_df

path = './toy_traj.csv.gz'
columns = ['id', 'ts', 'geom']
tdf = read_toy_dataset(path, columns)
tdf.head()

Unnamed: 0,id,ts,geom
0,2415,2012-11-24 16:50:00,"POLYGON ((-920.700 152.100, -920.100 152.700, ..."
1,2415,2012-11-24 17:00:00,"POLYGON ((-920.700 152.100, -881.700 160.500, ..."
2,2415,2012-11-24 17:10:00,"POLYGON ((-920.700 152.100, -881.700 160.500, ..."
3,2415,2012-11-24 17:20:00,"POLYGON ((-920.700 152.100, -881.700 160.500, ..."
4,2415,2012-11-24 17:30:00,"POLYGON ((-920.700 152.100, -881.700 160.500, ..."


In [60]:
tdf.bounds['minx'].min()

Unnamed: 0,minx,miny,maxx,maxy
0,-920.7,136.5,-876.9,160.5
1,-920.7,136.5,-876.9,160.5
2,-920.7,136.5,-876.9,160.5
3,-920.7,136.5,-876.9,160.5
4,-920.7,136.5,-876.9,160.5
...,...,...,...,...
385784,711.3,-6.9,882.3,218.1
385785,711.3,-6.9,882.3,218.1
385786,711.3,-6.9,882.3,218.1
385787,711.3,-6.9,882.3,218.1


In [50]:
Gridcelltree = GIT(tdf, del_x=50, del_y=50) # create a grid index tree

In [53]:
print(Gridcelltree)

G-IT Index SF:(-1088, -822):(-1088, 827), deltaX:50, deltaY:50


In [None]:

# First Insert Query
# I try to insert 30 different unique trajectories
per=0
UniqueIDList = tdf['id'].unique()[0:30]
for i in UniqueIDList:
    check = Gridcelltree.insert(trajID=i)
    per+=1
    percentage = round((100*per)/len(UniqueIDList))
    print(f'Complete: {percentage}%', )

# Second Delete Query
Gridcelltree.delete_by_id(trjId=UniqueIDList[0])
Gridcelltree.delete_by_id(trjId=UniqueIDList[25])

# Third Spatial window query
Gridcelltree.sp_window(p1 = (100, -550), p2 = (900, 350))
Gridcelltree.sp_window(p1 = (500, -550), p2 = (900, 350))

# Fourth Temporal window query
Gridcelltree.t_window(time=('2012-05-24 16:50:00', '2012-11-29 16:50:00'))
Gridcelltree.t_window(time=('2012-08-24 00:0:00', '2012-10-05 12:50:00'))

# Fifth 
Gridcelltree.st_window(p1 = (100, -550), p2 = (900, 350), time=('2012-08-24 00:0:00', '2012-10-05 12:50:00'))
Gridcelltree.st_window(p1 = (500, -550), p2 = (900, 350), time=('2012-08-24 00:0:00', '2012-10-05 12:50:00'))
Gridcelltree.st_window(p1 = (100, -550), p2 = (900, 350), time=('2012-05-24 16:50:00', '2012-11-29 16:50:00'))
Gridcelltree.st_window(p1 = (500, -550), p2 = (900, 350), time=('2012-05-24 16:50:00', '2012-11-29 16:50:00'))