In [1]:
import numpy as np
import matplotlib.pyplot as plt
from src.datareader import DataReader
from src.fastspd import FastSPD
import overpy
from collections import defaultdict
import time

In [2]:
data_reader = DataReader("./VED_dataset/*.csv", files_limit=100)
data = data_reader.get_data()

In [3]:
def corners_of_map(data):
    max_long, min_long, max_lat, min_lat = -10e10, 10e10, -10e10, 10e10
    for row in data:
        max_long, min_long = max(max(row[:,1]),max_long), min(min(row[:,1]),min_long)
        max_lat, min_lat = max(max(row[:,2]),max_lat), min(min(row[:,2]),min_lat)
    return max_long, min_long, max_lat, min_lat

area = corners_of_map(data)
print(area)

(42.3258, 42.2203052778, -83.674, -83.8042902778)


In [4]:
def get_listings(area):
    '''
    takes area (x1,y1,x2,y2) and returns lists of POIs from categories:
    (shops, industries, offices, education)
    '''
    def get_shops(data):
        api = overpy.Overpass()
        x1, x2 = data[1], data[0]
        y1, y2 = data[3], data[2]

        api_call = api.query(f'''(node["shop"]({x1},{y1},{x2},{y2});
                    node["building"="retail"]({x1},{y1},{x2},{y2});
                    node["building"="supermarket"]({x1},{y1},{x2},{y2});
                    node["healthcare"="pharmacy"]({x1},{y1},{x2},{y2});
                            );out;''')
        return api_call
    
    def get_offices(data):
        api = overpy.Overpass()
        x1, x2 = data[1], data[0]
        y1, y2 = data[3], data[2]

        api_call = api.query(f'''(node["office"]({x1},{y1},{x2},{y2});
                    node["building"="office"]({x1},{y1},{x2},{y2});
                    node["amenity"="bank"]({x1},{y1},{x2},{y2});
                            );out;''')
        return api_call
    
    def get_industries(data):
        api = overpy.Overpass()
        x1, x2 = data[1], data[0]
        y1, y2 = data[3], data[2]

        api_call = api.query(f'''(node["building"="industrial"]({x1},{y1},{x2},{y2});
                    node["building"="warehouse"]({x1},{y1},{x2},{y2});
                            );out;''')
        return api_call
    
    def get_education(data):
        api = overpy.Overpass()
        x1, x2 = data[1], data[0]
        y1, y2 = data[3], data[2]

        api_call = api.query(f'''(node["amenity"="college"]({x1},{y1},{x2},{y2});
                    node["amenity"="school"]({x1},{y1},{x2},{y2});
                    node["amenity"="university"]({x1},{y1},{x2},{y2});
                    node["amenity"="music_school"]({x1},{y1},{x2},{y2});
                            );out;''')
        return api_call
    
    
    shops_list = get_shops(area).nodes
    time.sleep(5)
    offices_list = get_offices(area).nodes
    time.sleep(5)
    industries_list = get_industries(area).nodes
    time.sleep(5)
    edu_list = get_education(area).nodes
    return shops_list, industries_list, offices_list, edu_list


listings = get_listings(area)
shops_list, industries_list, offices_list, edu_list = listings

In [5]:
def pois_for_point(point, listings):
    '''
    Calculates number of POIs from given listings in close range of given point
    
    example:
    point=(x1,y1,x2,y2)
    listings=( list_of_shops, list_of_offices, ... )
    '''
    def area_around_point(point):
        t = 0.005
        lati,longi = point
        x1, x2 = round(longi-t,4), round(longi+t,4)
        y1, y2 = round(lati-t,4), round(lati+t,4)
        return x1,y1,x2,y2
    
    def count_in_list(listing, area):
        counter = 0
        long1,lat1,long2,lat2 = area
        for node in listing:
            if lat1 < node.lat <lat2 and long1 < node.lon < long2:
                counter+=1
        return counter
    
    aoi = area_around_point(point)
    POIs_count = np.zeros(len(listings),dtype=np.int8)
    for c, category in enumerate(listings):
        POIs_count[c] = count_in_list(category, aoi)
    
    return POIs_count


example_point = (42.3051058, -83.6929794)
res = pois_for_point(example_point, listings)
print(f"Number of shops: {res[0]}; number of industries buildings: {res[1]}; number of offices buildings: {res[2]}; number of education buildings: {res[2]}")

Number of shops: 5; number of industries buildings: 0; number of offices buildings: 2; number of education buildings: 2


In [6]:
fastspd = FastSPD()
compressed_data = list()
for r,row in enumerate(data):
    compressed_data.append(fastspd.compress(row))

In [7]:
len(compressed_data)

27839

In [8]:
def pois_matrix(data, listings, trajectory_max_length=1000):
    save_point = 1000
    spid = 0
    
    matrix_of_pois = np.zeros((len(data), trajectory_max_length, len(listings)),dtype=np.int8)
    for t, trajectory in enumerate(data):
        # save trajectories to file every save_point
        if t % save_point == 0 and t != 0:
            np.savetxt(f"pois3/pois_{spid}.csv", matrix_of_pois[save_point*spid:save_point*(spid+1)].reshape((save_point,-1)).astype(int), delimiter=";")
            spid += 1
        for i, row in enumerate(trajectory):
            if i > trajectory_max_length-1:
                break
            lat, lon = row[1], row[2]
            point = lat, lon
            matrix_of_pois[t][i] = pois_for_point(point,listings)
        
    return matrix_of_pois

result = pois_matrix(compressed_data, listings)

In [12]:
result[1][0]

array([99,  0, 29,  0], dtype=int8)