# Locations Generation
Locations used to compute the coverage are generated according to four scenarios:
1. List of subways stations
2. POI (museums and squares)
3. Grid generation
4. Mix obtained by: POI + random locations in the city center


In [None]:
import pandas as pd
import yaml
import osmnx as ox
import shapely.geometry
import pyproj
import os
import geopandas as gpd
import numpy as np



# Read the notebook settings from the YAML file in the working directory.
# NOTE(review): yaml.FullLoader is kept as-is; for a plain key/value config
# yaml.safe_load would presumably be enough - confirm before switching.
with open("./conf.yaml") as conf_file:
    conf = yaml.load(conf_file, Loader=yaml.FullLoader)

# Prefix prepended to the name of the files written by this notebook.
out_path = conf["out_path"]

# Centre point and search distance handed to the OSM queries.
beijing_center_lat, beijing_center_lon, distance_from_center = (
    conf[key]
    for key in ("beijing_center_lat", "beijing_center_lon", "distance_from_center")
)

# South-west / north-east corners of the bounding box and the grid step.
sw_lon, sw_lat, ne_lon, ne_lat = (
    conf[key] for key in ("sw_lon", "sw_lat", "ne_lon", "ne_lat")
)
gripd_stepsize = conf["gripd_stepsize"]

# Extra input paths declared in the configuration.
pathDFHigh, pathBox = (conf[key] for key in ("pathDFHigh", "pathBox"))

# Echo the box path as the cell output.
pathBox



## 1. Beijing subway stations for traffic fluxes monitoring
We get subway stations in a 50km range from Beijing's city center

In [None]:
# Query OSM for the features tagged station=subway around the city centre.
tags = {"station": "subway"}
search_center = (beijing_center_lat, beijing_center_lon)
gdf = ox.geometries.geometries_from_point(search_center, tags, dist=distance_from_center)

# Keep only the rows that are tagged as subway and actually carry a geometry.
is_subway = gdf.station == "subway"
has_geometry = gdf.geometry.notna()
gdf_subway = gdf[is_subway & has_geometry]
print("We have {:d} stations in our range".format(len(gdf_subway)))

# Reduce every station to its OSM id plus the coordinates of its centroid.
station_centroids = gdf_subway.geometry.centroid
df = pd.DataFrame(
    {
        "id_location": gdf_subway.osmid.values,
        "lat": station_centroids.y.values,
        "lon": station_centroids.x.values,
    }
)
print(df.head())
df.to_csv(out_path + "beijing_subway_stations.csv", index=False)

## 2.POIs: Squares, monuments, shop, railways etc 
We extract different kinds of pois in order to model an urban poi-based data collection campaign. 
(We use the same 50km radius)

In [None]:
# OSM tag filters describing the kinds of POIs to download.
tags = {
    "place": "square",
    "historic": "monument",
    "shop": "mall",
    "railway": "station",
    "amenity": "bus_station",
}

search_center = (beijing_center_lat, beijing_center_lon)
gdf = ox.geometries.geometries_from_point(search_center, tags, dist=distance_from_center)
print("We have {:d} POIs in our range".format(len(gdf)))

# One row per POI: OSM id and the lat/lon of the geometry centroid.
poi_centroids = gdf.geometry.centroid
df = pd.DataFrame(
    {
        "id_location": gdf.osmid.values,
        "lat": poi_centroids.y.values,
        "lon": poi_centroids.x.values,
    }
)
print(df.head())
df.to_csv(out_path+"POIs.csv", index=False)

## 3. Grid generation
Grid of locations spaced 5 km apart (the step is read from the configuration), within a bounding box

In [None]:
# Build a regular grid of locations inside the configured bounding box.
# The grid is laid out in EPSG:3857 coordinates with a fixed step and every
# node is converted back to lon/lat before being written to CSV.

# Set up reusable transformers. They replace the deprecated
# pyproj.Proj(init=...) / pyproj.transform() pair, which was invoked once per
# grid node. always_xy=True keeps the (lon, lat) argument order that the
# original init-style definitions used.
# NOTE(review): EPSG:4214 is kept from the original code; the corners are
# presumably WGS84 (EPSG:4326) coordinates - confirm which CRS is intended.
to_metric = pyproj.Transformer.from_crs("EPSG:4214", "EPSG:3857", always_xy=True)
to_lonlat = pyproj.Transformer.from_crs("EPSG:3857", "EPSG:4214", always_xy=True)

# Create corners of rectangle to be transformed to a grid
sw = shapely.geometry.Point(sw_lon, sw_lat)
ne = shapely.geometry.Point(ne_lon, ne_lat)

# Project corners to the metric projection
transformed_sw = to_metric.transform(sw.x, sw.y)
transformed_ne = to_metric.transform(ne.x, ne.y)

# A non-positive step would make the loops below spin forever.
if gripd_stepsize <= 0:
    raise ValueError("gripd_stepsize must be positive, got {!r}".format(gripd_stepsize))

# Walk the rectangle column by column (x outer, y inner) collecting the
# projected coordinates; the conversion back to lon/lat is done in one batch.
grid_xs = []
grid_ys = []
x = transformed_sw[0]
while x < transformed_ne[0]:
    y = transformed_sw[1]
    while y < transformed_ne[1]:
        grid_xs.append(x)
        grid_ys.append(y)
        y += gripd_stepsize
    x += gripd_stepsize

if grid_xs:
    grid_lons, grid_lats = to_lonlat.transform(grid_xs, grid_ys)
else:
    # Degenerate box (NE corner not beyond SW corner): no grid nodes.
    grid_lons, grid_lats = [], []

gridpoints = [shapely.geometry.Point(lon, lat) for lon, lat in zip(grid_lons, grid_lats)]

grid = [(p.x, p.y) for p in gridpoints]
cols = ("lon", "lat")
grid_df = pd.DataFrame(grid, columns=cols)

# The DataFrame index is written too (no index=False here), as before.
grid_df.to_csv(out_path+"5000_m_grid1.csv")
print("Finished generating a regular grid with {:d} locations".format(len(grid_df)))

## 4. Mix
Mix obtained by POI + random

In [None]:
# Build the MIX location set: POIs downloaded from OSM plus random points
# drawn inside the "boxHigh" polygon's bounding box.

# Fetch points from osm
tags = {"place":"square", "historic":"monument", "shop":"mall", "railway":"station", "amenity":"bus_station"}

gdf = ox.geometries.geometries_from_point((beijing_center_lat,beijing_center_lon), tags, dist = distance_from_center)
print("We have {:d} POIs in our range".format(len(gdf)))
poi_centroids = gdf.geometry.centroid
df = pd.DataFrame(
    {
        "id_location": gdf.osmid.values,
        "lat": poi_centroids.y.values,
        "lon": poi_centroids.x.values,
    }
)

# Read the boxes shapefile from the configured path instead of a
# machine-specific absolute path.
# NOTE(review): assumes conf["pathBox"] points at the box.shp file that was
# hard-coded here before - confirm against conf.yaml.
Box = gpd.GeoDataFrame(gpd.read_file(pathBox), crs="EPSG:4326")
Box = Box.set_index('index')

boxHigh = Box.at["boxHigh", 'geometry']
pdfHigh = gpd.GeoDataFrame({'geometry': [boxHigh]}, crs="EPSG:4326")

# shapely bounds come as (minx, miny, maxx, maxy) = (minlon, minlat, maxlon, maxlat)
boundsBox = boxHigh.bounds
minlon, minlat, maxlon, maxlat = boundsBox

# Generate random points only within the bounding box of boxHigh.
# The previous code sampled a fixed 0.1 x 0.1 degree square anchored at the
# SW corner and never used maxlon/maxlat, so points could fall outside the
# box or cover only one corner of it.
#old value 2000
augemented_locations = 1000
xpoints = np.random.uniform(minlon, maxlon, augemented_locations)
ypoints = np.random.uniform(minlat, maxlat, augemented_locations)
# NOTE(review): ids are random floats in [1, 100000); they are not checked
# against the OSM ids above - confirm this is acceptable downstream.
id_locations = np.random.uniform(1,100000,augemented_locations)

extra_ponts = pd.DataFrame(
    {
        "id_location": id_locations,
        "lat": ypoints,
        "lon": xpoints,
    }
)

dfs = [df,extra_ponts]
df_mix = pd.concat(dfs)
df_mix.to_csv(out_path+"MIX.csv", index = False)



# Test: Visualizing locations and trajectories
- Load the locations (grid or subways or pois)
- Load the trajectories
- Visualize locations and some trajectories


In [None]:
# Load the subway-station locations and number them with their row position.
subway_csv = os.path.join(out_path, "beijing_subway_stations.csv")
locations = pd.read_csv(subway_csv)
n_locations = len(locations)
print("There are a total of {:d} locations in our dataset".format(n_locations))

# Expose the row index as an explicit "location" column.
locations["location"] = locations.index.values
print(locations.head())

In [None]:
# Load only the columns needed to build trajectories.
cols = ["lat", "lon", "uid", "tid","date_time"]
# parse_dates=True would only try to parse the index; name the timestamp
# column explicitly so that "date_time" is actually parsed as datetimes.
dataset = pd.read_csv(out_path + "geolife_full_augmented.csv", usecols=cols, parse_dates=["date_time"])
print("Successfully loaded trajectory dataset")
print(dataset.head())

In [None]:
import skmob
import folium

# Wrap the raw dataset into a TrajDataFrame. The user column of the loaded
# dataset is "uid"; the previous value 'uit' did not match any column.
tdf = skmob.TrajDataFrame(dataset, latitude='lat', longitude="lon", datetime='date_time', user_id='uid', trajectory_id="tid")

m = folium.Map(location=[39.9042, 116.4074], tiles="Stamen Toner")

# Draw every location as a filled circle with a 500 radius.
for lat, lon, index in zip(locations.lat.values, locations.lon.values, locations.index.values):
    folium.Circle(
        location=[lat, lon],
        radius=500,
        popup='location {:d} radius'.format(index),
        color='#3186cc',
        fill=True,
        fill_color='#3186cc'
        ).add_to(m)

#change this to plot more trajectories
trajs_to_plot = 100
# Only the first `trajs_to_plot` trajectory ids are drawn on the map.
for tid in tdf.tid.unique()[:trajs_to_plot]:
    tdf[tdf["tid"] == tid].plot_trajectory(m, weight=4, opacity=1, start_end_markers=False)

m
#m.save(outfile= out_path + "locations_grid_trajectories.html")

In [None]:
# Flows
from skmob.tessellation import tilers

# Squared tessellation over "Beijing, China" with meters=15000.
squared_tiler = tilers.tiler
tessellation = squared_tiler.get(
    "squared",
    meters=15000,
    base_shape="Beijing, China",
)

# Convert the trajectories into a FlowDataFrame on that tessellation,
# keeping self loops.
fdf = tdf.to_flowdataframe(self_loops=True, tessellation=tessellation)

In [None]:
# Draw the flows first, then add the tessellation to the same map object.
flow_style = {"flow_color": 'red', "flow_weight": 10}
m = fdf.plot_flows(**flow_style)
fdf.plot_tessellation(
    map_osm=m,
    popup_features=['tile_ID', 'population'],
)
m

# Different strategies for location picking
We investigate new strategies for choosing our locations.

We begin with a random picking and then we get picks from an external csv.

## Random picking

In [None]:
import random as rand


def _draw_point():
    """Return one random (lat, lon) pair inside the fixed bounding box."""
    lat = rand.uniform(39.8, 40.0)
    lon = rand.uniform(116.2, 116.6)
    return (lat, lon)


#picking 50 random points in our inner bounding box perimeter
points = 50
cols = ("lat", "lon")
rd_pts = [_draw_point() for _ in range(points)]
rd_locations = pd.DataFrame(rd_pts, columns=cols)

In [None]:
# Show the first rows of the random locations as a quick sanity check.
rd_preview = rd_locations.head()
print(rd_preview)

Now we visualize the random grid in a folium map

In [None]:
# Map centred on the fixed coordinates below, used to display the random locations.
rd_m = folium.Map(tiles="Stamen Toner", location=[39.9042, 116.4074])

# Shared look of every location circle.
circle_style = {
    "radius": 1000,
    "color": '#3186cc',
    "fill": True,
    "fill_color": '#3186cc',
}

for lat, lon, index in zip(rd_locations["lat"], rd_locations["lon"], rd_locations.index):
    circle = folium.Circle(
        location=[lat, lon],
        popup='location {:d} radius'.format(index),
        **circle_style
    )
    circle.add_to(rd_m)

#change this to plot more trajectories
trajs_to_plot = 20
for tid in tdf.tid.unique():
    single_trajectory = tdf[tdf["tid"] == tid]
    single_trajectory.plot_trajectory(rd_m, weight=4, opacity=1, start_end_markers=False)
    trajs_to_plot -= 1
    # Stop once the requested number of trajectories has been drawn.
    if not trajs_to_plot:
        break

rd_m

## CSV loading
We try loading a previously generated CSV with POIs (subway stations, in this example) in order to use them as locations.

In [None]:
# Load the subway stations CSV. `data_path` is not defined anywhere in this
# notebook; the file is written with the `out_path` prefix, so read it from
# there.
df = pd.read_csv(out_path + "beijing_subway_stations.csv")

# Keep only the stations inside the inner bounding box.
df = df[(df['lat'].between(39.8, 40.0)) & (df['lon'].between(116.2, 116.6))]

# DataFrame.info() prints its report itself and returns None, so it is not
# wrapped in print() (which would add a stray "None" line).
df.info()

In [None]:
# Map showing the stations loaded from the CSV plus a few trajectories.
csv_m = folium.Map(location=[39.9042, 116.4074], tiles="Stamen Toner")

# Only lat/lon are iterated: the loop body never used a station name, and
# the stations CSV written by this notebook has just id_location/lat/lon,
# so accessing `df.name` would fail.
for lat, lon in zip(df.lat, df.lon):
    folium.Circle(
        location=[lat, lon],
        radius=250,
        color='#3186cc',
        fill=True,
        fill_color='#3186cc'
        ).add_to(csv_m)

#change this to plot more trajectories
trajs_to_plot = 20
for tid in tdf.tid.unique():
    tdf[tdf["tid"] == tid].plot_trajectory(csv_m, weight=4, opacity=1, start_end_markers=False)
    trajs_to_plot -= 1
    if (trajs_to_plot == 0):
        break


csv_m

## Considerations on fixed locations
Metro stations might be especially useful to get coverage from people going by foot, while other types of facilities might be exploited for different kinds of mobility profiles.
Motorway junctions could be good if we had to address a mainly car-based mobility profile, while bicycle lanes could be used for cyclists.