In [1]:
from __future__ import absolute_import, division, print_function
from mayavi import mlab
import numpy as np
import math
import pandas as pd
from sklearn.preprocessing import normalize
import pyproj
import pptk

In [2]:
#random examples
def show_spheres():
    """Animate a glyph moving around the unit circle in the z = 0 plane.

    A stationary glyph is drawn at the origin and a second glyph is stepped
    through 100 evenly spaced angles in [0, 2*pi]. The animation generator
    never terminates on its own (``while True``), so the orbit repeats.
    Blocks in ``mlab.show()``.
    """
    angles = np.linspace(0, 2*math.pi, 100)

    # Circle coordinates; z stays at zero for every sample.
    xs = np.cos(angles)
    ys = np.sin(angles)
    zs = np.zeros_like(xs)

    # Fixed reference glyph at the origin.
    mlab.points3d(0,0,0)
    # Moving glyph, initialised at the first sample of the circle.
    moving_glyph = mlab.points3d(xs[:1], ys[:1], zs[:1])

    @mlab.animate(delay=100)
    def anim():
        # Fetch the current figure, as the original did (handle itself unused).
        mlab.gcf()
        while True:
            for px, py, pz in zip(xs, ys, zs):
                print('Updating scene...')
                moving_glyph.mlab_source.set(x=px, y=py, z=pz)
                # Hand control back to the animation driver after each step.
                yield

    anim()
    mlab.show()
    
def show_dots():
    """Plot 40 random points with a random scalar attached to each one.

    Coordinates and scalar values are drawn uniformly from [0, 1).
    Blocks in ``mlab.show()``.
    """
    # Rows of the (4, 40) sample are, in order: x, y, z, scalar value.
    samples = np.random.random((4, 40))
    x, y, z, value = samples
    mlab.points3d(x, y, z, value)
    mlab.show()
def show_sphere():
    """Draw a unit sphere as a surface mesh with a black wireframe overlay.

    Clears the current figure first and blocks in ``mlab.show()``.
    """
    mlab.clf()

    # 11 x 11 grid of polar (0..pi) and azimuthal (0..2*pi) angles.
    polar, azimuth = np.mgrid[0:np.pi:11j, 0:2*np.pi:11j]

    # Spherical -> Cartesian coordinates on the unit sphere.
    sin_polar = np.sin(polar)
    xs = sin_polar * np.cos(azimuth)
    ys = sin_polar * np.sin(azimuth)
    zs = np.cos(polar)

    # Surface first, then the same mesh again as a black wireframe on top.
    mlab.mesh(xs, ys, zs)
    mlab.mesh(xs, ys, zs, representation='wireframe', color=(0, 0, 0))
    mlab.show()
    
def show_surface():
    """Render the radial function sin(r)/r as a warped surface.

    Clears the current figure first and blocks in ``mlab.show()``.
    """
    mlab.clf()

    # 100 x 100 grid over [-10, 10]^2. With an even number of samples the
    # grid never lands exactly on 0, so the radius below is never zero.
    grid_x, grid_y = np.mgrid[-10:10:100j, -10:10:100j]
    radius = np.sqrt(grid_x**2 + grid_y**2)
    height = np.sin(radius)/radius

    mlab.surf(height, warp_scale='auto')
    mlab.show()

def test_points3d(long,lat,z):
    """Show the given coordinates as 3D point glyphs.

    Parameters
    ----------
    long, lat, z
        Coordinates of the points, passed straight through to
        ``mlab.points3d`` as its x, y and z arguments.

    Glyphs are drawn with ``scale_factor=.25`` and the "copper" colormap.
    Blocks in ``mlab.show()``.
    """
    mlab.points3d(long, lat, z, colormap="copper", scale_factor=.25)
    mlab.show()
# Our current dataset
def traffic_data(lat,long,alt):
    """Draw a surface over the lat/long meshgrid, with a black wireframe overlay.

    The surface height is sqrt(lat**2 + long**2) evaluated on the grid built
    by ``np.meshgrid(lat, long)``. Clears the current figure first and blocks
    in ``mlab.show()``.

    NOTE(review): ``alt`` is accepted but never used; the height is derived
    from the grid coordinates only. Confirm whether altitude was meant to be
    plotted instead.
    """
    mlab.clf()

    grid_lat, grid_long = np.meshgrid(lat, long)
    height = np.sqrt(grid_lat**2 + grid_long**2)

    # Surface first, then the same mesh again as a black wireframe on top.
    for extra_kwargs in ({}, {'representation': 'wireframe', 'color': (0, 0, 0)}):
        mlab.mesh(grid_lat, grid_long, height, **extra_kwargs)
    mlab.show()

In [32]:
# Projection from lon/lat degrees to UTM zone 50 on the WGS84 ellipsoid.
proj = pyproj.Proj(
    proj='utm',
    zone=50,
    ellps='WGS84',
)

# Load the raw trajectory table and preview its first 400 rows.
df = pd.read_csv("dataset_raw_full.csv")
df.head(n=400)

Unnamed: 0,Latitude,Longitude,Altitude,Date_Time,Id_user,Id_perc,Label
0,41.741415,86.186028,-777.0,2008-03-31 16:00:08,10,20080331160008.plt,taxi
1,41.737063,86.179470,-777.0,2008-03-31 16:01:07,10,20080331160008.plt,taxi
2,41.734105,86.172823,-777.0,2008-03-31 16:02:07,10,20080331160008.plt,taxi
3,41.739110,86.166563,-777.0,2008-03-31 16:03:06,10,20080331160008.plt,taxi
4,41.744368,86.159987,-777.0,2008-03-31 16:04:05,10,20080331160008.plt,taxi
...,...,...,...,...,...,...,...
395,39.474037,75.990490,4088.0,2008-04-02 06:22:41,10,20080402060926.plt,walk
396,39.474037,75.990477,4090.0,2008-04-02 06:22:42,10,20080402060926.plt,walk
397,39.474040,75.990462,4091.0,2008-04-02 06:22:43,10,20080402060926.plt,walk
398,39.474045,75.990448,4090.0,2008-04-02 06:22:44,10,20080402060926.plt,walk


In [67]:
# Encode every transport label as an integer category code.
labels_cat = pd.Categorical(df["Label"])
df["encoding"] = labels_cat.codes

# Distinct labels and distinct codes, each in order of first appearance.
labels = df["Label"].drop_duplicates()
encodings = df["encoding"].drop_duplicates()
print(labels)

# Label -> code lookup. The pairing relies on both drop_duplicates() calls
# keeping the same rows, which holds because each label has exactly one code.
label_map = {name: code for name, code in zip(labels, encodings)}
print(label_map)

df.head(n=10)

0              taxi
22             walk
791             bus
1477          train
143449          car
203971     airplane
280608       subway
331763         bike
581929          run
1551126        boat
Name: Label, dtype: object
{'taxi': 7, 'walk': 9, 'bus': 3, 'train': 8, 'car': 4, 'airplane': 0, 'subway': 6, 'bike': 1, 'run': 5, 'boat': 2}


Unnamed: 0,Latitude,Longitude,Altitude,Date_Time,Id_user,Id_perc,Label,encoding
0,41.741415,86.186028,-777.0,2008-03-31 16:00:08,10,20080331160008.plt,taxi,7
1,41.737063,86.17947,-777.0,2008-03-31 16:01:07,10,20080331160008.plt,taxi,7
2,41.734105,86.172823,-777.0,2008-03-31 16:02:07,10,20080331160008.plt,taxi,7
3,41.73911,86.166563,-777.0,2008-03-31 16:03:06,10,20080331160008.plt,taxi,7
4,41.744368,86.159987,-777.0,2008-03-31 16:04:05,10,20080331160008.plt,taxi,7
5,41.744513,86.159808,-777.0,2008-03-31 16:05:04,10,20080331160008.plt,taxi,7
6,41.748142,86.15533,-777.0,2008-03-31 16:06:03,10,20080331160008.plt,taxi,7
7,41.74964,86.153458,-777.0,2008-03-31 16:07:02,10,20080331160008.plt,taxi,7
8,41.754737,86.148085,-777.0,2008-03-31 16:08:02,10,20080331160008.plt,taxi,7
9,41.758855,86.144468,-777.0,2008-03-31 16:09:01,10,20080331160008.plt,taxi,7


In [68]:
# Normalise longitude, latitude and altitude, each passed to sklearn's
# normalize() as a single-row matrix.
long, lat, alt = (
    normalize([np.array(df[column].to_list())])
    for column in ("Longitude", "Latitude", "Altitude")
)

# Project lon/lat degrees to metric UTM coordinates.
x, y = proj(list(df['Longitude']), list(df['Latitude']))

In [69]:
# Point array with columns (longitude, latitude, 0): the raw, unprojected
# coordinates laid flat on the z = 0 plane.
P = np.column_stack((
    df['Longitude'].tolist(),
    df['Latitude'].tolist(),
    np.zeros(len(df)),
))

# Open the point-cloud viewer and colour the points by altitude.
v = pptk.viewer(P)
v.attributes(df['Altitude'])
# Optional colour map, left disabled:
#v.color_map('jet', scale=[0, 20000])

In [148]:
# Keep only the points inside the lon/lat box that holds the useful cluster.
mask_51 = (
    (df['Longitude'] > 115.5)
    & (df['Longitude'] < 116.7)
    & (df['Latitude'] > 39.5)
    & (df['Latitude'] < 40.25)
)
df1 = df.loc[mask_51]

# Project to UTM and build (x, y, z) points.
x, y = proj(list(df1['Longitude']), list(df1['Latitude']))
p = np.column_stack((x, y, 0.3048 * df1['Altitude']))  # alt originally in feet

# Show every point whose label code is non-zero, coloured by that code.
# NOTE(review): code 0 is 'airplane' in the label map printed earlier, so
# this drops airplane points rather than unlabelled ones - confirm intent.
mask_labelled = df1['encoding'].ne(0)
print(df1.loc[mask_labelled, 'encoding'])
v = pptk.viewer(p[mask_labelled.to_numpy()])
v.attributes(df1.loc[mask_labelled, 'encoding'])

41880      3
41881      3
41882      3
41883      3
41884      3
          ..
3387298    3
3387299    3
3387300    3
3387301    3
3387302    3
Name: encoding, Length: 2818377, dtype: int8


In [145]:
def print_label(df2, label, long_s, long_f, lat_s, lat_f):
    """Open a pptk viewer on the points of ``df2`` that carry one label code
    and fall inside a longitude/latitude box.

    Parameters
    ----------
    df2 : pandas.DataFrame
        Frame with 'Longitude', 'Latitude', 'Altitude' and 'encoding' columns.
    label : int
        Label code to keep (compared against the 'encoding' column).
    long_s, long_f : float
        Exclusive lower / upper longitude bounds.
    lat_s, lat_f : float
        Exclusive lower / upper latitude bounds.

    Returns
    -------
    None
        The viewer is opened as a side effect.

    Notes
    -----
    The selection is computed from ``df2`` itself. Earlier revisions ignored
    the argument and read the notebook globals ``df`` and ``df1`` instead,
    which only worked when ``df1`` had already been built for the same box.
    Still relies on the notebook-level ``proj`` projection.
    """
    in_box = (
        (df2['Longitude'] > long_s) & (df2['Longitude'] < long_f)
        & (df2['Latitude'] > lat_s) & (df2['Latitude'] < lat_f)
    )
    selected = df2[in_box & (df2['encoding'] == label)]

    # Project lon/lat to UTM and build (x, y, z) points.
    x, y = proj(selected['Longitude'].tolist(), selected['Latitude'].tolist())
    p = np.c_[x, y, 0.3048 * selected['Altitude']]  # alt originally in feet

    # Every selected row already has the requested code, so no second mask.
    v = pptk.viewer(p)
    v.attributes(selected['encoding'])
    


In [147]:
# One viewer per label code, all over the same lon/lat box. In the label map
# printed earlier these codes are 3 = bus, 8 = train, 4 = car, 9 = walk.
for label_code in (3, 8, 4, 9):
    print_label(df, label_code, 115.5, 116.7, 39.5, 40.25)