In [None]:
from __future__ import absolute_import, division, print_function
from mayavi import mlab
import numpy as np
import math
import pandas as pd
from sklearn.preprocessing import normalize
import pyproj
import pptk
from ripser import ripser
from ripser import Rips
from datetime import datetime

# Projection used to turn longitude/latitude into planar UTM coordinates
# (zone 50, WGS84 ellipsoid).
proj = pyproj.Proj(proj='utm', zone=50, ellps='WGS84')

# Load the raw dataset into a DataFrame.
df = pd.read_csv("dataset_raw_full.csv")

# Encode every distinct label as an integer category code and store it in a
# new 'Encoding' column.
labels_cat = pd.Categorical(df['Label'])
df['Encoding'] = labels_cat.codes

# Build the lookup {label: encoding}, e.g. (from the original notes)
# {'taxi': 7, 'walk': 9, 'bus': 3, 'train': 8, 'car': 4, 'airplane': 0,
#  'subway': 6, 'bike': 1, 'run': 5, 'boat': 2}
# Each label has exactly one code, so de-duplicating the (Label, Encoding)
# pairs keeps one row per label, at its first occurrence.
label_pairs = df[['Label', 'Encoding']].drop_duplicates()
labels = label_pairs['Label']
encodings = label_pairs['Encoding']
label_map = dict(zip(labels, encodings))

# Filter the data on latitude and longitude to obtain only the points in
# centre/suburbs (open bounding box: 39.5 < lat < 40.25, 115.5 < lon < 116.7).
lat_long_mask = (
    (df['Latitude'] > 39.5) & (df['Latitude'] < 40.25)
    & (df['Longitude'] > 115.5) & (df['Longitude'] < 116.7)
)
# .copy() makes the filtered frame independent of the original one, so the
# column assignment below writes to a real DataFrame rather than to a slice
# (this is what triggers pandas' SettingWithCopyWarning otherwise).
df = df[lat_long_mask].copy()

# Filter the data on daytime traffic: keep rows whose timestamp hour is 7 or 8,
# i.e. from 07.00 up to (but not including) 09.00.
df['Date_Time'] = pd.to_datetime(df['Date_Time'])
hour_of_day = df['Date_Time'].dt.hour  # extracted once, reused for both bounds
daytime_mask = (hour_of_day >= 7) & (hour_of_day < 9)
df = df[daytime_mask]

# Filter the data on car traffic, using the integer code assigned to 'car'.
# Raises KeyError if the dataset contains no 'car' label.
car_encoding = label_map['car']
car_mask = (df['Encoding'] == car_encoding)
df = df[car_mask]

# Project longitude/latitude into planar UTM coordinates.
x, y = proj(df['Longitude'].tolist(), df['Latitude'].tolist())

# Assemble the (n, 3) point array: projected x, projected y and the altitude
# scaled by 0.3048 (feet -> metres), then open it in the pptk point-cloud viewer.
altitude_m = 0.3048 * df['Altitude']
p = np.column_stack((x, y, altitude_m))
v = pptk.viewer(p)

# Persistence diagrams of the filtered points.
# NOTE(review): the point cloud is built from the raw Longitude/Latitude
# columns (degrees), not from the projected x/y coordinates -- confirm that
# measuring distances in degrees is intended.
x_coords = df['Longitude'].tolist()
y_coords = df['Latitude'].tolist()

# Construct the point cloud as an (n, 2) array with one (longitude, latitude)
# row per point.
point_cloud = np.stack((x_coords, y_coords), axis=1)

# Run ripser and keep only the list of diagrams from its result dictionary.
diagrams = ripser(point_cloud)['dgms']

# Print the persistence diagrams, one array per entry of the list.
for diagram in diagrams:
    print(diagram)
