In [1]:
import s3fs
import pyarrow as pa
import pyarrow.dataset as ds

import sys
import os

import pyspark
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql import Window

import pandas as pd
import polars as pl
import altair as alt
import matplotlib.pyplot as plt

import geopandas as gpd

In [3]:
# Obtain (or reuse) a SparkSession that talks to the remote endpoint given by the "sc://" URL,
# rather than starting a Spark driver inside this kernel.
spark = SparkSession.builder.remote("sc://vlenpmod302spk3.hevs.ch:15002").getOrCreate()

In [4]:
# Read the NYC taxi Parquet dataset from HDFS into a Spark DataFrame and preview two rows.
df=spark.read.parquet("hdfs://vlenpmod302spk1.hevs.ch:9000/mod302/nyc-taxi.parquet")
df.show(2)

+---------+-------------------+-------------------+---------------+-------------+----------------+---------------+------------+------------------+-----------------+----------------+------------+-----------+-----+-------+----------+------------+------------+---------------------+------------------+-------------------+--------------------+----+-----+---+
|vendor_id|          pickup_at|         dropoff_at|passenger_count|trip_distance|pickup_longitude|pickup_latitude|rate_code_id|store_and_fwd_flag|dropoff_longitude|dropoff_latitude|payment_type|fare_amount|extra|mta_tax|tip_amount|tolls_amount|total_amount|improvement_surcharge|pickup_location_id|dropoff_location_id|congestion_surcharge|year|month|day|
+---------+-------------------+-------------------+---------------+-------------+----------------+---------------+------------+------------------+-----------------+----------------+------------+-----------+-----+-------+----------+------------+------------+---------------------+-----------

In [5]:
# Keep only the rows with year == 2010, month == 1 and day == 21.
df_small = df.filter((df.month == 1) & (df.year == 2010) & (df.day == 21))

In [6]:
# Compute a new column trip_duration (in seconds) by subtracting pickup_at from dropoff_at.
# F.unix_timestamp turns each timestamp into epoch seconds, so the difference is a number of seconds.

df_small = df_small.withColumn("trip_duration", F.unix_timestamp("dropoff_at") - F.unix_timestamp("pickup_at"))
df_small.show(2)

+---------+-------------------+-------------------+---------------+-------------+----------------+---------------+------------+------------------+-----------------+----------------+------------+-----------+-----+-------+----------+------------+------------+---------------------+------------------+-------------------+--------------------+----+-----+---+-------------+
|vendor_id|          pickup_at|         dropoff_at|passenger_count|trip_distance|pickup_longitude|pickup_latitude|rate_code_id|store_and_fwd_flag|dropoff_longitude|dropoff_latitude|payment_type|fare_amount|extra|mta_tax|tip_amount|tolls_amount|total_amount|improvement_surcharge|pickup_location_id|dropoff_location_id|congestion_surcharge|year|month|day|trip_duration|
+---------+-------------------+-------------------+---------------+-------------+----------------+---------------+------------+------------------+-----------------+----------------+------------+-----------+-----+-------+----------+------------+------------+-----

In [7]:
import plotly.express as px
import pandas as pd

# Bring at most 1000 rows to the local process and turn each Spark Row into a plain dict
# so pandas can build a DataFrame from them.
df_small_list = []
for row in df_small.take(1000):
    df_small_list.append(row.asDict())
pandas_df_small = pd.DataFrame(df_small_list)

# Point (lat/lon) the map view is centred on; applied through the layout below.
center_coordinates = {"lat": 40.7580, "lon": -73.9855}

# One marker per pickup location; hovering a marker shows the trip duration.
fig = px.scatter_mapbox(
    pandas_df_small,
    lat="pickup_latitude",
    lon="pickup_longitude",
    hover_name="trip_duration",
    zoom=10,
    height=1024,
    mapbox_style="open-street-map",
)
fig.update_layout(mapbox={"center": center_coordinates})

# Render inline and also save a standalone HTML copy of the figure.
fig.show()
fig.write_html("plotly_map.html")


In [1]:
import os

# The Google Maps key is read from the environment so it never appears in the notebook itself.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY")

# Report whether a key was found (the key value itself is deliberately not printed).
print(
    "Error: No API key found. Please set the environment variable."
    if api_key is None
    else "API key loaded successfully."
)

API key loaded successfully.


In [3]:
import requests

def get_walking_time(origin, destination, api_key, timeout=10):
    """
    Calculate walking time between two points using Google Maps Directions API.
    
    Args:
    - origin (tuple): A tuple containing latitude and longitude of the origin point.
    - destination (tuple): A tuple containing latitude and longitude of the destination point.
    - api_key (str): Your Google Maps API key.
    - timeout (float): Seconds to wait for the HTTP request before giving up (default 10).
    
    Returns:
    - (str, int): A tuple containing walking time in human-readable format and duration in seconds.
      (None, None) is returned when the request fails, the response is not valid JSON,
      the API status is not "OK", or the response contains no usable route.
    """
    # Directions API endpoint URL
    url = "https://maps.googleapis.com/maps/api/directions/json"

    # Define the parameters for the request
    params = {
        "origin": f"{origin[0]},{origin[1]}",
        "destination": f"{destination[0]},{destination[1]}",
        "mode": "walking",
        "key": api_key
    }

    # Make the GET request to the Google Maps API. Without a timeout a stalled
    # connection would block the notebook cell indefinitely.
    try:
        response = requests.get(url, params=params, timeout=timeout)
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Only the exception type is printed: the exception text can embed the
        # request URL, which carries the API key as a query parameter.
        print(f"Error: request to the Directions API failed ({type(e).__name__})")
        return None, None

    # Check the status and extract the duration if the status is "OK"
    status = data.get("status") if isinstance(data, dict) else None
    if status != "OK":
        # Handle potential errors in the API response
        print(f"Error: {status}")
        return None, None

    try:
        duration = data["routes"][0]["legs"][0]["duration"]
        # "text" is the human-readable duration, "value" the duration in seconds
        return duration["text"], duration["value"]
    except (KeyError, IndexError) as e:
        print(f"Error extracting walking time: {e}")
        return None, None


In [4]:
# Sample trip: two fixed (latitude, longitude) points.
origin = (40.754734, -73.96569)
destination = (40.765522, -73.954025)

walking_time, walking_time_seconds = get_walking_time(origin, destination, api_key)

# A falsy walking_time means the lookup did not produce a duration.
print(
    f"Estimated walking time: {walking_time}, {walking_time_seconds} seconds"
    if walking_time
    else "Unable to calculate walking time."
)

Estimated walking time: 25 mins, 1504 seconds


In [5]:
import requests

def get_route_data(origin, destination, api_key, timeout=10):
    """
    Make an API call to Google Maps Directions API to get route data.
    
    Args:
    - origin (tuple): A tuple containing latitude and longitude of the origin point.
    - destination (tuple): A tuple containing latitude and longitude of the destination point.
    - api_key (str): Your Google Maps API key.
    - timeout (float): Seconds to wait for the HTTP request before giving up (default 10).
    
    Returns:
    - dict: A dictionary containing the full response from the Google Maps API,
      or None when the request fails, the response is not valid JSON,
      or the API status is not "OK".
    """
    # Directions API endpoint URL
    url = "https://maps.googleapis.com/maps/api/directions/json"

    # Define the parameters for the request
    params = {
        "origin": f"{origin[0]},{origin[1]}",
        "destination": f"{destination[0]},{destination[1]}",
        "mode": "walking",
        "key": api_key
    }

    # Make the GET request to Google Maps API. Without a timeout a stalled
    # connection would block the caller (e.g. a map click callback) indefinitely.
    try:
        response = requests.get(url, params=params, timeout=timeout)
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Only the exception type is printed: the exception text can embed the
        # request URL, which carries the API key as a query parameter.
        print(f"Error: request to the Directions API failed ({type(e).__name__})")
        return None

    # Check the status and return the data if the status is "OK"
    status = data.get("status") if isinstance(data, dict) else None
    if status == "OK":
        return data

    # Handle potential errors in the API response
    print(f"Error: {status}")
    return None

def extract_walking_time(route_data):
    """
    Read the duration of the first leg of the first route in a Directions API response.
    
    Args:
    - route_data (dict): A dictionary containing the response from Google Maps API, or None.
    
    Returns:
    - (str, int): The duration as human-readable text and as a number of seconds.
      (None, None) when route_data is None or the expected keys/indices are missing.
    """
    if route_data is None:
        print("Error: No route data available to extract walking time.")
        return None, None

    try:
        # routes[0] -> legs[0] -> duration holds both representations
        duration = route_data["routes"][0]["legs"][0]["duration"]
        return duration["text"], duration["value"]
    except (KeyError, IndexError) as err:
        print(f"Error extracting walking time: {err}")
        return None, None

In [6]:
# Sample trip: two fixed (latitude, longitude) points.
origin = (40.754734, -73.96569)
destination = (40.765522, -73.954025)

# Fetch the full Directions response once; later cells reuse route_data.
route_data = get_route_data(origin, destination, api_key)

# Pull the walking duration out of that response.
walking_time, walking_time_seconds = extract_walking_time(route_data)

# A falsy walking_time means no duration could be extracted.
print(
    f"Estimated walking time: {walking_time}, {walking_time_seconds} seconds"
    if walking_time
    else "Unable to calculate walking time."
)

Estimated walking time: 25 mins, 1504 seconds


In [7]:
def extract_addresses(route_data):
    """
    Read the start and end addresses of the first leg of the first route in a Directions API response.
    
    Args:
    - route_data (dict): A dictionary containing the response from Google Maps API, or None.
    
    Returns:
    - (str, str): The start address and the destination address.
      (None, None) when route_data is None or the expected keys/indices are missing.
    """
    if route_data is None:
        print("Error: No route data available to extract addresses.")
        return None, None

    try:
        # Both addresses live on the first leg of the first route
        first_leg = route_data["routes"][0]["legs"][0]
        return first_leg["start_address"], first_leg["end_address"]
    except (KeyError, IndexError) as err:
        print(f"Error extracting addresses: {err}")
        return None, None

In [8]:
start_address, end_address = extract_addresses(route_data)

# Both addresses must be present (truthy) to be reported.
if not (start_address and end_address):
    print("Unable to extract addresses.")
else:
    print(f"Start address: {start_address}")
    print(f"End address: {end_address}")

Start address: 931 1st Ave., New York, NY 10022, USA
End address: 515 E 70th St, New York, NY 10021, USA


In [9]:
def extract_polyline(route_data):
    """
    Extract the encoded polyline from the Google Maps Directions API route data.
    
    Args:
    - route_data (dict): The full response data from the Google Maps Directions API, or None.
    
    Returns:
    - str: The encoded overview polyline of the first route if available, None otherwise
      (route_data is None, or the expected keys/indices are missing).
    """
    # Guard against a failed lookup: indexing None raises TypeError, which the
    # except clause below does not cover.
    if route_data is None:
        print("Error: No route data available to extract polyline.")
        return None

    try:
        # Extract the overview polyline from the route
        return route_data["routes"][0]["overview_polyline"]["points"]
    except (KeyError, IndexError):
        print("Error: Unable to extract polyline from the provided route data.")
        return None

In [10]:
import polyline

def decode_polyline(encoded_polyline):
    """
    Turn an encoded polyline string into the points it encodes, using polyline.decode.

    Args:
    - encoded_polyline (str): The encoded polyline string to decode.

    Returns:
    - list: The decoded points as returned by polyline.decode, or None when the
      input is empty/None or decoding raises.
    """
    if encoded_polyline:
        try:
            points = polyline.decode(encoded_polyline)
        except Exception as err:
            # Any decoding failure is reported and mapped to None
            print(f"An error occurred: {err}")
            points = None
        return points

    print("No encoded polyline provided.")
    return None

{
    "geocoded_waypoints": [
        {
            "geocoder_status": "OK",
            "place_id": "ChIJJxxBK-JYwokRSl6QjuCG_Y0",
            "types": ["street_address"]
        },
        {
            "geocoder_status": "OK",
            "place_id": "ChIJGVrZ-8NYwokRlkCW1UJv0Gc",
            "types": ["street_address"]
        }
    ],
    "routes": [
        {
            "bounds": {
                "northeast": {
                    "lat": 40.7664511,
                    "lng": -73.9541035
                },
                "southwest": {
                    "lat": 40.7547012,
                    "lng": -73.9656129
                }
            },
            "copyrights": "Map data ©2024 Google",
            "legs": [
                {
                    "distance": {
                        "text": "1.1 mi",
                        "value": 1779
                    },
                    "duration": {
                        "text": "25 mins",
                        "value": 1504
                    },
                    "start_address": "931 1st Ave., New York, NY 10022, USA",
                    "end_address": "515 E 70th St, New York, NY 10021, USA",
                    "start_location": {
                        "lat": 40.7547012,
                        "lng": -73.9656129
                    },
                    "end_location": {
                        "lat": 40.7654247,
                        "lng": -73.9541035
                    },
                    "steps": [
                        {
                            "distance": {
                                "text": "0.9 mi",
                                "value": 1501
                            },
                            "duration": {
                                "text": "21 mins",
                                "value": 1282
                            },
                            "start_location": {
                                "lat": 40.7547012,
                                "lng": -73.9656129
                            },
                            "end_location": {
                                "lat": 40.7664445,
                                "lng": -73.95705439999999
                            },
                            "html_instructions": "Head <b>northeast</b> on <b>1st Ave.</b> toward <b>E 52nd St</b>",
                            "polyline": {
                                "points": "{{vwF`lmbMuA}@GRKGFU]UMIcAq@ITKGHW[QcAs@OKITKGFUQMc@Wa@YWOGRKIFS[SGEmAw@GTKIFS[UIEsA}@GRKGFSyByAIRKGFSu@i@]U]UITKIHUa@Ya@W]UCAGEITKIFUeAs@i@]IVIGAAHWQKGEMIKGCAAAu@g@ADGNKIDO@Es@e@_@U]UGTMIHSgBmAGEITKGHUSM[SaAo@ITKIHUw@g@CA[UWQITKGFUw@i@YS_@UGTMGHUmBoAGRMIHSqBqAIRKGFSEE{@k@c@["
                            },
                            "travel_mode": "WALKING"
                        },
                        {
                            "distance": {
                                "text": "0.2 mi",
                                "value": 268
                            },
                            "duration": {
                                "text": "4 mins",
                                "value": 210
                            },
                            "start_location": {
                                "lat": 40.7664445,
                                "lng": -73.95705439999999
                            },
                            "end_location": {
                                "lat": 40.7653118,
                                "lng": -73.9541886
                            },
                            "html_instructions": "Turn <b>right</b> onto <b>E 70th St</b>",
                            "maneuver": "turn-right",
                            "polyline": {
                                "points": "geywFpvkbMDOGE`A}C`A}CP_@Tw@H_@FDDOGEf@aB"
                            },
                            "travel_mode": "WALKING"
                        },
                        {
                            "distance": {
                                "text": "33 ft",
                                "value": 10
                            },
                            "duration": {
                                "text": "1 min",
                                "value": 12
                            },
                            "start_location": {
                                "lat": 40.7653118,
                                "lng": -73.9541886
                            },
                            "end_location": {
                                "lat": 40.7654247,
                                "lng": -73.9541035
                            },
                            "html_instructions": "Turn <b>left</b>",
                            "maneuver": "turn-left",
                            "polyline": {
                                "points": "e~xwFtdkbMKIIG"
                            },
                            "travel_mode": "WALKING"
                        }
                    ],
                    "traffic_speed_entry": [],
                    "via_waypoint": []
                }
            ],
            "overview_polyline": {
                "points": "{{vwF`lmbMuA}@GRKGFUk@_@cAq@ITKGHW_BeAOKITKGFUu@e@y@i@GRKIFS[SuA}@GTKIFS[U}AcAGRKGFSyByAIRKGFSu@i@{@k@ITKIHUa@Y_Am@KGITKIFUeAs@i@]IVKIHWQKUOOIw@i@ITKIDO@Es@e@}@k@GTMIHSgBmAGEITKGHUo@a@aAo@ITKIHU{@i@s@g@ITKGFUw@i@y@i@GTMGHUmBoAGRMIHSqBqAIRKGFSEE_BgADOGEbC{Hf@wAH_@FDDOGEf@aBKIIG"
            },
            "summary": "1st Ave.",
            "warnings": [
                "Walking directions are in beta. Use caution – This route may be missing sidewalks or pedestrian paths."
            ],
            "waypoint_order": []
        }
    ],
    "status": "OK"
}

# Google Maps Directions API Response

## Geocoded Waypoints
- **Geocoded Waypoints**:
  - **Waypoint 1**:
    - `geocoder_status`: `OK`
    - `place_id`: `ChIJJxxBK-JYwokRSl6QjuCG_Y0`
    - `types`: `['street_address']`
  - **Waypoint 2**:
    - `geocoder_status`: `OK`
    - `place_id`: `ChIJGVrZ-8NYwokRlkCW1UJv0Gc`
    - `types`: `['street_address']`

## Routes
- **Routes**:
  - **Route 1**:
    - **Bounds**:
      - `northeast`: 
        - `lat`: `40.7664511`
        - `lng`: `-73.9541035`
      - `southwest`: 
        - `lat`: `40.7547012`
        - `lng`: `-73.9656129`
    - **Copyrights**: `Map data ©2024 Google`
    - **Legs**:
      - **Leg 1**:
        - **Distance**:
          - `text`: `1.1 mi`
          - `value`: `1779`
        - **Duration**:
          - `text`: `25 mins`
          - `value`: `1504`
        - **Start Address**: `931 1st Ave., New York, NY 10022, USA`
        - **End Address**: `515 E 70th St, New York, NY 10021, USA`
        - **Start Location**:
          - `lat`: `40.7547012`
          - `lng`: `-73.9656129`
        - **End Location**:
          - `lat`: `40.7654247`
          - `lng`: `-73.9541035`
        - **Steps**:
          - **Step 1**:
            - **Distance**:
              - `text`: `0.9 mi`
              - `value`: `1501`
            - **Duration**:
              - `text`: `21 mins`
              - `value`: `1282`
            - **Start Location**:
              - `lat`: `40.7547012`
              - `lng`: `-73.9656129`
            - **End Location**:
              - `lat`: `40.7664445`
              - `lng`: `-73.95705439999999`
            - **Instructions**: `Head northeast on 1st Ave. toward E 52nd St`
            - **Polyline**: ``{{vwF`lmbMuA}@GRKGFU]UMIcAq@ITKGHW[QcAs@OKITKGFUQMc@Wa@YWOGRKIFS[SGEmAw@GTKIFS[UIEsA}@GRKGFSyByAIRKGFSu@i@]U]UITKIHUa@Ya@W]UCAGEITKIFUeAs@i@]IVIGAAHWQKGEMIKGCAAAu@g@ADGNKIDO@Es@e@_@U]UGTMIHSgBmAGEITKGHUSM[SaAo@ITKIHUw@g@CA[UWQITKGFUw@i@YS_@UGTMGHUmBoAGRMIHSqBqAIRKGFSEE{@k@c@[``
            - **Travel Mode**: `WALKING`
          - **Step 2**:
            - **Distance**:
              - `text`: `0.2 mi`
              - `value`: `268`
            - **Duration**:
              - `text`: `4 mins`
              - `value`: `210`
            - **Start Location**:
              - `lat`: `40.7664445`
              - `lng`: `-73.95705439999999`
            - **End Location**:
              - `lat`: `40.7653118`
              - `lng`: `-73.9541886`
            - **Instructions**: `Turn right onto E 70th St`
            - **Maneuver**: `turn-right`
            - **Polyline**: ``geywFpvkbMDOGE`A}C`A}CP_@Tw@H_@FDDOGEf@aB``
            - **Travel Mode**: `WALKING`
          - **Step 3**:
            - **Distance**:
              - `text`: `33 ft`
              - `value`: `10`
            - **Duration**:
              - `text`: `1 min`
              - `value`: `12`
            - **Start Location**:
              - `lat`: `40.7653118`
              - `lng`: `-73.9541886`
            - **End Location**:
              - `lat`: `40.7654247`
              - `lng`: `-73.9541035`
            - **Instructions**: `Turn left`
            - **Maneuver**: `turn-left`
            - **Polyline**: `e~xwFtdkbMKIIG`
            - **Travel Mode**: `WALKING`
    - **Overview Polyline**:
      - `points`: ``{{vwF`lmbMuA}@GRKGFUk@_@cAq@ITKGHW_BeAOKITKGFUu@e@y@i@GRKIFS[SuA}@GTKIFS[U}AcAGRKGFSyByAIRKGFSu@i@{@k@ITKIHUa@Y_Am@KGITKIFUeAs@i@]IVKIHWQKUOOIw@i@ITKIDO@Es@e@}@k@GTMIHSgBmAGEITKGHUo@a@aAo@ITKIHU{@i@s@g@ITKGFUw@i@y@i@GTMGHUmBoAGRMIHSqBqAIRKGFSEE_BgADOGEbC{Hf@wAH_@FDDOGEf@aBKIIG``
    - **Summary**: `1st Ave.`
    - **Warnings**:
      - `"Walking directions are in beta. Use caution – This route may be missing sidewalks or pedestrian paths."`
    - **Waypoint Order**: `[]`

## Status
- **Status**: `OK`


In [14]:
from ipyleaflet import *
from ipywidgets import  HTML

In [17]:
# Interactive map; the click handlers defined below add markers and a route to it.
m = Map(center=(40.7128, -74.0060), zoom=14)

# Module-level state mutated by markers_handler on every click:
click_pos=[]        # coordinates of the most recent clicks
markers=[]          # Marker layers currently tracked for those clicks
polyline_layer=[]   # Polyline layer of the currently drawn route

# HTML widget whose value is set to the walking-time text by markers_handler.
walking_time_wigdet = HTML()

# Icon used for the first (start) marker.
icon_start = AwesomeIcon(
    name='play',
    marker_color='red',
    icon_color='black',
    spin=False
)

# Icon used for the second (stop) marker.
icon_stop = AwesomeIcon(
    name='stop-circle',
    marker_color='blue',
    icon_color='black',
    spin=False
)

def markers_handler(**kwargs):
    """
    Handle one map click: update the start/stop markers and redraw the walking route.

    The clicked position is appended to click_pos. The first click places a start
    marker, the second a stop marker (and adds the walking-time control to the map).
    From the third click on, the oldest position is dropped so the previous stop
    becomes the new start and the new click becomes the stop.
    Whenever two positions are known, the route between them is requested and, if
    available, drawn as a blue polyline and its walking time shown in the widget.

    Args:
    - **kwargs: Interaction event fields; only 'coordinates' is read here.

    Returns:
    - None. Mutates the module-level click_pos, markers, polyline_layer, the map m
      and walking_time_wigdet.
    """
    click_pos.append(kwargs.get('coordinates'))

    if len(click_pos)==3:
        # Drop the previous pair of markers and the previous route (if one was drawn;
        # there is none when the last route lookup failed).
        m.remove(markers[0])
        m.remove(markers[1])
        if polyline_layer:
            m.remove(polyline_layer.pop(0))

        # Slide the window: the old stop becomes the new start.
        click_pos.pop(0)
        markers.clear()

        marker_start=Marker(location=click_pos[0],icon=icon_start)
        marker_stop=Marker(location=click_pos[1],icon=icon_stop)

        m.add(marker_start)
        m.add(marker_stop)
        markers.extend([marker_start, marker_stop])

    elif len(click_pos)==2:
        marker_stop=Marker(location=click_pos[1],icon=icon_stop)
        m.add(marker_stop)
        markers.append(marker_stop)

        # The walking-time control is added once, when the first route becomes possible.
        walking_time_control = WidgetControl(widget=walking_time_wigdet, position='topright')
        m.add_control(walking_time_control)
    else:
        marker_single=Marker(location=click_pos[0],icon=icon_start)
        m.add(marker_single)
        markers.append(marker_single)

    if len(click_pos)>=2:
        route_data = get_route_data(click_pos[0], click_pos[1], api_key)
        walking_time, _walking_time_seconds = extract_walking_time(route_data)

        # Show an explicit placeholder instead of the literal "None" when no time is known.
        if walking_time is None:
            walking_time_wigdet.value = "<b>Estimated walking time:</b> unavailable"
        else:
            walking_time_wigdet.value = f"<b>Estimated walking time:</b> {walking_time}"

        # Only decode and draw a route when the lookup succeeded; a failed lookup must
        # not raise inside the click callback.
        route_points = None
        if route_data is not None:
            route_points = decode_polyline(extract_polyline(route_data))

        if route_points:
            route_line = Polyline(locations=route_points, color="blue", fill=False)
            polyline_layer.append(route_line)
            m.add(route_line)


def handle_click(**kwargs):
    """
    Map interaction callback: forward click events to markers_handler, ignore the rest.

    Args:
    - **kwargs: Interaction event fields; 'type' selects which events are handled.
    """
    if kwargs.get('type') != 'click':
        return
    markers_handler(**kwargs)

# Register the callback so every map interaction event is passed to handle_click.
m.on_interaction(handle_click)

# Last expression of the cell: display the map widget.
m

Label(value='')

Map(center=[40.7128, -74.006], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zo…

Estimated walking time: 20 mins
Estimated walking time: 13 mins
Estimated walking time: 18 mins
Estimated walking time: 25 mins
Estimated walking time: 19 mins
