# 1. Initialize

### Libraries and Spark context

In [2]:
import pyspark
from pyspark.sql import SparkSession
from probetrip.visualize import StatVis

In [None]:
# Create (or reuse) the notebook's Spark session on YARN with Hive support.
spark = (
    SparkSession.builder
    .master("yarn")
    .appName("6_visualize_stat")
    # Per-process resources: 2 cores / 4g per executor, 1 core / 2g driver.
    .config("spark.executor.cores", "2")
    .config("spark.driver.cores", "1")
    .config("spark.executor.memory", "4g")
    .config("spark.driver.memory", "2g")
    # Executor count: 2 requested, dynamic allocation bounded to 1..3.
    .config("spark.executor.instances", "2")
    .config("spark.dynamicAllocation.enabled", "true")
    .config("spark.dynamicAllocation.minExecutors", "1")
    .config("spark.dynamicAllocation.maxExecutors", "3")
    # Use the Hive catalog / metastore for table lookups.
    .config("spark.sql.catalogImplementation", "hive")
    .config("metastore.catalog.default", "hive")
    .enableHiveSupport()
    .getOrCreate()
)

In [45]:
# Shut down the Spark session created in the previous cell.
# NOTE(review): this cell's execution counter (In [45]) suggests it was run
# last in the original session. Executed top-to-bottom it stops Spark before
# the cells in section 2 can use it, so run it only once all plots are done.
spark.stop()

### Required parameters

In [None]:
# Database and table names handed to StatVis in section 2.
database_name = "probe_taxi"
trip_table = "trip_point"
od_table = "od_occupied"
speed_acc_table = "speed_acc"

# NOTE(review): not passed to StatVis or used by any cell visible in this
# notebook; confirm whether it is still needed.
spark_stat_table = "spark_stat"

# Period selectors used by the plotting cells.
month = "202301"  # presumably YYYYMM; confirm against StatVis
# Used only for the speeding-area maps, because they cannot be plotted
# for a whole month.
trip_date = "2023-01-01"

# 2. Execution

In [None]:
import os

# Build the visualisation helper from the Spark session and the database /
# table names set in the parameter cell. Every cell below calls methods on it.
sv = StatVis(spark, database_name, trip_table, od_table, speed_acc_table)

# The plotting cells below save their figures under 'result/'. Create the
# directory up front so the first savefig call does not fail with
# FileNotFoundError in a fresh working directory.
os.makedirs('result', exist_ok=True)

### Travel pattern

In [None]:
# Build the figure returned by sv.total_trip for the selected month and
# save it as a PNG under result/.
fig = sv.total_trip(month)
fig.savefig('result/total_trip.png')

In [None]:
# Distribution figure for the 'distance' metric over the selected month,
# saved as a PNG under result/.
fig = sv.trip_distribution('distance', month)
fig.savefig('result/distance_distribution.png')

In [None]:
# Distribution figure for the 'duration' metric over the selected month,
# saved as a PNG under result/.
fig = sv.trip_distribution('duration', month)
fig.savefig('result/duration_distribution.png')

In [None]:
# OD (presumably origin-destination) heatmap for 'Bangkok' over the selected
# month, saved as a PNG under result/.
fig = sv.od_heatmap('Bangkok', month)
fig.savefig('result/od_heatmap.png')

### Accessibility

In [None]:
# Density map of trip origins for 'Bangkok' over the selected month.
# eps / min_samples look like DBSCAN-style clustering parameters; the unit
# of eps is not visible here (TODO confirm).
# The call is the cell's last expression, so the notebook displays whatever
# it returns.
sv.od_density_map('Bangkok', 'Origin', month, eps=100, min_samples=40)

In [None]:
# Density map of trip destinations for 'Bangkok' over the selected month,
# using the same eps / min_samples as the origin map.
# The call is the cell's last expression, so the notebook displays whatever
# it returns.
sv.od_density_map('Bangkok', 'Destination', month, eps=100, min_samples=40)

In [None]:
# Overall density map for 'Bangkok' over the selected month (presumably
# origins and destinations together; confirm against StatVis), with the same
# eps / min_samples as the per-side maps.
sv.od_density_map_overall('Bangkok', month, eps=100, min_samples=40)

### Speed and safety

In [None]:
# Distribution figure for the 'speed' metric over the selected month,
# saved as a PNG under result/.
fig = sv.trip_distribution('speed', month)
fig.savefig('result/speed_distribution.png')

In [None]:
# Area map for the 'speed' metric. It takes the single day trip_date rather
# than the whole month.
# NOTE(review): the meaning of the positional 5 is not visible here; confirm
# against StatVis.speed_acc_area.
sv.speed_acc_area('speed', trip_date, 5)

In [None]:
# Area map for the 'acc' (presumably acceleration) metric on the single day
# trip_date.
# NOTE(review): the meaning of the positional 5 is not visible here; confirm
# against StatVis.speed_acc_area.
sv.speed_acc_area('acc', trip_date, 5)

### Stay spot

In [None]:
# Figure returned by sv.stay_hourly_volume for the selected month, saved as
# a PNG under result/.
fig = sv.stay_hourly_volume(month)
fig.savefig('result/stay_hourly_volume.png')

In [None]:
# Stay-spot map for a single duration bucket over the selected month.
# NOTE(review): the bucket label here is '<15', while the stay_spot_overall
# cell below uses '<=15'. Confirm which label StatVis expects; one of the two
# is probably a typo.
sv.stay_spot('<15', month, eps=100, min_samples=30)

In [None]:
# Combined stay-spot map over the selected month, one entry per duration
# bucket in each of the three lists.
sv.stay_spot_overall(
    ['<=15', '15-200', '>200'],  # duration bucket labels
    month,
    [100, 100, 100],  # presumably eps per bucket (TODO confirm)
    [30, 130, 300],  # presumably min_samples per bucket (TODO confirm)
)

### Spark statistics

In [None]:
# Figure comparing Spark runs across several resource configurations, saved
# as a PNG under result/.
# NOTE(review): each triple is presumably (executors, cores, memory in GB);
# confirm against StatVis.spark_stat.
fig = sv.spark_stat(
    [
        [2, 1, 4],
        [2, 2, 4],
        [3, 2, 4],
        [2, 4, 8],
        [3, 3, 6],
    ]
)
fig.savefig('result/spark_stat.png')