## Load the NYC Taxi data into the Spark nyctaxi database


In [None]:
%%spark
// Create the target Spark database up front; a no-op when it already exists.
spark.sql("CREATE DATABASE IF NOT EXISTS nyctaxi")

// Read SQLPOOL1.dbo.Trip through the sqlanalytics reader.
val df = spark.read.sqlanalytics("SQLPOOL1.dbo.Trip")

// Save the result as the Spark table nyctaxi.trip, replacing any existing copy.
val tripWriter = df.write.mode("overwrite")
tripWriter.saveAsTable("nyctaxi.trip")

## Analyze the NYC Taxi data using Spark and notebooks


In [None]:
%%pyspark
df = spark.sql("SELECT * FROM nyctaxi.trip") 
display(df)

In [None]:
%%pyspark
df = spark.sql("""
   SELECT PassengerCount,
       SUM(TripDistanceMiles) as SumTripDistance,
       AVG(TripDistanceMiles) as AvgTripDistance
   FROM nyctaxi.trip
   WHERE TripDistanceMiles > 0 AND PassengerCount > 0
   GROUP BY PassengerCount
   ORDER BY PassengerCount
""") 
display(df)
df.write.saveAsTable("nyctaxi.passengercountstats")

## Load data from a Spark table into a dedicated SQL pool table

In [None]:
%%spark
// Load the aggregated statistics from the Spark table nyctaxi.passengercountstats.
val df = spark.sql("SELECT * FROM nyctaxi.passengercountstats")

// Write the rows to SQLPOOL1.dbo.PassengerCountStats through the sqlanalytics writer.
// NOTE(review): Constants.INTERNAL presumably selects an internal (managed) table
// type in the SQL pool -- confirm against the connector documentation.
val statsWriter = df.write
statsWriter.sqlanalytics("SQLPOOL1.dbo.PassengerCountStats", Constants.INTERNAL)