<span style="color:blue">Thanks for using Drogon for your interactive Spark application. We update Drogon/SparkMagic as often as possible to make it easier, faster and more reliable for you. Have a question or feedback? Ping us on [uChat](https://uchat.uberinternal.com/uber/channels/spark).</span>

What's New
- Now you can use `%%configure` and `%%spark` magics to configure and start a Spark session (deprecating hard-to-use `%load_ext sparkmagic.magics` and `manage_spark` magics). Check out [this example](https://workbench.uberinternal.com/explore/knowledge/localfile/cwang/sparkmagic_python2_example.ipynb) for more details.
- Improved `%%configure` magic. You can now use it to set all Spark and Drogon configurations from within the notebook itself. Check out our [latest documentation & examples](https://docs.google.com/document/d/1mkYtDHquh4FjqTeA0Fxii8lyV-P6qzmoABhmmRwm_00/edit#heading=h.xn14pmoorsn0) for more details.
- Bug fixes and performance updates.


In [None]:
// This script identifies the trips that pass through a given sequence of segments

In [None]:
%%configure -f
{
  "kind": "spark", 
  "proxyUser": "dhruven.vora", 
  "sparkEnv": "SPARK_24", 
  "driverMemory": "12g", 
  "queue": "maps_route_analytics", 
  "numExecutors": 300, 
  "executorCores": 2, 
  "driverCores": 2,
  "conf": {
    "spark.driver.maxResultSize": "10g",
    "spark.executor.memoryOverhead": 3072, 
    "spark.locality.wait": "0",
    "spark.default.parallelism":10000
  },
  "executorMemory": "24g",
  "drogonHeaders": {
    "X-DROGON-CLUSTER": "PHX2/Secure"
  }
}

In [None]:
%%spark

In [None]:
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
import spark.implicits._
import org.apache.spark.sql.functions._
import scala.collection.mutable.ListBuffer
import org.apache.spark.sql._

// One element of ActualPolylineInfo.gpsPoints, narrowed to the single field this
// notebook reads. The two junction fields are deliberately left commented out.
// NOTE(review): presumably segmentUuid identifies the road segment the point lies on;
// confirm against the route corpus schema.
case class GpsPoint(
//     endJunctionUuid: String,
    segmentUuid: String
//     startJunctionUuid: String
)

// Typed view of the `actualPolylineInfo` struct of a route corpus record.
// Of these fields, the cells in this notebook only read `tripUuid` and `gpsPoints`;
// the remaining String fields are carried along but not used here.
case class ActualPolylineInfo (
    tripUuid: String,
    gpsPoints: List[GpsPoint],
    waypointTaskType: String,
    driverUuid: String,
    buildUuid: String,
    dataProvider: String,
    vehicleType: String
)

// Top-level record type used to read the route corpus parquet data as a typed
// Dataset; it exposes only the nested `actualPolylineInfo` struct.
case class RouteCorpus (
    actualPolylineInfo: ActualPolylineInfo
)

// Compact per-trip record: the trip's UUID together with a list of segment UUIDs.
case class Trip (
    tripUuid: String,
    segments: List[String]
)

In [None]:
// Singleton exception used as a control-flow signal: code can `throw AllDone`
// and recognise it again with `case AllDone =>`.
object AllDone extends Exception

In [None]:
// Load every daily route corpus partition matching the date glob below and
// expose it as a typed Dataset, then print the resulting schema.
val trips: Dataset[RouteCorpus] = {
    val corpusGlob = "/app/route_corpus_features/route_corpus/daily/date=2023-01-*"
    spark.read.parquet(corpusGlob).as[RouteCorpus]
}
trips.printSchema()

In [None]:
// Reduce each corpus record to a Trip: the trip UUID plus the segment UUIDs of its
// GPS points in first-seen order with duplicates removed. Cached because later
// cells filter over it.
val derivedTrips: Dataset[Trip] = trips.
map(corpus => {
    val info = corpus.actualPolylineInfo
    val segmentUuids = info.gpsPoints.map(_.segmentUuid).distinct
    Trip(tripUuid = info.tripUuid, segments = segmentUuids)
}).
cache()

In [None]:
// Select the UUIDs of all trips whose segment list contains the target segments as a
// contiguous, ordered run, then print them on the driver.
//
// The earlier hand-rolled three-element window was seeded with segments(1)/segments(2)
// and began iterating at index 2, so it never examined the windows starting at index 0
// or 1 (a trip consisting of exactly the three target segments was rejected). It also
// signalled a match by throwing an exception and indexed a List inside a loop.
// `containsSlice` checks every window position, returns false for lists shorter than
// the target, and works for a target sequence of any length.
val result: Dataset[String] = {
    // Ordered segment UUIDs the trip must traverse back-to-back. Kept local to this
    // block so the filter closure captures only this small list.
    val targetSegments = List(
        "fef65886-b01e-2f8a-a1ad-b8f8e48c4fd7",
        "aec44bb4-e716-99f6-0e49-f8f0efd8e5e4",
        "79d49cc2-fa1b-c43a-129b-b1a9956bfabf"
    )

    derivedTrips.
    filter(t => t.segments.containsSlice(targetSegments)).
    map(t => t.tripUuid)
}

// collect() brings every matching trip UUID to the driver before printing.
result.collect().foreach(println)