# XGBoost DataSet Exploration & Validation

## 1. Load Jars & Configuration

In [2]:
%AddJar file:///home/jovyan/work/apps/Emiasd-Flight-Data-Analysis.jar

Starting download from file:///home/jovyan/work/apps/Emiasd-Flight-Data-Analysis.jar
Finished download of Emiasd-Flight-Data-Analysis.jar
Using cached version of Emiasd-Flight-Data-Analysis.jar


In [3]:
import org.apache.spark.sql.SparkSession
import com.flightdelay.config.{AppConfiguration, ConfigurationLoader, ExperimentConfig}
import com.flightdelay.data.loaders.FlightDataLoader

// Environment configuration: "jupyter" is the only argument handed to the configuration loader.
val args: Array[String] = Array("jupyter")
implicit val configuration: AppConfiguration = ConfigurationLoader.loadConfiguration(args)
// The first configured experiment is the one made implicitly available to the cells below.
implicit val experimentConfig: ExperimentConfig = configuration.experiments(0)

// Build (or reuse) a session seeded with the kernel SparkContext's settings, with event
// logging directed under the configured output base path.
// NOTE(review): getOrCreate() may return an already-running session, in which case the
// event-log settings might not be applied — confirm in the Spark UI / history server.
val spark = SparkSession.builder()
  .config(sc.getConf)
  .config("spark.eventLog.enabled", "true")
  .config("spark.eventLog.dir", s"${configuration.common.output.basePath}/spark-events")  // e.g. "file:/tmp/spark-events" or "hdfs:///spark-events"
  .getOrCreate()

// Make the Spark session implicit. The type is spelled out because implicit definitions
// without an explicit type make implicit resolution fragile (and are rejected by Scala 3).
implicit val session: SparkSession = spark


args = Array(jupyter)
configuration = AppConfiguration(local,CommonConfig(42,true,debug,false,false,DataConfig(/home/jovyan/work/data,FileConfig(/home/jovyan/work/data/FLIGHT-3Y/Flights/201201*.csv),FileConfig(/home/jovyan/work/data/FLIGHT-3Y/Weather/20101*.txt),FileConfig(/home/jovyan/work/data/FLIGHT-3Y/wban_airport_timezone.csv)),OutputConfig(/home/jovyan/work/output,FileConfig(/home/jovyan/work/output/data),FileConfig(/home/jovyan/work/output/model),None),MLFlowConfig(false,http://localhost:5555),/scripts),Stream(ExperimentConfig(Experience-local,Ba...


AppConfiguration(local,CommonConfig(42,true,debug,false,false,DataConfig(/home/jovyan/work/data,FileConfig(/home/jovyan/work/data/FLIGHT-3Y/Flights/201201*.csv),FileConfig(/home/jovyan/work/data/FLIGHT-3Y/Weather/20101*.txt),FileConfig(/home/jovyan/work/data/FLIGHT-3Y/wban_airport_timezone.csv)),OutputConfig(/home/jovyan/work/output,FileConfig(/home/jovyan/work/output/data),FileConfig(/home/jovyan/work/output/model),None),MLFlowConfig(false,http://localhost:5555),/scripts),Stream(ExperimentConfig(Experience-local,Ba...

## 2. Load DataSets

In [13]:
// Parquet location of the prepared training set, under the configured output base path.
val trainDFPath = s"${configuration.common.output.basePath}/Experience-local-D-60-7-7/data/join_exploded_train_prepared.parquet"
val trainData = spark.read.parquet(trainDFPath)
// NOTE(review): the quality checks further down use `trainDf`, which no visible cell defines;
// the row counts reported there match this dataset, so it is aliased here — confirm.
val trainDf = trainData

// Interpolate the count: println("label", value) is auto-tupled and prints "(label,value)".
println(s"Train DF Count: ${trainData.count()}")

(Train DF Count: ,117971)


trainDFPath = /home/jovyan/work/output/Experience-local-D-60-7-7/data/join_exploded_train_prepared.parquet
trainData = [CRS_ARR_TIME: string, CRS_DEP_TIME: int ... 246 more fields]


[CRS_ARR_TIME: string, CRS_DEP_TIME: int ... 246 more fields]

In [14]:
// Parquet location of the prepared test set, under the configured output base path.
val testDFPath = s"${configuration.common.output.basePath}/Experience-local-D-60-7-7/data/join_exploded_test_prepared.parquet"
val testData = spark.read.parquet(testDFPath)
// NOTE(review): the quality checks further down use `testDf`, which no visible cell defines;
// the row counts reported there match this dataset, so it is aliased here — confirm.
val testDf = testData

// Interpolate the count: println("label", value) is auto-tupled and prints "(label,value)".
println(s"Test DF Count: ${testData.count()}")

(Test DF Count: ,39339)


testDFPath = /home/jovyan/work/output/Experience-local-D-60-7-7/data/join_exploded_test_prepared.parquet
testData = [CRS_ARR_TIME: string, CRS_DEP_TIME: int ... 246 more fields]


[CRS_ARR_TIME: string, CRS_DEP_TIME: int ... 246 more fields]

## 3. Check DataSet Quality

### 3.1 Distribution des classes (déséquilibre)

In [16]:
// Label balance: row count per `is_delayed` value, for the training set and then the test set.
Seq(trainDf, testDf).foreach { df =>
  df.groupBy("is_delayed").count().show()
}

+----------+-----+
|is_delayed|count|
+----------+-----+
|         1|58965|
|         0|59006|
+----------+-----+

+----------+-----+
|is_delayed|count|
+----------+-----+
|         1|19693|
|         0|19646|
+----------+-----+



### 3.2 Cohérence train / test

In [22]:
import org.apache.spark.sql.functions._

// Share of each `is_delayed` class in the training set: class row count over total row count.
val trainRatio = trainDf
  .groupBy("is_delayed")
  .count()
  .withColumn("ratio", col("count").divide(trainDf.count()))

// Same breakdown for the test set, so both splits can be compared side by side.
val testRatio = testDf
  .groupBy("is_delayed")
  .count()
  .withColumn("ratio", col("count").divide(testDf.count()))

Seq(trainRatio, testRatio).foreach(_.show())

+----------+-----+------------------+
|is_delayed|count|             ratio|
+----------+-----+------------------+
|         1|58965|0.4998262284798807|
|         0|59006|0.5001737715201193|
+----------+-----+------------------+

+----------+-----+------------------+
|is_delayed|count|             ratio|
+----------+-----+------------------+
|         1|19693|0.5005973715651135|
|         0|19646|0.4994026284348865|
+----------+-----+------------------+



trainRatio = [is_delayed: int, count: bigint ... 1 more field]
testRatio = [is_delayed: int, count: bigint ... 1 more field]


[is_delayed: int, count: bigint ... 1 more field]

### 3.3 Null Values

In [23]:
import org.apache.spark.sql.functions._

// Fraction of null cells for every column of the training set, in schema order.
// One aggregation returns the total row count plus a null count per column, instead of
// launching a filter/count job and a full count job for each column.
val nullStats: Array[Double] = {
  val aggregated = trainDf
    .agg(
      count(lit(1)),
      trainDf.columns.map(c => count(when(col(c).isNull, 1))): _*
    )
    .first()
  val totalRows = aggregated.getLong(0).toDouble
  // Slot 0 holds the row total, so the null count of column i sits at position i + 1.
  Array.tabulate(trainDf.columns.length)(i => aggregated.getLong(i + 1) / totalRows)
}

trainDf.columns.zip(nullStats).foreach(println)

(CRS_ARR_TIME,0.0)
(CRS_DEP_TIME,0.0)
(CRS_ELAPSED_TIME,0.0)
(D1,0.0)
(D2_15,0.0)
(D2_30,0.0)
(D2_45,0.0)
(D2_60,0.0)
(D2_90,0.0)
(D3,0.0)
(D4,0.0)
(DEST_AIRPORT_ID,0.0)
(DEST_WBAN,0.0)
(FL_DATE,0.0)
(OP_CARRIER_AIRLINE_ID,0.0)
(OP_CARRIER_FL_NUM,0.0)
(ORIGIN_AIRPORT_ID,0.0)
(ORIGIN_WBAN,0.0)
(UTC_CRS_DEP_TIME,0.0)
(UTC_FL_DATE,0.0)
(depHour,0.0)
(feature_arrival_time_period,0.0)
(feature_departure_hour_rounded_cos,0.0)
(feature_departure_hour_rounded_sin,0.0)
(feature_departure_time_period,0.0)
(feature_flight_unique_id,0.0)
(feature_flight_week_of_year_cos,0.0)
(feature_flight_week_of_year_sin,0.0)
(feature_utc_departure_hour_rounded,0.0)
(is_delayed,0.0)
(destination_weather_hour_h1,0.008595332751269379)
(destination_weather_WBAN_h1,0.008595332751269379)
(destination_weather_WDATE_h1,0.008595332751269379)
(destination_weather_WTIME_HHMM_h1,0.008595332751269379)
(destination_weather_feature_visibility_category_h1,0.008595332751269379)
(destination_weather_Temp_Delta_1hr_h1,0.00951928

nullStats = Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.008595332751269379, 0.008595332751269379, 0.008595332751269379, 0.008595332751269379, 0.008595332751269379, 0.009519288638733248, 0.008595332751269379, 0.00915479227945851, 0.008595332751269379, 0.008866585855845929, 0.008595332751269379, 0.008595332751269379, 0.008595332751269379, 0.008595332751269379, 0.010722974290291681, 0.0071712539522425, 0.0071712539522425, 0.0071712539522425, 0.0071712539522425, 0.0071712539522425, 0.008069779861152317, 0.0071712539522425, 0.0077646201185037, 0.0071712539522425, 0.007484890354409134, 0.0071712539522425, 0.0071712539522425, 0...


Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.008595332751269379, 0.008595332751269379, 0.008595332751269379, 0.008595332751269379, 0.008595332751269379, 0.009519288638733248, 0.008595332751269379, 0.00915479227945851, 0.008595332751269379, 0.008866585855845929, 0.008595332751269379, 0.008595332751269379, 0.008595332751269379, 0.008595332751269379, 0.010722974290291681, 0.0071712539522425, 0.0071712539522425, 0.0071712539522425, 0.0071712539522425, 0.0071712539522425, 0.008069779861152317, 0.0071712539522425, 0.0077646201185037, 0.0071712539522425, 0.007484890354409134, 0.0071712539522425, 0.0071712539522425, 0...

In [24]:
import org.apache.spark.sql.functions._

// Fraction of null cells for every column of the test set, in schema order.
// One aggregation returns the total row count plus a null count per column, instead of
// launching a filter/count job and a full count job for each column.
val nullStats: Array[Double] = {
  val aggregated = testDf
    .agg(
      count(lit(1)),
      testDf.columns.map(c => count(when(col(c).isNull, 1))): _*
    )
    .first()
  val totalRows = aggregated.getLong(0).toDouble
  // Slot 0 holds the row total, so the null count of column i sits at position i + 1.
  Array.tabulate(testDf.columns.length)(i => aggregated.getLong(i + 1) / totalRows)
}

testDf.columns.zip(nullStats).foreach(println)

(CRS_ARR_TIME,0.0)
(CRS_DEP_TIME,0.0)
(CRS_ELAPSED_TIME,0.0)
(D1,0.0)
(D2_15,0.0)
(D2_30,0.0)
(D2_45,0.0)
(D2_60,0.0)
(D2_90,0.0)
(D3,0.0)
(D4,0.0)
(DEST_AIRPORT_ID,0.0)
(DEST_WBAN,0.0)
(FL_DATE,0.0)
(OP_CARRIER_AIRLINE_ID,0.0)
(OP_CARRIER_FL_NUM,0.0)
(ORIGIN_AIRPORT_ID,0.0)
(ORIGIN_WBAN,0.0)
(UTC_CRS_DEP_TIME,0.0)
(UTC_FL_DATE,0.0)
(depHour,0.0)
(feature_arrival_time_period,0.0)
(feature_departure_hour_rounded_cos,0.0)
(feature_departure_hour_rounded_sin,0.0)
(feature_departure_time_period,0.0)
(feature_flight_unique_id,0.0)
(feature_flight_week_of_year_cos,0.0)
(feature_flight_week_of_year_sin,0.0)
(feature_utc_departure_hour_rounded,0.0)
(is_delayed,0.0)
(destination_weather_hour_h1,0.008668242710795903)
(destination_weather_WBAN_h1,0.008668242710795903)
(destination_weather_WDATE_h1,0.008668242710795903)
(destination_weather_WTIME_HHMM_h1,0.008668242710795903)
(destination_weather_feature_visibility_category_h1,0.008668242710795903)
(destination_weather_Temp_Delta_1hr_h1,0.00976130

nullStats = Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.008668242710795903, 0.008668242710795903, 0.008668242710795903, 0.008668242710795903, 0.008668242710795903, 0.009761305574620606, 0.008668242710795903, 0.00932916444241084, 0.008668242710795903, 0.009151223976206819, 0.008668242710795903, 0.008668242710795903, 0.008668242710795903, 0.008668242710795903, 0.010905208571646457, 0.007371819314166603, 0.007371819314166603, 0.007371819314166603, 0.007371819314166603, 0.007371819314166603, 0.008439462111390732, 0.007371819314166603, 0.007956480845979816, 0.007371819314166603, 0.007778540379775795, 0.007371819314166603, 0.0...


Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.008668242710795903, 0.008668242710795903, 0.008668242710795903, 0.008668242710795903, 0.008668242710795903, 0.009761305574620606, 0.008668242710795903, 0.00932916444241084, 0.008668242710795903, 0.009151223976206819, 0.008668242710795903, 0.008668242710795903, 0.008668242710795903, 0.008668242710795903, 0.010905208571646457, 0.007371819314166603, 0.007371819314166603, 0.007371819314166603, 0.007371819314166603, 0.007371819314166603, 0.008439462111390732, 0.007371819314166603, 0.007956480845979816, 0.007371819314166603, 0.007778540379775795, 0.007371819314166603, 0.0...

### 3.4 Traitements des Null Values

In [25]:
import org.apache.spark.sql.{DataFrame}
import org.apache.spark.sql.functions._

/**
 * Appends an integer indicator column marking the rows where `colName` is null.
 *
 * @param df      input DataFrame
 * @param colName column to inspect; must be present in `df`
 * @param suffix  text appended to `colName` to name the indicator column
 * @return `df` with the extra column `colName + suffix` (1 where `colName` is null, else 0)
 * @throws IllegalArgumentException if `colName` is not a column of `df`
 */
def addMissingColumnFlag(
  df: DataFrame,
  colName: String,
  suffix: String = "_missing"
): DataFrame = {
  val columnExists = df.columns.contains(colName)
  require(columnExists, s"Column not found: $colName")

  val flagName = s"${colName}${suffix}"
  val isMissing = col(colName).isNull.cast("int")
  df.withColumn(flagName, isMissing)
}

/**
 * Computes, for every column of `df`, the fraction of rows in which that column is null.
 *
 * A single aggregation returns the total row count together with one null count per
 * column, rather than running a separate filter/count job for each column.
 *
 * @param df DataFrame to profile
 * @return map from column name to null fraction (NaN for every column when `df` has no rows)
 */
def missingRates(df: DataFrame): Map[String, Double] = {
  val columnNames = df.columns

  val aggregated = df
    .agg(
      count(lit(1)),
      columnNames.map(c => count(when(col(c).isNull, 1))): _*
    )
    .first()

  val total = aggregated.getLong(0).toDouble

  // Slot 0 holds the row total, so the null count of column i sits at position i + 1.
  columnNames.zipWithIndex.map { case (name, i) =>
    name -> (aggregated.getLong(i + 1) / total)
  }.toMap
}

/**
 * Adds a missing-value indicator (via `addMissingColumnFlag`) for every column whose null
 * fraction, as reported by `missingRates`, is at least `threshold`.
 *
 * The selected column names are printed before the flags are added.
 *
 * @param df          input DataFrame
 * @param threshold   minimum null fraction (inclusive) for a column to be flagged
 * @param excludeCols column names that are never flagged
 * @return `df` with one indicator column appended per selected column
 */
def addMissingFlagsAboveThreshold(
  df: DataFrame,
  threshold: Double,
  excludeCols: Set[String] = Set.empty
): DataFrame = {

  val rates = missingRates(df)

  // Walk the schema instead of the Map so that flags are reported and appended in column
  // order rather than in hash order. The lambda parameter is deliberately not called `col`,
  // which would shadow org.apache.spark.sql.functions.col.
  val colsToFlag: Seq[String] =
    df.columns.toSeq.distinct.filter { name =>
      !excludeCols.contains(name) && rates.get(name).exists(_ >= threshold)
    }

  println(s"Columns flagged (missing >= $threshold):")
  colsToFlag.foreach(println)

  colsToFlag.foldLeft(df) { (accDf, colName) =>
    addMissingColumnFlag(accDf, colName)
  }
}

// Minimum null fraction (inclusive) a column needs before it receives a missing-value flag.
val threshold = 0.2

// The label column is excluded from flagging.
// NOTE(review): the columns to flag are selected independently for each split, so train and
// test could end up with different flag columns if their null rates straddle the threshold.
val trainWithFlags = addMissingFlagsAboveThreshold(trainDf, threshold, excludeCols = Set("is_delayed"))

val testWithFlags = addMissingFlagsAboveThreshold(testDf, threshold, excludeCols = Set("is_delayed"))

Columns flagged (missing >= 0.2):
destination_weather_press_change_abs_Max
origin_weather_press_change_abs_Max
Columns flagged (missing >= 0.2):
destination_weather_press_change_abs_Max
origin_weather_press_change_abs_Max


threshold = 0.2
trainWithFlags = [CRS_ARR_TIME: string, CRS_DEP_TIME: int ... 248 more fields]
testWithFlags = [CRS_ARR_TIME: string, CRS_DEP_TIME: int ... 248 more fields]


addMissingColumnFlag: (df: org.apache.spark.sql.DataFrame, colName: String, suffix: String)org.apache.spark.sql.DataFrame
missingRates: (df: org.apache.spark.sql.DataFrame)Map[String,Double]
addMissingFlagsAboveThreshold: (df: org.apache.spark.sql.DataFrame, threshold: Double, excludeCols: Set[String])org.apache.spark.sql.DataFrame


[CRS_ARR_TIME: string, CRS_DEP_TIME: int ... 248 more fields]

### 3.5 Valeurs invalides déguisées

### 3.6 Imputation de valeurs sentinelle

In [28]:
import org.apache.spark.sql.{DataFrame}
import org.apache.spark.sql.functions._

/**
 * Records which rows have a null in `colName`, then replaces those nulls with `sentinel`.
 *
 * @param df       input DataFrame
 * @param colName  column to flag and impute
 * @param sentinel value written wherever `colName` is null
 * @return `df` with the indicator column `<colName>_missing` (1 where the value was null,
 *         else 0) and with `colName` holding `sentinel` in place of nulls
 */
def addFlagAndImputeSentinel(
  df: DataFrame,
  colName: String,
  sentinel: Double = -1.0
): DataFrame = {
  val source = col(colName)
  val missingFlag = source.isNull.cast("int")
  val imputed = when(source.isNull, lit(sentinel)).otherwise(source)

  // The flag is added before the column is overwritten, so it reflects the original nulls.
  df
    .withColumn(s"${colName}_missing", missingFlag)
    .withColumn(colName, imputed)
}

// Flag and impute (sentinel -1.0) the origin column first, then the destination column.
val trainFinal =
  Seq("origin_weather_press_change_abs_Max", "destination_weather_press_change_abs_Max")
    .foldLeft(trainWithFlags) { (acc, name) => addFlagAndImputeSentinel(acc, name, -1.0) }

// Same two columns, same order, on the test set.
val testFinal =
  Seq("origin_weather_press_change_abs_Max", "destination_weather_press_change_abs_Max")
    .foldLeft(testWithFlags) { (acc, name) => addFlagAndImputeSentinel(acc, name, -1.0) }

trainFinal = [CRS_ARR_TIME: string, CRS_DEP_TIME: int ... 248 more fields]
testFinal = [CRS_ARR_TIME: string, CRS_DEP_TIME: int ... 248 more fields]


addFlagAndImputeSentinel: (df: org.apache.spark.sql.DataFrame, colName: String, sentinel: Double)org.apache.spark.sql.DataFrame


[CRS_ARR_TIME: string, CRS_DEP_TIME: int ... 248 more fields]

In [35]:
 import org.apache.spark.sql.functions._

// Total number of null cells in the training set: one null count per column, summed up.
// The aggregated row is used as-is (re-selecting every column by name added nothing), and the
// values are narrowed with a pattern match instead of a cast: each sum is a Long, or null when
// there were no rows to aggregate, in which case it simply does not contribute.
val totalNulls: Long =
  trainFinal
    .select(trainFinal.columns.map(c => sum(when(col(c).isNull, 1).otherwise(0)).alias(c)): _*)
    .first()
    .toSeq
    .collect { case n: Long => n }
    .sum

println(s"TOTAL NULL CELLS (train) = $totalNulls")

TOTAL NULL CELLS (train) = 209364


totalNulls = 209364


209364

In [36]:
// Total number of null cells in the test set: one null count per column, summed up.
val totalNullsTest = {
  val perColumnNulls = testFinal.columns.map { name =>
    sum(when(col(name).isNull, 1).otherwise(0)).alias(name)
  }
  val countsRow = testFinal.select(perColumnNulls: _*).first()
  // Each aggregate is a Long; a null aggregate contributes nothing to the total.
  countsRow.toSeq.collect { case n: Long => n }.sum
}

println(s"TOTAL NULL CELLS (test) = $totalNullsTest")

TOTAL NULL CELLS (test) = 71644


totalNullsTest = 71644


71644

In [38]:
import org.apache.spark.sql.functions._

// One-row DataFrame holding, for each training column, the number of null cells.
val nullCountsTrain = {
  val nullCounters = trainFinal.columns.map { name =>
    sum(when(col(name).isNull, 1).otherwise(0)).alias(name)
  }
  trainFinal.select(nullCounters: _*)
}

// Up to 50 columns that still contain nulls, largest null count first.
val topNullsTrain =
  nullCountsTrain
    .first()
    .toSeq
    .zip(trainFinal.columns)
    .collect { case (n: Long, c) if n > 0 => (c, n) }
    .sortBy { case (_, n) => -n }
    .take(50)

for ((c, n) <- topNullsTrain) println(f"$n%10d  $c")

      1554  origin_weather_Humidity_Delta_1hr_h7
      1552  origin_weather_Humidity_Delta_1hr_h6
      1499  origin_weather_Humidity_Delta_1hr_h5
      1398  origin_weather_Humidity_Delta_1hr_h4
      1352  destination_weather_Humidity_Delta_1hr_h7
      1314  origin_weather_Temp_Delta_1hr_h7
      1304  destination_weather_Humidity_Delta_1hr_h6
      1287  origin_weather_Temp_Delta_1hr_h6
      1271  origin_weather_Humidity_Delta_1hr_h3
      1265  destination_weather_Humidity_Delta_1hr_h1
      1233  origin_weather_Temp_Delta_1hr_h5
      1220  origin_weather_WindSpeed_Delta_1hr_h7
      1200  origin_weather_WindSpeed_Delta_1hr_h6
      1199  destination_weather_Humidity_Delta_1hr_h5
      1184  origin_weather_Visibility_Delta_1hr_h7
      1177  origin_weather_Temp_Delta_1hr_h4
      1175  origin_weather_WindSpeed_Delta_1hr_h5
      1172  destination_weather_Temp_Delta_1hr_h7
      1159  origin_weather_Visibility_Delta_1hr_h6
      1138  origin_weather_Humidity_Delta_1hr_h2
      11

nullCountsTrain = [CRS_ARR_TIME: bigint, CRS_DEP_TIME: bigint ... 248 more fields]
topNullsTrain = ArrayBuffer((origin_weather_Humidity_Delta_1hr_h7,1554), (origin_weather_Humidity_Delta_1hr_h6,1552), (origin_weather_Humidity_Delta_1hr_h5,1499), (origin_weather_Humidity_Delta_1hr_h4,1398), (destination_weather_Humidity_Delta_1hr_h7,1352), (origin_weather_Temp_Delta_1hr_h7,1314), (destination_weather_Humidity_Delta_1hr_h6,1304), (origin_weather_Temp_Delta_1hr_h6,1287), (origin_weather_Humidity_Delta_1hr_h3,1271), (destination_weather_Humidity_Delta_1hr_h1,1265), (origin_weather_Temp_Delta_1hr_h5,1233), (origin_weather_WindSpeed_Delta_1hr_h7,1220), (origin_weather_WindSpeed_Delta_1hr_h6,1200), (de...


ArrayBuffer((origin_weather_Humidity_Delta_1hr_h7,1554), (origin_weather_Humidity_Delta_1hr_h6,1552), (origin_weather_Humidity_Delta_1hr_h5,1499), (origin_weather_Humidity_Delta_1hr_h4,1398), (destination_weather_Humidity_Delta_1hr_h7,1352), (origin_weather_Temp_Delta_1hr_h7,1314), (destination_weather_Humidity_Delta_1hr_h6,1304), (origin_weather_Temp_Delta_1hr_h6,1287), (origin_weather_Humidity_Delta_1hr_h3,1271), (destination_weather_Humidity_Delta_1hr_h1,1265), (origin_weather_Temp_Delta_1hr_h5,1233), (origin_weather_WindSpeed_Delta_1hr_h7,1220), (origin_weather_WindSpeed_Delta_1hr_h6,1200), (de...

In [39]:
// One-row DataFrame holding, for each test column, the number of null cells.
val nullCountsTest = {
  val nullCounters = testFinal.columns.map { name =>
    sum(when(col(name).isNull, 1).otherwise(0)).alias(name)
  }
  testFinal.select(nullCounters: _*)
}

// Up to 50 columns that still contain nulls, largest null count first.
val topNullsTest = {
  val countsWithNames = nullCountsTest.first().toSeq.zip(testFinal.columns)
  val nonZero = countsWithNames.collect { case (n: Long, c) if n > 0 => (c, n) }
  nonZero.sortBy { case (_, n) => -n }.take(50)
}

for ((c, n) <- topNullsTest) println(f"$n%10d  $c")

       511  origin_weather_Humidity_Delta_1hr_h5
       511  origin_weather_Humidity_Delta_1hr_h6
       505  origin_weather_Humidity_Delta_1hr_h7
       475  origin_weather_Humidity_Delta_1hr_h4
       453  destination_weather_Humidity_Delta_1hr_h7
       436  destination_weather_Humidity_Delta_1hr_h6
       434  origin_weather_Temp_Delta_1hr_h6
       429  destination_weather_Humidity_Delta_1hr_h1
       428  origin_weather_Temp_Delta_1hr_h7
       425  origin_weather_Temp_Delta_1hr_h5
       413  origin_weather_WindSpeed_Delta_1hr_h7
       409  origin_weather_Humidity_Delta_1hr_h3
       406  origin_weather_WindSpeed_Delta_1hr_h5
       403  destination_weather_Humidity_Delta_1hr_h5
       401  origin_weather_Temp_Delta_1hr_h4
       401  origin_weather_WindSpeed_Delta_1hr_h6
       399  origin_weather_Visibility_Delta_1hr_h7
       393  origin_weather_Visibility_Delta_1hr_h6
       392  origin_weather_WindSpeed_Delta_1hr_h4
       391  destination_weather_Temp_Delta_1hr_h7
       

nullCountsTest = [CRS_ARR_TIME: bigint, CRS_DEP_TIME: bigint ... 248 more fields]
topNullsTest = ArrayBuffer((origin_weather_Humidity_Delta_1hr_h5,511), (origin_weather_Humidity_Delta_1hr_h6,511), (origin_weather_Humidity_Delta_1hr_h7,505), (origin_weather_Humidity_Delta_1hr_h4,475), (destination_weather_Humidity_Delta_1hr_h7,453), (destination_weather_Humidity_Delta_1hr_h6,436), (origin_weather_Temp_Delta_1hr_h6,434), (destination_weather_Humidity_Delta_1hr_h1,429), (origin_weather_Temp_Delta_1hr_h7,428), (origin_weather_Temp_Delta_1hr_h5,425), (origin_weather_WindSpeed_Delta_1hr_h7,413), (origin_weather_Humidity_Delta_1hr_h3,409), (origin_weather_WindSpeed_Delta_1hr_h5,406), (destination_weather_Humidity_Delta_1hr_h5,403), (origin_w...


ArrayBuffer((origin_weather_Humidity_Delta_1hr_h5,511), (origin_weather_Humidity_Delta_1hr_h6,511), (origin_weather_Humidity_Delta_1hr_h7,505), (origin_weather_Humidity_Delta_1hr_h4,475), (destination_weather_Humidity_Delta_1hr_h7,453), (destination_weather_Humidity_Delta_1hr_h6,436), (origin_weather_Temp_Delta_1hr_h6,434), (destination_weather_Humidity_Delta_1hr_h1,429), (origin_weather_Temp_Delta_1hr_h7,428), (origin_weather_Temp_Delta_1hr_h5,425), (origin_weather_WindSpeed_Delta_1hr_h7,413), (origin_weather_Humidity_Delta_1hr_h3,409), (origin_weather_WindSpeed_Delta_1hr_h5,406), (destination_weather_Humidity_Delta_1hr_h5,403), (origin_w...