# Create trelliscope figures to view flights and airports data

# Install missing packages

Conda only lets us install R packages that are published as conda packages. However, we often need packages that are only available on CRAN; in such cases we can still install them from CRAN inside the activated conda environment. These installations should persist locally within that environment.

In [1]:
# CRAN packages this notebook needs on top of what the environment provides.
packages <- c("trelliscopejs", "plotly")

# Only the packages that are not installed yet; skip the install call
# entirely when nothing is missing.
missing_packages <- setdiff(packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Libraries

In [2]:
library(trelliscopejs)
library(plotly)
library(tidyverse)
library(scales)


"package 'plotly' was built under R version 3.6.3"Loading required package: ggplot2
"package 'ggplot2' was built under R version 3.6.3"
Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v tibble  2.1.1       v purrr   0.3.2  
v tidyr   0.8.3       v dplyr   0.8.0.1
v readr   1.3.1       v stringr 1.4.0  
v tibble  2.1.1       v forcats 0.4.0  
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks plotly::filter(), stats::filter()
x dplyr::lag()    masks stats::lag()

Attaching package: 'scales'

The following object is masked from 'package:purrr':

    discard

The following object

# Read data

In [3]:
# Load the raw flights table.
df_flights <- read.csv(
  file = "../lvt-schiphol-assignment-snakemake/data/raw/flights.csv"
)
# Load the raw airports table.
df_airports <- read.csv(
  file = "../lvt-schiphol-assignment-snakemake/data/raw/airports.csv"
)

# Process flights

In [4]:
# Preview the first rows of the raw flights table.
head(df_flights)

actualOffBlockTime,aircraftRegistration,aircraftType.iatamain,aircraftType.iatasub,airlineCode,baggageClaim,estimatedLandingTime,expectedTimeBoarding,expectedTimeGateClosing,expectedTimeGateOpen,...,prefixICAO,publicEstimatedOffBlockTime,publicFlightState.flightStates,route.destinations,scheduleDate,scheduleTime,serviceType,terminal,transferPositions,transferPositions.transferPositions
,,,,148,,,,,,...,ZXP,,['SCH'],['AMS'],2018-01-01,03:02:07,P,,,
,PHPXY,AW1,,148,,,,,,...,ZXP,,['SCH'],['AMS'],2018-01-01,03:16:00,,,,
,,AW1,,148,,,,,,...,ZXP,,['SCH'],['AMS'],2018-01-01,03:16:29,P,,,
2018-01-01T03:22:00.000+01:00,PHPXB,,,148,,,,,,...,ZXP,,['DEP'],['AMS'],2018-01-01,03:30:00,,,,
2018-01-01T05:58:22.000+01:00,PHHSJ,73H,73H,164,,,,,,...,TRA,,['DEP'],['SPC'],2018-01-01,06:00:00,J,1.0,,
2018-01-01T06:00:00.000+01:00,PHHSG,73H,73H,100,,,,,,...,KLM,,['DEP'],['LPA'],2018-01-01,06:05:00,J,1.0,,


In [5]:
# Parse the schedule columns.
# `[[` extracts the column as a vector; single-bracket `[` would hand
# strptime() a one-column data frame instead of the values themselves.
# as.character() makes the input explicit in case read.csv() produced factors.
# The formats match the values shown in the table ("2018-01-01", "03:02:07").
schedule_dates <- strptime(as.character(df_flights[["scheduleDate"]]),
                           format = "%Y-%m-%d")
# A time-only format leaves the date unspecified, so strptime() fills in the
# current date for these values.
schedule_times <- strptime(as.character(df_flights[["scheduleTime"]]),
                           format = "%H:%M:%S")

In [6]:
# Inspect the first schedule date. R indexing is 1-based: index 0 selects
# nothing, so the first element is at position 1.
df_flights[["scheduleDate"]][1]

In [11]:

# Build the departures table:
#   * keep only rows with a non-empty actual off-block time,
#   * parse the actual and scheduled departure into POSIXct (Europe/Amsterdam),
#   * compute the delay as actual minus scheduled, in seconds
#     (negative values mean the flight left before its scheduled time).
df_flights2 <- df_flights %>%
    filter(actualOffBlockTime != "") %>%
    # Optional while iterating: restrict to a small sample.
    # head(1000) %>%
    mutate(
        # The format stops at whole seconds; the rest of the timestamp text
        # (fractional seconds, UTC offset) is not part of the pattern.
        actualOffBlockTime = as.POSIXct(
            actualOffBlockTime,
            tz = "Europe/Amsterdam",
            format = "%Y-%m-%dT%H:%M:%S"
        )
    ) %>%
    mutate(
        # Date and time are glued together without a separator and parsed
        # with a matching back-to-back format.
        scheduleDateTime = as.POSIXct(
            paste(scheduleDate, scheduleTime, sep = ""),
            tz = "Europe/Amsterdam",
            format = "%Y-%m-%d%H:%M:%S "
        ),
        scheduleDelaySeconds = difftime(
            actualOffBlockTime, scheduleDateTime,
            units = "secs"
        )
    )

# Preview the processed table.
head(df_flights2)

actualOffBlockTime,aircraftRegistration,aircraftType.iatamain,aircraftType.iatasub,airlineCode,baggageClaim,estimatedLandingTime,expectedTimeBoarding,expectedTimeGateClosing,expectedTimeGateOpen,...,publicFlightState.flightStates,route.destinations,scheduleDate,scheduleTime,serviceType,terminal,transferPositions,transferPositions.transferPositions,scheduleDateTime,scheduleDelaySeconds
2018-01-01 03:22:00,PHPXB,,,148,,,,,,...,['DEP'],['AMS'],2018-01-01,03:30:00,,,,,2018-01-01 03:30:00,-480 secs
2018-01-01 05:58:22,PHHSJ,73H,73H,164,,,,,,...,['DEP'],['SPC'],2018-01-01,06:00:00,J,1.0,,,2018-01-01 06:00:00,-98 secs
2018-01-01 06:00:00,PHHSG,73H,73H,100,,,,,,...,['DEP'],['LPA'],2018-01-01,06:05:00,J,1.0,,,2018-01-01 06:05:00,-300 secs
2018-01-01 06:00:00,PHHSG,73H,73H,164,,,,,,...,['DEP'],['LPA'],2018-01-01,06:05:00,J,1.0,,,2018-01-01 06:05:00,-300 secs
2018-01-01 06:26:34,PHHXB,73H,73H,164,,,,,,...,['DEP'],['TLV'],2018-01-01,06:15:00,J,1.0,,,2018-01-01 06:15:00,694 secs
2018-01-01 06:30:11,PHHZN,73H,73H,100,,,,,,...,['DEP'],['FAO'],2018-01-01,06:20:00,J,1.0,,,2018-01-01 06:20:00,611 secs


In [13]:
# Build a plotly scatter plot of departure delay against scheduled time.
#
# x: a data frame with the columns `scheduleDateTime` (x axis) and
#    `scheduleDelaySeconds` (y axis).
# Returns the plotly figure, drawn with markers only.
create_delay_scatter <- function(x) {
  plot_ly(
    data = x,
    x = ~scheduleDateTime,
    y = ~scheduleDelaySeconds,
    type = "scatter",
    mode = "markers"
  )
}

# One row per (serviceType, airlineCode) combination, with that combination's
# flights nested in the `data` list-column and a plotly panel built from each.
# ungroup() after nest() ensures the panel column is created on an ungrouped
# table regardless of whether the installed tidyr keeps the grouping on the
# nested result; it is a no-op when the result is already ungrouped.
df_plotly <- df_flights2 %>%
  group_by(serviceType, airlineCode) %>%
  nest() %>%
  ungroup() %>%
  mutate(plotly_fig = map_plot(data, create_delay_scatter))

# Create the trelliscope display under the "trelliscopes" directory.
# The result is stored in `kek` rather than printed.
kek <- df_plotly %>%
  trelliscope(name = "Delays Type/Airline", path = "trelliscopes")

# Same delay scatter built with ggplot2 on the first 10000 processed flights,
# faceted into a trelliscope display per serviceType / airlineCode and
# rendered as plotly panels.
df_flights2 %>%
    head(10000) %>%
    ggplot(aes(x = scheduleDateTime, y = scheduleDelaySeconds)) +
    geom_point() +
    facet_trelliscope(
        ~serviceType + airlineCode,
        name = "flights",
        path = "trelliscopes",
        as_plotly = TRUE
    )

                                                                        