In [1]:
import IJulia

# The Julia kernel has built-in support for Revise.jl, so this is the
# recommended approach for long-running sessions:
# https://github.com/JuliaLang/IJulia.jl/blob/9b10fa9b879574bbf720f5285029e07758e50a5e/src/kernel.jl#L46-L51

# Users should enable Revise within .julia/config/startup_ijulia.jl:
# https://timholy.github.io/Revise.jl/stable/config/#Using-Revise-automatically-within-Jupyter/IJulia-1

# Clear the IJulia console history (called with no arguments) before the
# figure and plotting setup that follows.
IJulia.clear_history()

# Default figure geometry (in inches), requested output format and DPI.
fig_width = 7
fig_height = 5
fig_format = :retina
fig_dpi = 96

# Normalize the requested format; the two cases are mutually exclusive:
#   :pdf    -> pin the DPI to 96 and enable PDF support for IJulia
#   :retina -> there is no retina format type, so use svg for high quality type/marks
if fig_format == :pdf
  fig_dpi = 96
  IJulia.register_mime(MIME("application/pdf"))
elseif fig_format == :retina
  fig_format = :svg
end

# From here on the figure size is expressed in pixels (inches * DPI).
fig_width, fig_height = (fig_width, fig_height) .* fig_dpi

# Initialize Plots with the default figure width/height.
# Best effort: Plots is optional, so a failure here (for example the package not
# being installed) must not abort the setup cell.
try
  import Plots

  # Plots.jl doesn't support PDF output for versions < 1.28.1, so fall back to
  # png in that case. The format is tested first so that the short-circuiting
  # `&&` only reads the private `_current_plots_version` when PDF was requested.
  plots_fmt = fig_format
  if fig_format == :pdf && Plots._current_plots_version < v"1.28.1"
    plots_fmt = :png
  end
  Plots.gr(size=(fig_width, fig_height), fmt = plots_fmt, dpi = fig_dpi)
catch e
  # Stay silent by default, but keep the failure discoverable when debug logging
  # is enabled instead of discarding it.
  @debug "Plots init" exception=(e, catch_backtrace())
end

# Initialize CairoMakie with the default figure width/height.
# Best effort: CairoMakie is optional, so a failure here must not abort the
# setup cell.
try
  import CairoMakie

  CairoMakie.activate!(type = string(fig_format))
  # NOTE(review): newer Makie releases appear to prefer `size` over `resolution`
  # for this theme entry; confirm against the installed version before changing it.
  CairoMakie.update_theme!(resolution=(fig_width, fig_height))
catch e
  # Stay silent by default, but keep the failure discoverable when debug logging
  # is enabled instead of discarding it.
  @debug "CairoMakie init" exception=(e, catch_backtrace())
end
  
# Switch the working directory to the configured run path, when one is given.
# A failing `cd` is reported as a warning and does not abort the setup cell.
try
  target_dir = raw"/Users/krise/Documents/GitHub/private/tidier4ds"
  # An empty path means "no run path specified": leave the working directory alone.
  isempty(target_dir) || cd(target_dir)
catch err
  @warn "Run path init:" exception=(err, catch_backtrace())
end


# emulate old Pkg.installed behavior, see
# https://discourse.julialang.org/t/how-to-use-pkg-dependencies-instead-of-pkg-installed/36416/9
import Pkg
"""
    isinstalled(pkg::AbstractString) -> Bool

Return `true` when a package named `pkg` is a direct dependency of the active
environment, as reported by `Pkg.dependencies()`. Packages that are only pulled
in transitively do not count.
"""
function isinstalled(pkg::AbstractString)
  return any(dep -> dep.is_direct_dep && dep.name == pkg, values(Pkg.dependencies()))
end

# ojs_define: display the given keyword arguments as a JSON payload wrapped in a
# <script type='ojs-define'> tag. Every keyword becomes one
# {"name": ..., "value": ...} entry under "contents". Which definition is
# installed depends on which packages are direct dependencies of the active
# environment.
if isinstalled("JSON") && isinstalled("DataFrames")
  import JSON, DataFrames
  global function ojs_define(; kwargs...)
    # Data frames are passed to JSON as a row iterator; any other value is passed
    # through unchanged. `Tables` is reached through DataFrames because only JSON
    # and DataFrames are imported here, so a bare `Tables` would be undefined when
    # the function is called.
    ojs_value(x) = x
    ojs_value(x::DataFrames.AbstractDataFrame) = DataFrames.Tables.rows(x)
    content = Dict("contents" => [Dict("name" => k, "value" => ojs_value(v)) for (k, v) in kwargs])
    tag = "<script type='ojs-define'>$(JSON.json(content))</script>"
    IJulia.display(MIME("text/html"), tag)
  end
elseif isinstalled("JSON")
  import JSON
  # Without DataFrames there is nothing to convert: values are serialized as-is.
  global function ojs_define(; kwargs...)
    content = Dict("contents" => [Dict("name" => k, "value" => v) for (k, v) in kwargs])
    tag = "<script type='ojs-define'>$(JSON.json(content))</script>"
    IJulia.display(MIME("text/html"), tag)
  end
else
  # Without JSON the payload cannot be built; calling ojs_define only warns.
  global function ojs_define(; kwargs...)
    @warn "JSON package not available. Please install the JSON.jl package to use ojs_define."
  end
end


# don't return kernel dependencies (b/c Revise should take care of dependencies)
nothing


In [2]:
#| eval: false
# https://r4ds.hadley.nz/data-transform.html
# status: worked through the examples; not all functions are supported yet, and some ideas are noted below to propose on GitHub
# NOTE(review): DataFrame, mean and the macros used below are not imported
# explicitly, so they are presumably re-exported by Tidier; confirm.
using Tidier, Feather

# Read the flights table from a Feather file and wrap it in a DataFrame.
# The path is relative, so it depends on the current working directory.
flights = DataFrame(Feather.read("../data/flights.feather"))

# Inspect the loaded table.
@glimpse(flights)

# Flights with dest == "IAH": mean of the non-missing arr_delay values for each
# (year, month, day) group, followed by dropping the grouping.
@chain flights begin
    @filter(dest == "IAH")
    @group_by(year, month, day)
    @summarize(
        arr_delay = mean(skipmissing(arr_delay))
    )
    @ungroup
end

# filter
# Keep only the rows whose dep_delay exceeds 120.
@chain flights begin
    @filter(dep_delay > 120)
end

# Both conditions must hold: month == 1 and day == 1.
@chain flights begin
    @filter(month == 1 && day == 1) 
end

# Either condition is enough: month == 1 or day == 1.
@chain flights begin
    @filter(month == 1 || day == 1) 
end

# Membership test: month is one of 1 or 2.
@chain flights begin
    @filter(month in (1, 2)) 
end

# Bind the filtered result (month == 1 and day == 1) to jan1.
jan1 = @chain flights begin
    @filter(month == 1 && day == 1)
end

# arrange
# Order by several columns: year, then month, day and dep_time.
@chain flights begin
    @arrange(year, month, day, dep_time)
end

# Order by dep_delay wrapped in desc(), i.e. in descending order.
@chain flights begin
    @arrange(desc(dep_delay))
end

# distinct
# Called without column arguments (presumably distinct over whole rows, as in
# dplyr; confirm).
@chain flights begin
    @distinct()
end

# Distinct with respect to the origin/dest pair.
@chain flights begin
    @distinct(origin, dest)
end

## idea: to replicate .keep_all = FALSE option via implicit @select()? 
## Until then, the explicit @select below keeps only the two key columns.
@chain flights begin
    @distinct(origin, dest)
    @select(origin, dest)
end

# count
## idea: replicate sort=TRUE behavior via implicit @arrange(desc(n))?
## Here the count per origin/dest pair is followed by an explicit descending
## sort on n.
@chain flights begin
    @count(origin, dest, sort = true)
    @arrange(desc(n))
end

# mutate
# Add two derived columns: gain (dep_delay - arr_delay) and speed
# (distance / air_time * 60).
@chain flights begin
    @mutate(
        gain = dep_delay - arr_delay,
        speed = distance / air_time * 60
    )
end

# question: is it possible to refer to other variable that is being created in the same mutate() call?
# gain_per_hour below uses gain and hours, which are created in the same call.
@chain flights begin
    @mutate(
        gain = dep_delay - arr_delay,
        hours = air_time / 60,
        gain_per_hour = gain / hours # This does not work!
    )
end

# select
# Columns listed by name.
@chain flights begin
    @select(year, month, day)
end

# A range of columns, from year through day.
@chain flights begin
    @select(year:day) 
end

# Negated selection: !year.
@chain flights begin
    @select(!year) 
end

# idea: replicate where function?
# Selecting columns by a predicate on the column, as attempted below, fails.
@chain flights begin
    @select(where(is_string)) # does not work
end

# starts_with("abc"): matches names that begin with “abc” -> supported
# ends_with("xyz"): matches names that end with “xyz” -> supported
# contains("ijk"): matches names that contain “ijk” -> supported
# num_range("x", 1:3): matches x1, x2 and x3 -> NOT supported

# Select and rename in one step: new name (tail_num) on the left, existing
# column (tailnum) on the right.
@chain flights begin
    @select(tail_num = tailnum)
end

# rename
# Same new = old form as above, this time through @rename.
@chain flights begin
    @rename(tail_num = tailnum)
end

# relocate (not supported)

# groups 
# Group the table by month without summarizing it.
@chain flights begin
    @group_by(month)
end

# Per-month summary: avg_delay is the mean of the non-missing dep_delay values;
# n comes from n() (presumably the number of rows per group, as in dplyr; confirm).
@chain flights begin
    @group_by(month)
    @summarize(
        avg_delay = mean(skipmissing(dep_delay)),
        n = n()
    )
end

# slice
# only slice currently supported, no slice_* functions