# RedAmber Examples

This notebook walks through the [README of RedAmber](https://github.com/heronshoes/red_amber#readme).

## `RedAmber::DataFrame`

In [1]:
require "red_amber"
require "datasets-arrow"

true

In [2]:
# Convert the Penguins dataset with `to_arrow`, then build a
# RedAmber::DataFrame from the result.
arrow = Datasets::Penguins.new.to_arrow
penguins = RedAmber::DataFrame.new(arrow)

species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
Adelie,Torgersen,39.1,18.7,181,3750,male,2007
Adelie,Torgersen,39.5,17.4,186,3800,female,2007
Adelie,Torgersen,40.3,18.0,195,3250,female,2007
Adelie,Torgersen,(nil),(nil),(nil),(nil),(nil),2007
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
Gentoo,Biscoe,50.4,15.7,222,5750,male,2009
Gentoo,Biscoe,45.2,14.8,212,5200,female,2009
Gentoo,Biscoe,49.9,16.1,213,5400,male,2009


In [3]:
# List the column keys; the output below shows an Array of Symbols.
penguins.keys

[:species, :island, :bill_length_mm, :bill_depth_mm, :flipper_length_mm, :body_mass_g, :sex, :year]

In [4]:
# Keep only the three named columns.
df = penguins.pick(:species, :island, :body_mass_g)
# A bare expression on the last line makes the notebook display it.
df

species,island,body_mass_g
Adelie,Torgersen,3750
Adelie,Torgersen,3800
Adelie,Torgersen,3250
Adelie,Torgersen,(nil)
⋮,⋮,⋮
Gentoo,Biscoe,5750
Gentoo,Biscoe,5200
Gentoo,Biscoe,5400


In [5]:
# Drop columns using a boolean mask with one flag per column: the two
# `true` positions (species, island) are removed, the `false` position
# (body_mass_g) is kept.
df = df.drop(true, true, false)

body_mass_g
3750
3800
3250
(nil)
⋮
5750
5200
5400


In [6]:
# Append a body_mass_kg column derived from body_mass_g (grams / 1000.0).
# Nil masses stay nil in the new column.
df.assign(body_mass_kg: df[:body_mass_g] / 1000.0)

body_mass_g,body_mass_kg
3750,3.75
3800,3.8
3250,3.25
(nil),(nil)
⋮,⋮
5750,5.75
5200,5.2
5400,5.4


In [7]:
# Select rows by index ranges: 0...5 covers the first five rows and
# -5..-1 covers the last five.
penguins.slice(0...5, -5..-1)

species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
Adelie,Torgersen,39.1,18.7,181,3750,male,2007
Adelie,Torgersen,39.5,17.4,186,3800,female,2007
Adelie,Torgersen,40.3,18.0,195,3250,female,2007
Adelie,Torgersen,(nil),(nil),(nil),(nil),(nil),2007
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
Gentoo,Biscoe,50.4,15.7,222,5750,male,2009
Gentoo,Biscoe,45.2,14.8,212,5200,female,2009
Gentoo,Biscoe,49.9,16.1,213,5400,male,2009


In [8]:

# Remove the rows whose bill_length_mm is below 40. Rows where the
# comparison is nil are not removed (the all-nil row still appears in the
# output below).
penguins.remove(penguins[:bill_length_mm] < 40)

species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
Adelie,Torgersen,40.3,18.0,195,3250,female,2007
Adelie,Torgersen,(nil),(nil),(nil),(nil),(nil),2007
Adelie,Torgersen,42.0,20.2,190,4250,(nil),2007
Adelie,Torgersen,41.1,17.6,182,3200,female,2007
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
Gentoo,Biscoe,50.4,15.7,222,5750,male,2009
Gentoo,Biscoe,45.2,14.8,212,5200,female,2009
Gentoo,Biscoe,49.9,16.1,213,5400,male,2009


In [9]:
# Build a small DataFrame from per-column arrays, one column per data type.
# Every column ends with nil, and the float column also contains NaN, so the
# following cells can show how both kinds of missing value are handled.
df = RedAmber::DataFrame.new(
  integer: [0, 1, 2, 3, nil],
  float:   [0.0, 1.1, 2.2, Float::NAN, nil],
  string:  ["A", "B", "C", "D", nil],
  boolean: [true, false, true, false, nil]
)
# A bare expression on the last line makes the notebook display it.
df

integer,float,string,boolean
0,0.0,A,true
1,1.1,B,false
2,2.2,C,true
3,NaN,D,false
(nil),(nil),(nil),(nil)


In [10]:
# Replace each float column with its negation. The block returns
# [key, vector] pairs; non-float columns are left untouched.
df.assign do
  float_vectors = vectors.select(&:float?)
  float_vectors.map { |vec| [vec.key, -vec] }
end

integer,float,string,boolean
0,-0.0,A,true
1,-1.1,B,false
2,-2.2,C,true
3,NaN,D,false
(nil),(nil),(nil),(nil)


In [11]:
# Remove every row in which at least one column is nil: take each column's
# is_nil mask and OR the masks together into a single row mask.
nil_removed = penguins.remove do
  vectors.map(&:is_nil).reduce { |mask, nil_flags| mask | nil_flags }
end
# tdr prints a per-column summary (key, type, level, data preview).
nil_removed.tdr

RedAmber::DataFrame : 333 x 8 Vectors
Vectors : 5 numeric, 3 strings
# key                type   level data_preview
1 :species           string     3 {"Adelie"=>146, "Chinstrap"=>68, "Gentoo"=>119}
2 :island            string     3 {"Torgersen"=>47, "Biscoe"=>163, "Dream"=>123}
3 :bill_length_mm    double   163 [39.1, 39.5, 40.3, 36.7, 39.3, ... ]
4 :bill_depth_mm     double    79 [18.7, 17.4, 18.0, 19.3, 20.6, ... ]
5 :flipper_length_mm uint8     54 [181, 186, 195, 193, 190, ... ]
6 :body_mass_g       uint16    93 [3750, 3800, 3250, 3450, 3650, ... ]
7 :sex               string     2 {"male"=>168, "female"=>165}
8 :year              uint16     3 {2007=>103, 2008=>113, 2009=>117}


In [12]:
# Drop rows containing nil; the all-nil fourth row of the original frame no
# longer appears in the output below.
penguins.remove_nil

species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
Adelie,Torgersen,39.1,18.7,181,3750,male,2007
Adelie,Torgersen,39.5,17.4,186,3800,female,2007
Adelie,Torgersen,40.3,18.0,195,3250,female,2007
Adelie,Torgersen,36.7,19.3,193,3450,female,2007
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
Gentoo,Biscoe,50.4,15.7,222,5750,male,2009
Gentoo,Biscoe,45.2,14.8,212,5200,female,2009
Gentoo,Biscoe,49.9,16.1,213,5400,male,2009


In [14]:
# Summary statistics (count, mean, std, min, quartiles, max), one row per
# numeric column.
penguins.summary

variables,count,mean,std,min,25%,median,75%,max
bill_length_mm,342,43.92192982456141,5.459583713926532,32.1,39.225,44.382000000000005,48.5,59.6
bill_depth_mm,342,17.151169590643274,1.9747931568167811,13.1,15.6,17.32,18.7,21.5
flipper_length_mm,342,200.91520467836256,14.061713679356888,172.0,190.0,197.0,213.0,231.0
body_mass_g,342,4201.754385964912,801.9545356980955,2700.0,3550.0,4031.5,4750.0,6300.0
year,344,2008.0290697674416,0.8183559254837041,2007.0,2007.0,2008.0,2009.0,2009.0


In [15]:
# Load the starwars CSV from a remote URL into a DataFrame.
starwars = RedAmber::DataFrame.load(URI("https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv"))
# A bare expression on the last line makes the notebook display it.
starwars

unnamed1,name,height,mass,hair_color,skin_color,eye_color,birth_year,sex,gender,homeworld,species
1,Luke Skywalker,172,77.0,blond,fair,blue,19.0,male,masculine,Tatooine,Human
2,C-3PO,167,75.0,,gold,yellow,112.0,none,masculine,Tatooine,Droid
3,R2-D2,96,32.0,,"white, blue",red,33.0,none,masculine,Naboo,Droid
4,Darth Vader,202,136.0,none,white,yellow,41.9,male,masculine,Tatooine,Human
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
85,BB8,(nil),(nil),none,none,black,(nil),none,masculine,,Droid
86,Captain Phasma,(nil),(nil),unknown,unknown,unknown,(nil),,,,
87,Padmé Amidala,165,45.0,brown,light,brown,46.0,female,feminine,Naboo,Human


In [16]:
# Group by species, counting the members of each group and averaging their
# height and mass.
grouped = starwars.group(:species) do
  [count(:species), mean(:height, :mass)]
end
# Keep only the species that have more than one member.
grouped.slice { v(:count) > 1 }

species,count,mean(height),mean(mass)
Human,35,176.6451612903226,82.78181818181818
Droid,6,131.2,69.75
Wookiee,2,231.0,124.0
Gungan,3,208.66666666666666,74.0
⋮,⋮,⋮,⋮
Twi'lek,2,179.0,55.0
Mirialan,2,168.0,53.1
Kaminoan,2,221.0,88.0


## `RedAmber::Vector`

In [18]:
# Indexing a DataFrame by key returns that column as a RedAmber::Vector.
penguins[:bill_length_mm]

#<RedAmber::Vector(:double, size=344):0x000000000000f1cc>
[39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]


In [19]:
# Element-wise comparison produces a boolean Vector; nil elements stay nil.
penguins[:bill_length_mm] < 40

#<RedAmber::Vector(:boolean, size=344):0x000000000000f1e0>
[true, true, false, nil, true, true, true, true, true, false, true, true, false, ... ]


In [20]:
# Mean of the column's values, returned as a Float.
penguins[:bill_length_mm].mean

43.92192982456141