# Tidyverse : syntaxes équivalentes à celles de base R

The examples below follow this reference: https://tavareshugo.github.io/data_carpentry_extras/base-r_tidyverse_equivalents/base-r_tidyverse_equivalents.html

In [1]:
library(dplyr)
library(tidyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




## Extraction de colonnes

In [2]:
# Keep two columns, referring to them by name.
iris %>% select(Species, Petal.Width)
# Same two columns, referring to them by position (5 = Species, 4 = Petal.Width).
iris %>% select(5, 4)

Species,Petal.Width
<fct>,<dbl>
setosa,0.2
setosa,0.2
setosa,0.2
setosa,0.2
setosa,0.2
setosa,0.4
setosa,0.3
setosa,0.2
setosa,0.2
setosa,0.1


Species,Petal.Width
<fct>,<dbl>
setosa,0.2
setosa,0.2
setosa,0.2
setosa,0.2
setosa,0.2
setosa,0.4
setosa,0.3
setosa,0.2
setosa,0.2
setosa,0.1


## Création de nouvelles colonnes

In [3]:
# Append two derived columns: the length-to-width ratio of petals and of sepals.
iris %>%
  mutate(
    Petal.Ratio = Petal.Length / Petal.Width,
    Sepal.Ratio = Sepal.Length / Sepal.Width
  )

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species,Petal.Ratio,Sepal.Ratio
<dbl>,<dbl>,<dbl>,<dbl>,<fct>,<dbl>,<dbl>
5.1,3.5,1.4,0.2,setosa,7.000000,1.457143
4.9,3.0,1.4,0.2,setosa,7.000000,1.633333
4.7,3.2,1.3,0.2,setosa,6.500000,1.468750
4.6,3.1,1.5,0.2,setosa,7.500000,1.483871
5.0,3.6,1.4,0.2,setosa,7.000000,1.388889
5.4,3.9,1.7,0.4,setosa,4.250000,1.384615
4.6,3.4,1.4,0.3,setosa,4.666667,1.352941
5.0,3.4,1.5,0.2,setosa,7.500000,1.470588
4.4,2.9,1.4,0.2,setosa,7.000000,1.517241
4.9,3.1,1.5,0.1,setosa,15.000000,1.580645


## Extraction de lignes

In [4]:
# Keep rows satisfying both conditions; comma-separated predicates are AND-ed.
iris %>% filter(Petal.Width > 0.5, Species == "setosa")

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
<dbl>,<dbl>,<dbl>,<dbl>,<fct>
5,3.5,1.6,0.6,setosa


## Tri des lignes (ordre croissant et décroissant)

In [5]:
# Sort by Species in descending order first (virginica comes out on top),
# then by Petal.Width in ascending order within each species.
iris %>% arrange(desc(Species), Petal.Width)

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
<dbl>,<dbl>,<dbl>,<dbl>,<fct>
6.1,2.6,5.6,1.4,virginica
6.0,2.2,5.0,1.5,virginica
6.3,2.8,5.1,1.5,virginica
7.2,3.0,5.8,1.6,virginica
4.9,2.5,4.5,1.7,virginica
6.3,2.9,5.6,1.8,virginica
7.3,2.9,6.3,1.8,virginica
6.7,2.5,5.8,1.8,virginica
6.5,3.0,5.5,1.8,virginica
6.3,2.7,4.9,1.8,virginica


## Résumé des observations

In [6]:
# Explicit form: one named summary expression per output column.
iris %>%
  summarise(
    Petal.Length.mean = mean(Petal.Length),
    Petal.Length.sd = sd(Petal.Length),
    Sepal.Length.mean = mean(Sepal.Length),
    Sepal.Length.sd = sd(Sepal.Length)
  )

# Compact form: apply every function in `.funs` to every column in `.vars`.
# Output columns are named <variable>_<function>.
iris %>%
  summarise_at(
    .vars = c("Petal.Length", "Sepal.Length"),
    .funs = c("mean", "sd")
  )

Petal.Length.mean,Petal.Length.sd,Sepal.Length.mean,Sepal.Length.sd
<dbl>,<dbl>,<dbl>,<dbl>
3.758,1.765298,5.843333,0.8280661


Petal.Length_mean,Sepal.Length_mean,Petal.Length_sd,Sepal.Length_sd
<dbl>,<dbl>,<dbl>,<dbl>
3.758,5.843333,1.765298,0.8280661


## Jointures

In [7]:
# Inner join: only names present in both tables.
band_members %>% inner_join(band_instruments, by = "name")

# Full join: every name from either table; unmatched cells are NA.
band_members %>% full_join(band_instruments, by = "name")

# Left join: every row of band_members, with instruments where available.
band_members %>% left_join(band_instruments, by = "name")

# Right join: every row of band_instruments, with bands where available.
band_members %>% right_join(band_instruments, by = "name")

name,band,plays
<chr>,<chr>,<chr>
John,Beatles,guitar
Paul,Beatles,bass


name,band,plays
<chr>,<chr>,<chr>
Mick,Stones,
John,Beatles,guitar
Paul,Beatles,bass
Keith,,guitar


name,band,plays
<chr>,<chr>,<chr>
Mick,Stones,
John,Beatles,guitar
Paul,Beatles,bass


name,band,plays
<chr>,<chr>,<chr>
John,Beatles,guitar
Paul,Beatles,bass
Keith,,guitar


In [8]:
# The key column is named differently in each table: `name` in band_members
# is matched against `artist` in band_instruments2.
band_members %>%
  inner_join(band_instruments2, by = c("name" = "artist"))

name,band,plays
<chr>,<chr>,<chr>
John,Beatles,guitar
Paul,Beatles,bass


## Opérations groupées

### Résumé de lignes par groupes

In [9]:
# Mean and standard deviation of mpg and wt for each (cyl, gear) combination.
mtcars %>%
  group_by(cyl, gear) %>%
  summarise(
    mpg.mean = mean(mpg),
    mpg.sd = sd(mpg),
    wt.mean = mean(wt),
    wt.sd = sd(wt)
  ) %>%
  # summarise() only drops the last grouping level, so the result is still
  # grouped by cyl; remove that so later steps are not silently grouped.
  ungroup()

`summarise()` regrouping output by 'cyl' (override with `.groups` argument)



cyl,gear,mpg.mean,mpg.sd,wt.mean,wt.sd
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
4,3,21.5,,2.465,
4,4,26.925,4.8073604,2.378125,0.6006243
4,5,28.2,3.1112698,1.8265,0.443356
6,3,19.75,2.3334524,3.3375,0.1732412
6,4,19.75,1.5524175,3.09375,0.413146
6,5,19.7,,2.77,
8,3,15.05,2.7743959,4.104083,0.7683069
8,5,15.4,0.5656854,3.37,0.2828427


### Création d'une nouvelle colonne avec des calculs par groupes

In [10]:
# Centre Petal.Width on its species mean: mean() is evaluated per group, so
# each row has the mean of its own species subtracted.
iris %>%
  group_by(Species) %>%
  mutate(
    Petal.Width.centered = Petal.Width - mean(Petal.Width)
  ) %>%
  # Drop the grouping so later steps operate on the whole table.
  ungroup()

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species,Petal.Width.centered
<dbl>,<dbl>,<dbl>,<dbl>,<fct>,<dbl>
5.1,3.5,1.4,0.2,setosa,-0.046
4.9,3.0,1.4,0.2,setosa,-0.046
4.7,3.2,1.3,0.2,setosa,-0.046
4.6,3.1,1.5,0.2,setosa,-0.046
5.0,3.6,1.4,0.2,setosa,-0.046
5.4,3.9,1.7,0.4,setosa,0.154
4.6,3.4,1.4,0.3,setosa,0.054
5.0,3.4,1.5,0.2,setosa,-0.046
4.4,2.9,1.4,0.2,setosa,-0.046
4.9,3.1,1.5,0.1,setosa,-0.146


### Filtrage de lignes avec conditions par groupes

In [11]:
# Within each species, keep only the row(s) whose Petal.Width equals that
# species' maximum; tied rows are all kept.
iris %>% 
  group_by(Species) %>% 
  filter(Petal.Width == max(Petal.Width)) %>% 
  ungroup() # remove any groupings from downstream analysis

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
<dbl>,<dbl>,<dbl>,<dbl>,<fct>
5.0,3.5,1.6,0.6,setosa
5.9,3.2,4.8,1.8,versicolor
6.3,3.3,6.0,2.5,virginica
7.2,3.6,6.1,2.5,virginica
6.7,3.3,5.7,2.5,virginica


## Modification de la forme des données

### Au format "long"

In [12]:
# Stack the four measurement columns (Sepal.Length through Petal.Width) into
# two columns: `trait` holds the former column name, `measurement` its value.
iris %>%
  gather(key = "trait", value = "measurement", Sepal.Length:Petal.Width)

Species,trait,measurement
<fct>,<chr>,<dbl>
setosa,Sepal.Length,5.1
setosa,Sepal.Length,4.9
setosa,Sepal.Length,4.7
setosa,Sepal.Length,4.6
setosa,Sepal.Length,5.0
setosa,Sepal.Length,5.4
setosa,Sepal.Length,4.6
setosa,Sepal.Length,5.0
setosa,Sepal.Length,4.4
setosa,Sepal.Length,4.9


### Au format "wide"

In [13]:
# One row per Subject, one column per distinct `time` value, filled with the
# corresponding `conc`.
Indometh %>%
  spread(key = "time", value = "conc")

Subject,0.25,0.5,0.75,1,1.25,2,3,4,5,6,8
<ord>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1.5,0.94,0.78,0.48,0.37,0.19,0.12,0.11,0.08,0.07,0.05
4,1.85,1.39,1.02,0.89,0.59,0.4,0.16,0.11,0.1,0.07,0.07
2,2.03,1.63,0.71,0.7,0.64,0.36,0.32,0.2,0.25,0.12,0.08
5,2.05,1.04,0.81,0.39,0.3,0.23,0.13,0.11,0.08,0.1,0.06
6,2.31,1.44,1.03,0.84,0.64,0.42,0.24,0.17,0.13,0.1,0.09
3,2.72,1.49,1.16,0.8,0.8,0.39,0.22,0.12,0.11,0.08,0.08
