# Learning R

## Chapter 2

# Mathematical Operations and Vectors

In [1]:
1:5 + 6:10

In [2]:
c(1, 3, 6, 10, 15) + c(0, 1, 3, 6, 10)

In [3]:
sum(1:5)

In [4]:
median(1:5)

In [5]:
sum(1, 2, 3, 4, 5)

In [6]:
median(1, 2, 3, 4, 5)

ERROR: Error in median(1, 2, 3, 4, 5): unused arguments (3, 4, 5)


In [7]:
c(2, 3, 5, 7, 11, 13) - 2

In [8]:
-2:2 * -2:2

In [9]:
identical(2^3, 2**3)

In [10]:
1:10 / 3

In [12]:
# Integer division
1:10 %/% 3

In [13]:
# Remainder after division
1:10 %% 3

In [14]:
# Pi as a built in constant
cos(c(0, pi/4, pi/2, pi))

In [15]:
# Euler's formula
exp(pi * 1i) + 1

In [16]:
factorial(7) + factorial(1) - 71 ^ 2

In [19]:
choose(5, 0:5)

In [20]:
sqrt(2) ^ 2 == 2

In [21]:
sqrt(2) ^ 2 - 2

In [22]:
all.equal(sqrt(2) ^ 2, 2)

In [23]:
all.equal(sqrt(2) ^ 2, 3)

In [24]:
isTRUE(all.equal(sqrt(2)^2, 3))

In [25]:
c(
    "Can",
    "you",
    "can",
    "a",
    "as"
) == "can"

In [26]:
c("A", "B", "C", "D") < "C"

# Assigning Variables

In [27]:
x <- 1:5

In [28]:
x

In [31]:
# `=` also works for top-level assignment; `<-` is the conventional operator
y = 6:10
y

In [30]:
x + 2 * y - 3

In [32]:
assign("my_local_variable", 9^3 + 10^3)

In [33]:
my_local_variable

In [34]:
assign("my_global_variable", 1^3 + 12^3, globalenv())

In [35]:
my_global_variable

In [36]:
z <- rnorm(5); z

In [37]:
(zz <- rlnorm(5))

# Special Numbers

In [38]:
c(Inf + 1, Inf - 1, Inf - Inf)

In [39]:
c(1 / Inf, Inf / 1, Inf / Inf)

In [41]:
c(sqrt(Inf), sin(Inf))

"NaNs produced"

In [42]:
c(log(Inf), log(Inf, base = Inf))

"NaNs produced"

In [43]:
c(NA + 1, NA * 5)

In [44]:
x <- c(0, Inf, -Inf, NaN, NA)

In [45]:
x

In [46]:
is.finite(x)

In [47]:
is.infinite(x)

In [48]:
is.nan(x)

In [49]:
is.na(x)

# Logical Vectors

In [50]:
(x <- 1:10 >= 5)

In [51]:
!x

In [52]:
(y <- 1:10 %% 2 == 0)

In [53]:
x & y

In [54]:
x | y

In [55]:
x <- c(TRUE, FALSE, NA)
# Get all combinations of x and y (3 x 3 = 9 rows, NA included)
xy <- expand.grid(x = x, y = x)

# Evaluate the logical operators using the columns of xy and show the result
# as extra columns. The result is not assigned, so xy itself is unchanged.
# Note the three-valued logic: NA & FALSE is FALSE and NA | TRUE is TRUE.
within (
    xy,
    {
        and <- x & y
        or <- x | y
        not.y <- !y
        not.x <- !x
    }
)

x,y,not.x,not.y,or,and
True,True,False,False,True,True
False,True,True,False,True,False
,True,,False,True,
True,False,False,True,True,False
False,False,True,True,False,False
,False,,True,,False
True,,False,,True,
False,,True,,,False
,,,,,


## Chapter 3 
### Inspecting Variables in Workspace

In [56]:
class(c(TRUE, FALSE))

In [57]:
class(c(1, 1.00))

In [58]:
class(c("Hello"))

In [59]:
class(3+1i)

In [60]:
gender <- factor(c("male", "female", "male", "female"))

In [61]:
gender

In [62]:
levels(gender)

In [63]:
nlevels(gender)

In [67]:
# levels() returns NULL for a plain character vector; convert it with factor() first
hw <- c("hello", "world", "hello")
levels(hw)

hw <- factor(c("hello", "world", "hello"))
levels(hw)

NULL

In [68]:
as.integer(gender)

In [69]:
gender_char <- sample(c("female", "male"), 10000, replace = TRUE)

In [70]:
gender_fac <- as.factor(gender_char)

In [71]:
object.size(gender_char)

80136 bytes

In [72]:
object.size(gender_fac)

40512 bytes

In [73]:
as.character(gender)

In [74]:
as.raw(1:17)

 [1] 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11

In [75]:
as.raw(c(pi, 1+1i, -1, 256))

"out-of-range values treated as 0 in coercion to raw"

[1] 03 01 00 00

In [76]:
sushi <- charToRaw("Fish!")

In [77]:
sushi

[1] 46 69 73 68 21

In [78]:
bangla <- charToRaw("বাংলা")

In [79]:
bangla

 [1] 3c 55 2b 30 39 41 43 3e 3c 55 2b 30 39 42 45 3e 3c 55 2b 30 39 38 32 3e 3c
[26] 55 2b 30 39 42 32 3e 3c 55 2b 30 39 42 45 3e

In [82]:
class(sushi)

## Checking and Changing Class

```r
# Typical Checking Solution
if (!is(x, "some_class"))
{
    #corrective measures
}
```

In [83]:
is.character("red lorry, yellow lorry")

In [84]:
is.logical(TRUE)

In [85]:
is.list(list(a = 1, b = 2))

In [86]:
ls(pattern="^is", baseenv())

In [88]:
# String to numeric conversion
x <- "123.456"
y <- as(x, "numeric")
is.numeric(y)

In [89]:
# as() throws an error here (no numeric -> data.frame method); use as.data.frame() instead
y <- c(2, 12, 343, 34997)
as(y, "data.frame")
as.data.frame(y)

ERROR: Error in as(y, "data.frame"): no method or default for coercing "numeric" to "data.frame"


In [90]:
x <- "123.455"
class(x) <- "numeric"

In [92]:
is.numeric(x)

## Examining Variables

In [93]:
# Integer sequence -> http://oeis.org/A033951
ulams_spiral <- c(1, 8, 23, 46, 77)
for (i in ulams_spiral) print(i)

[1] 1
[1] 8
[1] 23
[1] 46
[1] 77


In [97]:
# Generate 30 random numbers in the range [0, 1]
num <- runif(30)
summary(num)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.05384 0.22690 0.48860 0.47860 0.68970 0.99300 

In [98]:
# sample() draws 30 values from letters[1:5] with replacement
fac <- factor(sample(letters[1:5], 30, replace=TRUE))

In [100]:
summary(fac)

In [102]:
bool <- sample(c(TRUE, FALSE, NA), 30, replace=TRUE)
summary(bool)

   Mode   FALSE    TRUE    NA's 
logical       7      12      11 

In [103]:
# Summarization of dataframes
dfr <- data.frame(num, fac, bool)

In [105]:
head(dfr)

num,fac,bool
0.24906984,c,
0.98514727,e,False
0.57437965,d,
0.34823583,d,True
0.98970446,a,True
0.05384268,a,True


In [106]:
summary(dfr)

      num          fac      bool        
 Min.   :0.05384   a:8   Mode :logical  
 1st Qu.:0.22686   b:4   FALSE:7        
 Median :0.48864   c:5   TRUE :12       
 Mean   :0.47863   d:4   NA's :11       
 3rd Qu.:0.68967   e:9                  
 Max.   :0.99303                        

In [107]:
str(num)

 num [1:30] 0.249 0.985 0.574 0.348 0.99 ...


In [108]:
str(dfr)

'data.frame':	30 obs. of  3 variables:
 $ num : num  0.249 0.985 0.574 0.348 0.99 ...
 $ fac : Factor w/ 5 levels "a","b","c","d",..: 3 5 4 4 1 1 5 5 5 5 ...
 $ bool: logi  NA FALSE NA TRUE TRUE TRUE ...


In [109]:
unclass(fac)

In [110]:
attributes(fac)

In [113]:
# Visualizing 2D variables [Matrices / DataFrames]
# Doesn't work on R notebook
View(dfr)
new_dfr <- edit(dfr)
fix(dfr)

ERROR: Error in View(dfr): 'View()' not yet supported in the Jupyter R kernel


## The Workspace

In [122]:
# Create some variables to find
peach <- 1
plum <- 'fruity'
pear <- TRUE
ls()

In [125]:
# Finding the variables that contain 'ea'
ls(pattern="ea")

## Chapter 4
### Vectors, Matrices & Arrays

#### Vectors

In [127]:
# Sequence of numbers from 8.5 down to 4.5
8.5:4.5

In [128]:
# Values concatenated into single vector
c(1, 1:3, c(5, 8, 13))

In [129]:
vector("numeric", 5)

In [130]:
vector("complex", 5)

In [131]:
vector("logical", 5)

In [132]:
vector("character", 5)

In [133]:
vector("list", 5)

In [134]:
numeric(5)

In [135]:
complex(5)

In [136]:
logical(5)

In [137]:
character(5)

#### Sequences

In [138]:
seq.int(3, 12)

In [139]:
seq.int(.1, 0.01, -0.01)

In [140]:
n <- 0

In [141]:
1:n

In [143]:
print(seq_len(n))

integer(0)


In [145]:
# seq_along creates a sequence from 1 up to the length of its input
pp <- c("Peter", "Piper", "picked", "a", "peck", "of", "pickled", "peppers")
for (i in seq_along(pp))
    print(pp[i])

[1] "Peter"
[1] "Piper"
[1] "picked"
[1] "a"
[1] "peck"
[1] "of"
[1] "pickled"
[1] "peppers"


#### Lengths

In [146]:
length(1:5)

In [147]:
length(c(TRUE, FALSE))

In [151]:
# Get overall length
sn <- c("Hello", "R", "Data", "Science")
length(sn)

In [150]:
# Getting individual string length
nchar(sn)

In [154]:
# Assigning a shorter length to a vector truncates it
poincare <- c(1, 0, 0, 0, 2, 0, 2, 0) # See http://oeis.org/A051629
length(poincare) <- 3
poincare

In [155]:
length(poincare) <- 8
print(poincare)

[1]  1  0  0 NA NA NA NA NA


#### Names

In [156]:
c(apple = 1, banana = 2, "kiwi fruit" = 3, 4)

In [159]:
# Adding element names to a vector after its creation
x <- 1:4
names(x) <- c("Apple", "banana", "kiwi fruit", "")
print(x)

     Apple     banana kiwi fruit            
         1          2          3          4 


In [160]:
# If vector has no element names the `names` function returns NULL
names(1:4)

NULL

#### Indexing Vectors

In [162]:
# Consider this vector
x <- (1:5) ^ 2; x

In [164]:
x[c(1, 3, 5)]

In [165]:
x[c(-2, -4)]

In [167]:
# Only TRUE indexed element will be returned
x[c(TRUE, FALSE, TRUE, FALSE, TRUE)]

In [168]:
names(x) <- c("one", "four", "nine", "sixteen", "twenty five")

In [170]:
x[c("one", "nine", "twenty five")]

In [172]:
# Doesn't make sense [Mixing positive and negative values is not allowed]
x[c(1, -1)]

ERROR: Error in x[c(1, -1)]: only 0's may be mixed with negative subscripts


In [173]:
x[c(1, NA, 5)]

In [174]:
x[c(TRUE, FALSE, NA, FALSE, TRUE)]

In [175]:
# This doesn't make sense either
x[c(-2, NA)]

ERROR: Error in x[c(-2, NA)]: only 0's may be mixed with negative subscripts


In [176]:
x[6]

In [178]:
# Passing fractions as indices still works; they are truncated toward zero [not recommended]
x[1.9]

In [179]:
x[-1.9]

#### Vector Recycling & Repetition

In [180]:
1:5 + 1

In [181]:
1 + 1:5

In [182]:
# Recycling: the shorter vector is repeated to match the length of the longer one
1:5 + 1:15

In [183]:
1:5 + 1:7

"longer object length is not a multiple of shorter object length"

In [184]:
# Repetition
rep(1:5, 3 )

In [185]:
rep(1:5, each=3)

In [186]:
rep(1:5, times=1:5)

In [187]:
rep(1:5, length.out= 7)

In [188]:
# same as rep(1:5, 3)
rep.int(1:5, 3)

In [189]:
rep_len(1:5, 13)

#### Matrices & Arrays

##### Creating Arrays & Matrices

In [190]:
(three_d_array <- array(
    1:24,
    dim = c(4, 3, 2),
    dimnames = list (
        c("one", "two", "three", "four"),
        c("ein", "zwei", "drei"),
        c("un", "deux")
    )
))

In [191]:
class(three_d_array)

In [192]:
(a_matrix <- matrix(
    1:12,
    nrow = 4,
    dimnames = list(
        c("one", "two", "three", "four"),
        c("ein", "zwei", "drei")
    )
))

Unnamed: 0,ein,zwei,drei
one,1,5,9
two,2,6,10
three,3,7,11
four,4,8,12


In [193]:
class(a_matrix)

In [196]:
(two_d_array <- array(
    1:12,
    dim = c(4, 3),
    dimnames = list (
        c("one", "two", "three", "four"),
        c("ein", "zwei", "drei")
    )
))

Unnamed: 0,ein,zwei,drei
one,1,5,9
two,2,6,10
three,3,7,11
four,4,8,12


In [197]:
identical(two_d_array, a_matrix)

In [199]:
matrix(
    1:12,
    nrow = 4,
    byrow = TRUE,
    dimnames = list (
        c("one", "two", "three", "four"),
        c("ein", "zwei", "drei")
    )
)

Unnamed: 0,ein,zwei,drei
one,1,2,3
two,4,5,6
three,7,8,9
four,10,11,12


#### Rows, Columns & Dimensions

In [200]:
dim(three_d_array)

In [201]:
dim(a_matrix)

In [202]:
nrow(a_matrix)

In [203]:
ncol(a_matrix)

In [204]:
nrow(three_d_array)

In [205]:
ncol(three_d_array)

In [206]:
length(three_d_array)

In [207]:
length(a_matrix)

In [208]:
dim(a_matrix) <- c(6, 2)

In [209]:
a_matrix

0,1
1,7
2,8
3,9
4,10
5,11
6,12


In [210]:
identical(nrow(a_matrix), NROW(a_matrix))

In [211]:
identical(ncol(a_matrix), NCOL(a_matrix))

In [212]:
recamen <- c(0, 1, 3, 6, 2, 7, 13, 20)

In [213]:
nrow(recamen)

NULL

In [214]:
NROW(recamen)

In [215]:
ncol(recamen)

NULL

In [216]:
NCOL(recamen)

In [217]:
dim(recamen)

NULL

#### Row, Column & Dimension Names

In [220]:
(a_matrix <- matrix(
    1:12,
    nrow = 4,
    dimnames = list(
        c("one", "two", "three", "four"),
        c("ein", "zwei", "drei")
    )
))

rownames(a_matrix)

Unnamed: 0,ein,zwei,drei
one,1,5,9
two,2,6,10
three,3,7,11
four,4,8,12


In [221]:
colnames(a_matrix)

In [222]:
dimnames(a_matrix)

In [223]:
rownames(three_d_array)

In [224]:
colnames(three_d_array)

#### Indexing Arrays

In [225]:
a_matrix[1, c("zwei", "drei")]

In [226]:
# Include all of a dimension
a_matrix[1, ]

In [227]:
a_matrix[, 1]

#### Combining Matrices

In [228]:
another_matrix <- matrix(
    seq.int(2, 24, 2),
    nrow = 4,
    dimnames = list (
        c("five", "six", "seven", "eigth"),
        c("vier", "funf", "sechs")
    )
)

In [229]:
another_matrix

Unnamed: 0,vier,funf,sechs
five,2,10,18
six,4,12,20
seven,6,14,22
eigth,8,16,24


In [230]:
c(a_matrix, another_matrix)

In [232]:
# The natural way to combine matrices is to use `cbind` / `rbind`
# cbind binds by column and rbind binds by row
cbind(a_matrix, another_matrix)

Unnamed: 0,ein,zwei,drei,vier,funf,sechs
one,1,5,9,2,10,18
two,2,6,10,4,12,20
three,3,7,11,6,14,22
four,4,8,12,8,16,24


In [233]:
rbind(a_matrix, another_matrix)

Unnamed: 0,ein,zwei,drei
one,1,5,9
two,2,6,10
three,3,7,11
four,4,8,12
five,2,10,18
six,4,12,20
seven,6,14,22
eigth,8,16,24


#### Array Arithmetic

In [234]:
a_matrix + another_matrix

Unnamed: 0,ein,zwei,drei
one,3,15,27
two,6,18,30
three,9,21,33
four,12,24,36


In [235]:
a_matrix * another_matrix

Unnamed: 0,ein,zwei,drei
one,2,50,162
two,8,72,200
three,18,98,242
four,32,128,288


In [236]:
# Adding non-conformable matrices throws an error
another_matrix <- matrix(1:12, nrow=2)
a_matrix + another_matrix

ERROR: Error in a_matrix + another_matrix: non-conformable arrays


In [238]:
# Transposing matrices
t(a_matrix)

Unnamed: 0,one,two,three,four
ein,1,2,3,4
zwei,5,6,7,8
drei,9,10,11,12


In [239]:
# Inner multiplication
a_matrix %*% t(a_matrix)

Unnamed: 0,one,two,three,four
one,107,122,137,152
two,122,140,158,176
three,137,158,179,200
four,152,176,200,224


In [240]:
# Outer multiplication
1:3 %o% 4:6

0,1,2
4,5,6
8,10,12
12,15,18


In [241]:
# Same as prev
outer(1:3, 4:6)

0,1,2
4,5,6
8,10,12
12,15,18


In [242]:
# Matrix inversion
m <- matrix(c(1, 0, 1, 5, -3, 1, 2, 4, 7), nrow=3)

In [244]:
m

0,1,2
1,5,2
0,-3,4
1,1,7


In [246]:
# This doesn't invert matrices
m^-1

0,1,2
1.0,0.2,0.5
inf,-0.3333333,0.25
1.0,1.0,0.1428571


In [248]:
# Inversion using `solve`
inverse_of_m <- solve(m); inverse_of_m

0,1,2
-25,-33,26
4,5,-4
3,4,-3


In [249]:
m %*% inverse_of_m

0,1,2
1,0,0
0,1,0
0,0,1


## Chapter 5
### Lists & Data Frames

#### Lists

In [250]:
a_list <- list(
    c(1, 1, 2, 5, 14, 42),
    month.abb,
    matrix(c(3, -8, 1, -3), nrow = 2),
    asin
)

In [251]:
a_list

0,1
3,1
-8,-3


In [252]:
names(a_list) <- c("catalan", "months", "involutary", "arcsin"); a_list

0,1
3,1
-8,-3


In [253]:
the_same_list <- list (
    catalan = c(1, 1, 2, 5, 14, 42),
    months = month.abb,
    involutary = matrix(c(3, -8, 1, -3), nrow = 2),
    arcsin = asin
)

In [254]:
the_same_list

0,1
3,1
-8,-3


In [255]:
# A nested list: lists may contain other lists to any depth.
main_list <- list(
    middle_list = list(
        element_in_middle_list = diag(3),
        inner_list = list(
            # `^` binds tighter than `:`, so `pi ^ 1:4` parses as `(pi ^ 1):4`
            # and yields only the single value pi. Parenthesise the sequence
            # to get the first four powers of pi.
            element_in_inner_list = pi ^ (1:4),
            another_element_in_inner_list = "a"
        )
    ),
    element_in_main_list = log10(1:10)
)

#### Atomic & Recursive Variables

**Lists are recursive variables (they can contain other lists); vectors, matrices and arrays, by contrast, are atomic**

In [261]:
is.atomic(list())

In [262]:
is.recursive(list())

In [263]:
is.atomic(numeric())

In [264]:
is.recursive(numeric())

#### List Dimensions & Arithmetic

In [265]:
length(a_list)

In [266]:
length(main_list)

In [267]:
dim(a_list)

NULL

In [268]:
dim(main_list)

NULL

In [269]:
nrow(a_list)

NULL

In [270]:
ncol(a_list)

NULL

In [271]:
NROW(a_list)

In [272]:
NCOL(a_list)

In [275]:
l1 <- list(1:5)
l2 <- list(6:10)

print(l1[[1]])
print(l2[[1]])

l1[[1]] + l2[[1]]

[1] 1 2 3 4 5
[1]  6  7  8  9 10


#### Indexing Lists

In [280]:
# Test list
l <- list(
    first = 1,
    second = 2,
    third = list (
        alpha = 3.1,
        beta = 3.2
    )
)

In [281]:
l[1:2]

In [282]:
l[-3]

In [283]:
l[c("first", "second")]

In [284]:
l[c(TRUE, TRUE, FALSE)]

In [285]:
l[[1]]

In [286]:
l[["first"]]

In [287]:
is.list(l[1])

In [291]:
l$first

In [292]:
l$f

In [293]:
l$second

In [294]:
l$third

In [295]:
l$third$alpha

In [296]:
l[["third"]]["beta"]

In [297]:
l[["third"]][["beta"]]

In [298]:
l[c(4, 2 , 5)]

$<NA>
NULL

$second
[1] 2

$<NA>
NULL


In [299]:
# Element that doesn't exist
l[["fourth"]]

NULL

In [300]:
l$fourth

NULL

#### Converting Between Vectors & Lists

In [302]:
# Convert vector into list
busy_beaver <- c(1, 6, 21, 107)
as.list(busy_beaver)

In [304]:
# List 2 Vec
as.numeric(list(1, 6, 21, 107))

In [307]:
# List with non scalar element
# A list whose elements are numeric vectors of differing lengths
prime_factors <- list(
    two   = 2,
    three = 3,
    four  = rep(2, 2),
    five  = 5,
    six   = c(2, 3),
    seven = 7,
    eight = rep(2, 3),
    nine  = rep(3, 2),
    ten   = c(2, 5)
)
# `unlist` flattens such a list into a single named vector
unlist(prime_factors)

#### Combining Lists

In [309]:
# Using `c` function for concatenating lists
c(list(a = 1, b = 2), list(3))

$a
[1] 1

$b
[1] 2

[[3]]
[1] 3


In [310]:
matrix_list_hybrid <- cbind(
    list(a = 1, b = 2),
    list(c = 3, list(d = 4))
)

In [311]:
matrix_list_hybrid

0,1,2
a,1,3
b,2,4


In [312]:
str(matrix_list_hybrid)

List of 4
 $ : num 1
 $ : num 2
 $ : num 3
 $ :List of 1
  ..$ d: num 4
 - attr(*, "dim")= int [1:2] 2 2
 - attr(*, "dimnames")=List of 2
  ..$ : chr [1:2] "a" "b"
  ..$ : NULL


#### NULL

In [313]:
# UK bank holidays for 2013, keyed by month abbreviation.
# Months without a holiday are stored explicitly as NULL elements; `list()`
# keeps them, so the list still has one entry per month.
bank_holidays_2013 <- list(
    Jan = "New Year's Day",
    Feb = NULL,
    Mar = "Good Friday",
    Apr = "Easter Monday",
    May = c("Early May Bank Holiday", "Spring Bank Holiday"),
    Jun = NULL,
    Jul = NULL,
    Aug = "Summer Bank Holiday",
    Sep = NULL,
    Oct = NULL,
    Nov = NULL,
    Dec = c("Christmas Day", "Boxing Day")
)

In [314]:
bank_holidays_2013

$Jan
[1] "New Year's Day"

$Feb
NULL

$Mar
[1] "Good Friday"

$Apr
[1] "Easter Monday"

$May
[1] "Early May Bank Holiday" "Spring Bank Holiday"   

$Jun
NULL

$Jul
NULL

$Aug
[1] "Summer Bank Holiday"

$Sep
NULL

$Oct
NULL

$Nov
NULL

$Dec
[1] "Christmas Day" "Boxing Day"   


In [315]:
length(NULL)

In [316]:
length(NA)

In [317]:
# NA is a scalar value, NULL takes up no space at all

In [318]:
# NULL can be used to remove elements of a list:
# assigning NULL with `$<-` drops the element entirely.
bank_holidays_2013$Jan <- NULL
print(bank_holidays_2013)

$Feb
NULL

$Mar
[1] "Good Friday"

$Apr
[1] "Easter Monday"

$May
[1] "Early May Bank Holiday" "Spring Bank Holiday"   

$Jun
NULL

$Jul
NULL

$Aug
[1] "Summer Bank Holiday"

$Sep
NULL

$Oct
NULL

$Nov
NULL

$Dec
[1] "Christmas Day" "Boxing Day"   



In [321]:
bank_holidays_2013["May"] <- list(NULL)

In [323]:
bank_holidays_2013$May

NULL

#### Pairlists

In [324]:
# Checking argument of standard deviation function `sd`
arguments_of_sd <- formals(sd)

In [326]:
class(arguments_of_sd)

In [327]:
pairlist()

NULL

In [328]:
list()

#### Data Frames

In [329]:
a_data_frame <- data.frame(
    x = letters[1:5],
    y = rnorm(5),
    z = runif(5) > 0.5
)

In [330]:
a_data_frame

x,y,z
a,1.9063069,True
b,-0.7179883,True
c,1.1650339,False
d,1.0337471,False
e,1.618705,False


In [331]:
y <- rnorm(5)

In [332]:
names(y) <- month.name[1:5]

In [333]:
y

In [334]:
data.frame(
    x = letters[1:5],
    y = y,
    z = runif(5) > 0.5
)

Unnamed: 0,x,y,z
January,a,1.31718938,False
February,b,0.19331413,True
March,c,-0.77342305,True
April,d,0.01604924,False
May,e,-0.15574349,True


In [335]:
# Removing row names
data.frame(
    x = letters[1:5],
    y = y,
    z = runif(5) > 0.5,
    row.names = NULL
)

x,y,z
a,1.31718938,False
b,0.19331413,True
c,-0.77342305,False
d,0.01604924,True
e,-0.15574349,True


In [336]:
# Adding row names
data.frame(
    x = letters[1:5],
    y = y,
    z = runif(5) > 0.5,
    row.names = c("Hello", "World", "Programming", "in", "R")
)

Unnamed: 0,x,y,z
Hello,a,1.31718938,True
World,b,0.19331413,False
Programming,c,-0.77342305,True
in,d,0.01604924,True
R,e,-0.15574349,True


In [337]:
rownames(a_data_frame)

In [338]:
colnames(a_data_frame)

In [339]:
dimnames(a_data_frame)

In [340]:
nrow(a_data_frame)

In [341]:
ncol(a_data_frame)

In [343]:
# A data frame can be created from vectors of different lengths; shorter ones are recycled
data.frame(
    x = 1, # recycled 4 times
    y = 2:3, # recycled twice
    z = 4:7 # Longest input; no cycling
)

x,y,z
1,2,4
1,3,5
1,2,6
1,3,7


In [345]:
# If the lengths are not compatible, an error is thrown
# The lowest common multiple of all lengths must equal the length of the longest vector
data.frame(
    x = 1,
    y = 2:3,
    z = 4:6
)

ERROR: Error in data.frame(x = 1, y = 2:3, z = 4:6): arguments imply differing number of rows: 1, 2, 3


In [346]:
# By default, column names are checked to be unique, valid variable names; this can be turned off with check.names = FALSE
data.frame (
    "A column" = letters[1:5],
    "!@#$&" = rnorm(5),
    "..." = runif(5) > 0.5,
    check.names = FALSE
)

A column,!@#$&,...
a,0.8415835,True
b,0.2197354,False
c,-2.4566178,True
d,1.2105292,True
e,1.3967799,True


#### Indexing Data Frames

In [347]:
a_data_frame[2:3, -3]

Unnamed: 0,x,y
2,b,-0.7179883
3,c,1.1650339


In [348]:
a_data_frame[c(FALSE, TRUE, TRUE, FALSE, FALSE), c("x", "y")]

Unnamed: 0,x,y
2,b,-0.7179883
3,c,1.1650339


In [349]:
class(a_data_frame[2:3, -3])

In [350]:
class(a_data_frame[2:3, 1])

In [351]:
a_data_frame$x[2:3]

In [352]:
a_data_frame[[1]][2:3]

In [353]:
a_data_frame[["x"]][2:3]

#### Basic Data Frame Manipulation

In [356]:
# Transposing a data frame coerces it to a matrix
t(a_data_frame)

0,1,2,3,4,5
x,a,b,c,d,e
y,1.9063069,-0.7179883,1.1650339,1.0337471,1.6187050
z,TRUE,TRUE,FALSE,FALSE,FALSE


In [358]:
# Data frame joining using `cbind` & `rbind`
another_data_frame <- data.frame(
    z = rlnorm(5),
    y = sample(5),
    x = letters[3:7]
)
rbind(a_data_frame, another_data_frame)

x,y,z
a,1.9063069,1.0
b,-0.7179883,1.0
c,1.1650339,0.0
d,1.0337471,0.0
e,1.618705,0.0
c,4.0,1.0904478
d,1.0,0.5601729
e,3.0,1.5073192
f,5.0,0.6840857
g,2.0,0.218162


In [359]:
cbind(a_data_frame, another_data_frame)

x,y,z,z.1,y.1,x.1
a,1.9063069,True,1.0904478,4,c
b,-0.7179883,True,0.5601729,1,d
c,1.1650339,False,1.5073192,3,e
d,1.0337471,False,0.6840857,5,f
e,1.618705,False,0.218162,2,g


In [360]:
# Merging data frames
merge(a_data_frame, another_data_frame, by = "x")

x,y.x,z.x,z.y,y.y
c,1.165034,False,1.0904478,4
d,1.033747,False,0.5601729,1
e,1.618705,False,1.5073192,3


In [361]:
merge(a_data_frame, another_data_frame, by="x", all = TRUE)

x,y.x,z.x,z.y,y.y
a,1.9063069,True,,
b,-0.7179883,True,,
c,1.1650339,False,1.0904478,4.0
d,1.0337471,False,0.5601729,1.0
e,1.618705,False,1.5073192,3.0
f,,,0.6840857,5.0
g,,,0.218162,2.0


In [362]:
# Calculating column-wise sums and means (colSums / colMeans)
colSums(a_data_frame[, 2:3])

In [363]:
colMeans(a_data_frame[, 2:3])

## Chapter 6
### Environments & Functions

#### Environments

In [364]:
an_environment <- new.env()

In [365]:
an_environment[["pythag"]] <- c(12, 15, 20, 21)
an_environment$root <- polyroot(c(6, -5, 1))

In [366]:
assign(
    "moonday",
    weekdays(as.Date("1969/07/20")),
    an_environment
)

In [368]:
an_environment[["pythag"]]

In [369]:
an_environment$root

In [370]:
get("moonday", an_environment)

In [371]:
ls(envir = an_environment)

In [373]:
ls.str(envir = an_environment)

moonday :  chr "Sunday"
pythag :  num [1:4] 12 15 20 21
root :  cplx [1:2] 2+0i 3-0i

In [374]:
exists("pythag", an_environment)