In [8]:
# Build the example `leadership` data frame: one row per manager with a
# test date, country, gender, age and the item columns q1-q5.
manager <- as.numeric(1:5)
date <- c("10/24/08", "10/28/08", "10/1/08", "10/12/08", "5/1/09")
country <- rep(c("US", "UK"), times = c(2, 3))
gender <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 99)

# Item columns; q4 and q5 each hold one NA (row 4).
q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 5, 5, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)

# Keep text columns as character rather than converting them to factors.
leadership <- data.frame(
  manager, date, country, gender, age,
  q1, q2, q3, q4, q5,
  stringsAsFactors = FALSE
)

In [9]:
# Creating new variables: derive columns from existing ones.

mydata <- data.frame(
  x1 = c(2, 2, 6, 4),
  x2 = c(3, 4, 2, 8)
)

# Append the row-wise sum and mean of x1 and x2 as new columns.
mydata$sumx <- mydata$x1 + mydata$x2
mydata$meanx <- (mydata$x1 + mydata$x2) / 2
mydata

x1,x2,sumx,meanx
<dbl>,<dbl>,<dbl>,<dbl>
2,3,5,2.5
2,4,6,3.0
6,2,8,4.0
4,8,12,6.0


In [10]:
# Treat the sentinel age 99 as missing. %in% never returns NA, so the index
# stays a clean TRUE/FALSE mask even if some ages are already NA.
leadership$age[leadership$age %in% 99] <- NA

# Bucket age into a character category. within() evaluates the braced
# statements one after another, so each assignment builds on the previous
# ones. agecat is preallocated as a full-length character vector instead of
# a scalar NA, so it does not depend on implicit stretching or coercion.
# Rows whose age is NA keep agecat = NA.
leadership <- within(leadership, {
  agecat <- rep(NA_character_, length(age))
  agecat[age > 75] <- "Elder"
  agecat[age >= 55 & age <= 75] <- "Middle Age"
  agecat[age < 55] <- "Young"
})
leadership

manager,date,country,gender,age,q1,q2,q3,q4,q5,agecat
<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,10/24/08,US,M,32.0,5,4,5,5.0,5.0,Young
2,10/28/08,US,F,45.0,3,5,2,5.0,5.0,Young
3,10/1/08,UK,F,25.0,3,5,5,5.0,2.0,Young
4,10/12/08,UK,M,39.0,3,3,4,,,Young
5,5/1/09,UK,F,,2,2,1,2.0,1.0,


In [11]:
# Renaming variables: start by listing the current column names.
colnames(leadership)

In [12]:
# Rename the "date" column to "TestDate". Matching on the current name
# instead of on position 2 keeps the rename correct if columns are ever
# added, removed or reordered; if no "date" column exists nothing is renamed.
names(leadership)[names(leadership) == "date"] <- "TestDate"
leadership

manager,TestDate,country,gender,age,q1,q2,q3,q4,q5,agecat
<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,10/24/08,US,M,32.0,5,4,5,5.0,5.0,Young
2,10/28/08,US,F,45.0,3,5,2,5.0,5.0,Young
3,10/1/08,UK,F,25.0,3,5,5,5.0,2.0,Young
4,10/12/08,UK,M,39.0,3,3,4,,,Young
5,5/1/09,UK,F,,2,2,1,2.0,1.0,


In [13]:
library(plyr)

# Rename by name: each element maps an existing column (left) to its new
# name (right). The call is namespace-qualified to make its origin explicit.
leadership <- plyr::rename(
  leadership,
  replace = c(manager = "managerID", TestDate = "testDate")
)
leadership

managerID,testDate,country,gender,age,q1,q2,q3,q4,q5,agecat
<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,10/24/08,US,M,32.0,5,4,5,5.0,5.0,Young
2,10/28/08,US,F,45.0,3,5,2,5.0,5.0,Young
3,10/1/08,UK,F,25.0,3,5,5,5.0,2.0,Young
4,10/12/08,UK,M,39.0,3,3,4,,,Young
5,5/1/09,UK,F,,2,2,1,2.0,1.0,


In [14]:
# Missing values: flag the NA cells of columns q1-q5 (TRUE = missing).
# The columns are selected by name rather than by positions 6:10, so the
# check still targets the right columns if the column layout changes.
is.na(leadership[, paste0("q", 1:5)])

q1,q2,q3,q4,q5
False,False,False,False,False
False,False,False,False,False
False,False,False,False,False
False,False,False,True,True
False,False,False,False,False


In [15]:
# NA propagates through aggregation: summing a vector that contains a
# missing value yields NA.
x <- c(1, 2, 3, NA_real_)
z <- sum(x)
z

In [16]:
# Passing na.rm = TRUE drops the missing values before summing.
z <- sum(x, na.rm = TRUE)
z

In [17]:
# Keep only the complete rows of leadership.
newdata <- na.omit(leadership) # drop the rows that contain missing values
newdata

Unnamed: 0_level_0,managerID,testDate,country,gender,age,q1,q2,q3,q4,q5,agecat
Unnamed: 0_level_1,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,1,10/24/08,US,M,32,5,4,5,5,5,Young
2,2,10/28/08,US,F,45,3,5,2,5,5,Young
3,3,10/1/08,UK,F,25,3,5,5,5,2,Young


In [18]:
# Date values: convert the character test dates into Date objects.
# The pattern is month/day/two-digit year, e.g. "10/24/08".
myformat <- "%m/%d/%y"
leadership[["testDate"]] <- as.Date(
  leadership[["testDate"]],
  format = myformat
)
leadership

managerID,testDate,country,gender,age,q1,q2,q3,q4,q5,agecat
<dbl>,<date>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,2008-10-24,US,M,32.0,5,4,5,5.0,5.0,Young
2,2008-10-28,US,F,45.0,3,5,2,5.0,5.0,Young
3,2008-10-01,UK,F,25.0,3,5,5,5.0,2.0,Young
4,2008-10-12,UK,M,39.0,3,3,4,,,Young
5,2009-05-01,UK,F,,2,2,1,2.0,1.0,


In [19]:
# Capture the current date and render it as year-month-day text.
today <- Sys.Date()
format(today, format = "%Y-%m-%d")
# Dates also support addition and subtraction.

In [20]:
# Build a numeric vector and display it.
a <- c(2,3,5)
a 

In [21]:
# Check whether a is numeric.
is.numeric(a)

In [22]:
# is.vector() tests that a is a vector with no attributes other than names.
is.vector(a)

In [23]:
# Convert a to character; from this point on a is no longer numeric.
a <- as.character(a)

In [24]:
# Display the converted vector.
a

In [25]:
# Confirm that the conversion produced a character vector.
is.character(a)

In [28]:
# Sorting data: reorder the rows by ascending age. order() places rows with
# a missing age last by default.
newdata <- leadership[with(leadership, order(age)), ]
newdata

Unnamed: 0_level_0,managerID,testDate,country,gender,age,q1,q2,q3,q4,q5,agecat
Unnamed: 0_level_1,<dbl>,<date>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
3,3,2008-10-01,UK,F,25.0,3,5,5,5.0,2.0,Young
1,1,2008-10-24,US,M,32.0,5,4,5,5.0,5.0,Young
4,4,2008-10-12,UK,M,39.0,3,3,4,,,Young
2,2,2008-10-28,US,F,45.0,3,5,2,5.0,5.0,Young
5,5,2009-05-01,UK,F,,2,2,1,2.0,1.0,


In [30]:
# Keep managers aged 35 or older, or younger than 26, and only columns q1-q4.
# which() discards rows where the age test is NA, matching how subset()
# treats a missing condition as "do not select".
newdata <- leadership[
  which(leadership$age >= 35 | leadership$age < 26),
  c("q1", "q2", "q3", "q4")
]
newdata

Unnamed: 0_level_0,q1,q2,q3,q4
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>
2,3,5,2,5.0
3,3,5,5,5.0
4,3,3,4,


In [32]:
# Same row filter as before, but keep the contiguous run of columns from
# gender through q4 (inclusive) using subset()'s name-range selection.
newdata <- subset(
  leadership,
  subset = age >= 35 | age < 26,
  select = gender:q4
)
newdata

Unnamed: 0_level_0,gender,age,q1,q2,q3,q4
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2,F,45,3,5,2,5.0
3,F,25,3,5,5,5.0
4,M,39,3,3,4,
