## Creating a Dataframe in R from Other Dataframes

In [114]:

# Two data frames that describe the same four animals, row for row.
super_sleepers_1 <- data.frame(
  rating = seq_len(4),
  animal = c("koala", "hedgehog", "sloth", "panda")
)
super_sleepers_2 <- data.frame(
  country = c("Australia", "Italy", "Peru", "China"),
  avg_sleep_hours = c(21, 18, 17, 10)
)

# cbind() places the columns side by side; rows are paired purely by position.
super_sleepers <- cbind(super_sleepers_1, super_sleepers_2)
print(super_sleepers)

  rating   animal   country avg_sleep_hours
1      1    koala Australia              21
2      2 hedgehog     Italy              18
3      3    sloth      Peru              17
4      4    panda     China              10


![1.JPG](attachment:1.JPG)

## Using the 'apply' function on a matrix or dataframe

### The apply() function lets us apply a function to the rows or columns of a matrix or data frame. 

In [15]:
# Build a 3 x 10 integer matrix. Only 10 values are supplied for 30 cells, so
# matrix() recycles 1:10 column by column until the matrix is full.
sample_matrix <- matrix(1:10, nrow = 3, ncol = 10)

print("sample matrix:")
sample_matrix

# Use apply() across rows (MARGIN = 1) to find the sum of each row
print("sum across rows:")
apply(sample_matrix, MARGIN = 1, FUN = sum)   # Row wise

# Use apply() across columns (MARGIN = 2) to find the sum of each column
print("sum across columns:")
apply(sample_matrix, MARGIN = 2, FUN = sum)   # Column wise

[1] "sample matrix:"


0,1,2,3,4,5,6,7,8,9
1,4,7,10,3,6,9,2,5,8
2,5,8,1,4,7,10,3,6,9
3,6,9,2,5,8,1,4,7,10


[1] "sum across rows:"


[1] "sum across columns:"


## Using 'lapply' function on dataframe

#### The lapply() function in the R Language takes a list, vector, or data frame as input and gives output in the form of a list object.

In [8]:
# Creating dataset

m <- c(1, 9, 6, 5, 9, 6)
n <- c(8, 2, 3, 6, 4, 3)
o <- c(21, 0, 14, 5, 3, 2)
p <- c(4, 6, 3, 7, 1, 3)

# creating dataframe
df <- data.frame(A = m, B = n, C = o, D = p)

print(df)

cat("\n")

# Element-wise transform applied to every column: x -> 7 * x + 1.
magic_fun <- function(x) {
  x * 7 + 1
}

# lapply() returns a list with one transformed vector per column;
# data.frame() turns that list back into a data frame.
data.frame(lapply(df, magic_fun))

  A B  C D
1 1 8 21 4
2 9 2  0 6
3 6 3 14 3
4 5 6  5 7
5 9 4  3 1
6 6 3  2 3



A,B,C,D
<dbl>,<dbl>,<dbl>,<dbl>
8,57,148,29
64,15,1,43
43,22,99,22
36,43,36,50
64,29,22,8
43,22,15,22


In [26]:
# `data` is created elsewhere in the notebook (presumably a data frame).
# lapply() calls class() on each of its elements and always returns a list.
cls_list <- lapply(X = data, FUN = class)

cls_list

# The result above is a 'list'

## Using 'sapply' function on dataframe

### The sapply() function takes a list, vector or data frame as input and returns a vector or matrix.

##### Note: sapply() does the same job as lapply(), but simplifies the result to a vector or matrix when possible (otherwise it returns a list).

In [27]:
# `data` is created elsewhere in the notebook (presumably a data frame).
# sapply() calls class() on each of its elements and simplifies the result.
cls_vect <- sapply(X = data, FUN = class)

cls_vect

# The result above is a 'vector'

In [33]:
# NOTE: sum() is not defined for character data, so this call stops with an
# "invalid 'type' (character)" error when `data` contains a character column.
sapply(X = data, FUN = sum)

ERROR: Error in FUN(X[[i]], ...): invalid 'type' (character) of argument


## Using do.call on an R Dataframe

### The do.call() function in R constructs and executes a function call from a name or a function as well as a list of arguments to be passed to it.

In [47]:
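# Syntax
#  do.call(what, args)
#    what: a function, or the name of a function as a character string
#    args: a list of the arguments to pass to that function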

In [56]:
# Helper that adds its two arguments element-wise.
myfunction <- function(a, b) a + b

# Unnamed list: its elements are matched to `a` and `b` by position.
myargs <- list(c(7, 9, 2), c(6, 2, 8))

print(myargs)

# Same as calling myfunction(c(7, 9, 2), c(6, 2, 8)).
do.call(what = myfunction, args = myargs)

[[1]]
[1] 7 9 2

[[2]]
[1] 6 2 8



In [65]:
# Example 02:

# Three integer vectors of equal length, kept in a list.
dataList <- list(1:3, 4:6, 7:9)

print(dataList)

cat("\n")

# Passing the list to rbind() through do.call() stacks each element as one
# row of a matrix.
print(do.call(what = rbind, args = dataList))

# Wrapping that matrix in data.frame() gives a data frame instead.
data.frame(do.call(what = rbind, args = dataList))

[[1]]
[1] 1 2 3

[[2]]
[1] 4 5 6

[[3]]
[1] 7 8 9


     [,1] [,2] [,3]
[1,]    1    2    3
[2,]    4    5    6
[3,]    7    8    9


X1,X2,X3
<int>,<int>,<int>
1,2,3
4,5,6
7,8,9


In [79]:
# Whitespace-separated sample table kept inline as one string; the first line
# holds the column names.
str_data <- "currency    maturity    ws
         USD        3m            285000000
         USD        3m            456000000
         USD        1y            112000000
         USD        2y            56000000
         EUR        3m            1785000000
         EUR        6m            200000000
         EUR        1y            250000000
         EUR        1y            1855000000
         CNY        6m            84000000
         CNY        6m            42000000
         CNY        6m            144000000
         AUD        6m            213000000
         AUD        2y            106000000
         AUD        2y            214000000"

# Parse the string as if it were a file. stringsAsFactors = FALSE is spelled
# out so that currency and maturity are character columns on every R version
# (before R 4.0 the default would turn them into factors).
str_data1 <- read.table(
  text = str_data,
  header = TRUE,
  stringsAsFactors = FALSE
)

print(str_data1)

   currency maturity         ws
1       USD       3m  285000000
2       USD       3m  456000000
3       USD       1y  112000000
4       USD       2y   56000000
5       EUR       3m 1785000000
6       EUR       6m  200000000
7       EUR       1y  250000000
8       EUR       1y 1855000000
9       CNY       6m   84000000
10      CNY       6m   42000000
11      CNY       6m  144000000
12      AUD       6m  213000000
13      AUD       2y  106000000
14      AUD       2y  214000000


## Split data based on Column in dataframe

In [83]:
 # Partition the rows of str_data1 by the currency column. split() returns a
 # named list holding one data frame per distinct currency value.

lt_data <- split(x = str_data1, f = str_data1$currency)

In [84]:
# Auto-print the list returned by split(): one data frame per currency.
lt_data

Unnamed: 0_level_0,currency,maturity,ws
Unnamed: 0_level_1,<chr>,<chr>,<int>
12,AUD,6m,213000000
13,AUD,2y,106000000
14,AUD,2y,214000000

Unnamed: 0_level_0,currency,maturity,ws
Unnamed: 0_level_1,<chr>,<chr>,<int>
9,CNY,6m,84000000
10,CNY,6m,42000000
11,CNY,6m,144000000

Unnamed: 0_level_0,currency,maturity,ws
Unnamed: 0_level_1,<chr>,<chr>,<int>
5,EUR,3m,1785000000
6,EUR,6m,200000000
7,EUR,1y,250000000
8,EUR,1y,1855000000

Unnamed: 0_level_0,currency,maturity,ws
Unnamed: 0_level_1,<chr>,<chr>,<int>
1,USD,3m,285000000
2,USD,3m,456000000
3,USD,1y,112000000
4,USD,2y,56000000


In [91]:
# For every per-currency data frame in lt_data, build a one-row data frame
# holding the total of its ws column.
lt_out <- lapply(lt_data, function(grp) {
  data.frame(sum_ws = sum(grp$ws))
})

lt_out

sum_ws
<int>
533000000

sum_ws
<int>
270000000

sum_ws
<dbl>
4090000000.0

sum_ws
<int>
909000000


In [96]:
# Stack the one-row data frames held in lt_out into a single data frame;
# the list names end up as its row names.
do.call(what = rbind, args = lt_out)

Unnamed: 0_level_0,sum_ws
Unnamed: 0_level_1,<dbl>
AUD,533000000.0
CNY,270000000.0
EUR,4090000000.0
USD,909000000.0


In [104]:
## Multiple output

# Per currency, build a one-row summary: the currency code, the total of ws
# and the largest single ws value.
lt_out <- lapply(
  split(str_data1, str_data1$currency), # split by currency for outer lapply
  function(x) {
    data.frame(
      # Every row of a group carries the same currency, so the first entry is
      # the group key. as.character() keeps this working when the column is a
      # factor, for which max() is not defined.
      curr = as.character(x$currency[1L]),
      sum_ws = sum(x$ws),
      max_ws = max(x$ws),
      stringsAsFactors = FALSE
    )
  }
)

lt_out

curr,sum_ws,max_ws
<chr>,<int>,<int>
AUD,533000000,214000000

curr,sum_ws,max_ws
<chr>,<int>,<int>
CNY,270000000,144000000

curr,sum_ws,max_ws
<chr>,<dbl>,<int>
EUR,4090000000.0,1855000000

curr,sum_ws,max_ws
<chr>,<int>,<int>
USD,909000000,456000000


In [110]:
# Combine the per-currency summaries in lt_out into one data frame.
df_out <- do.call(what = rbind, args = lt_out)

# Drop the row names taken from the list so rows are numbered from 1.
rownames(df_out) <- NULL

print(df_out)

  curr   sum_ws     max_ws
1  AUD 5.33e+08  214000000
2  CNY 2.70e+08  144000000
3  EUR 4.09e+09 1855000000
4  USD 9.09e+08  456000000
