Back to **[Fan](https://fanwangecon.github.io/)**'s R4Econ Homepage **[Table of Content](https://fanwangecon.github.io/R4Econ/)**

# Generate Joint Quantiles from Multiple Continuous Variables as a Categorical Variable with Linear Index

There are multiple or a single continuous variables. Find which quantile each observation belongs to for each of the variables. Then also generate a joint/interaction variable of all combinations of quantiles from different variables.

The program has these features:

1. Quantiles breaks are generated based on group_by characteristics, meaning quantiles for individual level characteristics when data is panel
2. Quantiles variables apply to full panel at within-group observation levels.
3. Robust to non-unique breaks for quantiles (non-unique grouped together)
4. Quantile categories have detailed labeling (specifying which non-unique groupings belong to quantile)


When joining multiple quantile variables together:

1. First check if only calculate quantiles at observations where all quantile base variables are not null
2. Calculate Quantiles for each variable, with different quantile levels for sub-groups of variables
3. Summary statistics by mulltiple quantile-categorical variables, summary 

## Program

### Support Functions

In [480]:
# Quantiles for any variable
gen_quantiles <- function(var, df, prob=c(0.25, 0.50, 0.75)) {
    enframe(quantile(as.numeric(df[[var]]), prob, na.rm=TRUE), 'quant.perc', var) 
}
# Support Functions for Variable Suffix 
f_Q_suffix <- function(seq.quantiles) {
    quantile.suffix <- paste0('Qs', min(seq.quantiles),
                              'e', max(seq.quantiles),
                              'n', (length(seq.quantiles)-1))
}
# Support Functions for Quantile Labeling
f_Q_label <- function(arr.quantiles, 
                      arr.sort.unique.quantile,
                      seq.quantiles) {
    paste0('(', 
           paste0(which(arr.quantiles %in% arr.sort.unique.quantile), collapse=','), 
           ') of ', f_Q_suffix(seq.quantiles)) 
}
# Generate New Variable Names with Quantile Suffix
f_var_rename <- function(name, seq.quantiles) {
    quantile.suffix <- paste0('_', f_Q_suffix(seq.quantiles))
    return(sub('_q', quantile.suffix, name))
}

### Data Slicing and Quantile Generation

- Function 1: generate quantiles based on group-specific characteristics. the groups could be at the panel observation level as well. 

In [481]:
# First Step, given groups, generate quantiles based on group characteristics
# vars.cts2quantile <- c('wealthIdx', 'hgt0', 'wgt0')
# seq.quantiles <- c(0, 0.3333, 0.6666, 1.0)
# vars.group_by <- c('indi.id')
# vars.arrange <- c('indi.id', 'svymthRound')
# vars.continuous <- c('wealthIdx', 'hgt0', 'wgt0')
df_sliced_quantiles <- function(df, vars.cts2quantile, seq.quantiles, 
                                vars.group_by, vars.arrange) {
    
    # Slicing data
    df.grp.L1 <- df %>% group_by(!!!syms(vars.group_by)) %>% arrange(!!!syms(vars.arrange)) %>% slice(1L) %>% ungroup()
    
    # Quantiles based on sliced data
    df.sliced.quantiles <- lapply(vars.cts2quantile, gen_quantiles, df=df.grp.L1, prob=seq.quantiles) %>% reduce(full_join)
    
    return(list(df.sliced.quantiles=df.sliced.quantiles, 
                df.grp.L1=df.grp.L1))
}

## Data Cutting

- Function 2: cut groups for full panel dataframe based on group-specific characteristics quantiles.

In [482]:
# Cutting Function, Cut Continuous Variables into Quantiles with labeing
f_cut <- function(var, df.sliced.quantiles, include.lowest=TRUE, fan.labels=TRUE, print=FALSE) {
    
    # unparsed string variable name
    var.str <- substitute(var)
    
    # Breaks
    arr.quantiles <- df.sliced.quantiles[[var.str]]
    arr.sort.unique.quantiles <- sort(unique(arr.quantiles))
    if (print) {
        print(arr.sort.unique.quantiles)
    }
    
    # Regular cutting With Standard Labels
    # TRUE, means the lowest group has closed bracket left and right 
    var.quantile <- cut(var, breaks=arr.sort.unique.quantiles, include.lowest=include.lowest)
    
    # Use my custom labels
    if (fan.labels) {
        levels.suffix <- lapply(arr.sort.unique.quantiles[1:(length(arr.sort.unique.quantiles)-1)],
                                f_Q_label,
                                arr.quantiles=arr.quantiles,
                                seq.quantiles=seq.quantiles)
        if (print) {
            print(levels.suffix)
        }
        levels(var.quantile) <- paste0(levels(var.quantile), '; ', levels.suffix)
    }
    
    # Return
    return(var.quantile)
}

In [483]:
# Combo Quantile Function
# vars.cts2quantile <- c('wealthIdx', 'hgt0', 'wgt0')
# seq.quantiles <- c(0, 0.3333, 0.6666, 1.0)
# vars.group_by <- c('indi.id')
# vars.arrange <- c('indi.id', 'svymthRound')
# vars.continuous <- c('wealthIdx', 'hgt0', 'wgt0')
df_cut_by_sliced_quantiles <- function(df, vars.cts2quantile, seq.quantiles, 
                                       vars.group_by, vars.arrange) {
    
    # First Step Slicing
    df.sliced <- df_sliced_quantiles(df, vars.cts2quantile, seq.quantiles, vars.group_by, vars.arrange)

    # Second Step Generate Categorical Variables of Quantiles
    df.with.cut.quant <- df %>% mutate_at(vars.cts2quantile,
                               funs(q=f_cut(., df.sliced$df.sliced.quantiles, 
                                           include.lowest=TRUE, fan.labels=TRUE)))
    
    if (length(vars.cts2quantile) > 1) {
        df.with.cut.quant <- df.with.cut.quant %>% 
                              rename_at(vars(contains('_q')), 
                                        funs(f_var_rename(., seq.quantiles=seq.quantiles)))
    } else {
        new.var.name <- paste0(vars.cts2quantile[1], '_', f_Q_suffix(seq.quantiles))
        df.with.cut.quant <- df.with.cut.quant %>% rename(!!new.var.name := q)
    }
    
    # Newly Generated Quantile-Cut Variables
    vars.quantile.cut <- df.with.cut.quant %>% 
                select(matches(paste0(vars.cts2quantile, collapse='|'))) %>% 
                select(matches(f_Q_suffix(seq.quantiles)))
    
    # Return
    return(list(df.with.cut.quant = df.with.cut.quant, 
                df.sliced.quantiles=df.sliced$df.sliced.quantiles, 
                df.grp.L1=df.sliced$df.grp.L1,
                vars.quantile.cut=vars.quantile.cut))
    
}

## Use Program

### Load Data

In [484]:
# Library
library(tidyverse)

# Load Sample Data
setwd('C:/Users/fan/R4Econ/_data/')
df <- read_csv('height_weight.csv')

Parsed with column specification:
cols(
  S.country = col_character(),
  vil.id = col_double(),
  indi.id = col_double(),
  sex = col_character(),
  svymthRound = col_double(),
  momEdu = col_double(),
  wealthIdx = col_double(),
  hgt = col_double(),
  wgt = col_double(),
  hgt0 = col_double(),
  wgt0 = col_double(),
  prot = col_double(),
  cal = col_double(),
  p.A.prot = col_double(),
  p.A.nProt = col_double()
)


## Line by Line--Quantiles Var by Var

The idea of the function is to generate quantiles levels first, and then use those to generate the categories based on quantiles. Rather than doing this in one step. These are done in two steps, to increase clarity in the quantiles used for quantile category generation. And a dataframe with these quantiles are saved as a separate output of the function.

### Dataframe of Variables' Group-by Level Quantiles

Quantiles from Different Variables. Note that these variables are specific to the individual, not individual/month. So we need to first slick the data, so that we only get the first rows. 

Do this in several steps to clarify group_by level. No speed loss. 

In [485]:
# Selected Variables, many Percentiles
vars.group_by <- c('indi.id')
vars.arrange <- c('indi.id', 'svymthRound')
vars.cts2quantile <- c('wealthIdx', 'hgt0', 'wgt0')
seq.quantiles <- c(0, 0.3333, 0.6666, 1.0)
df.sliced <- df_sliced_quantiles(df, vars.cts2quantile, seq.quantiles, vars.group_by, vars.arrange)
df.sliced.quantiles <- df.sliced$df.sliced.quantiles
df.grp.L1 <- df.sliced$df.grp.L1

Joining, by = "quant.perc"
Joining, by = "quant.perc"


In [486]:
df.sliced.quantiles

quant.perc,wealthIdx,hgt0,wgt0
0%,0.5,40.8,1403.8
33.33%,0.5,48.6,2855.4
66.66%,3.0,50.3,3224.743
100%,14.1,58.7,4970.4


In [487]:
# Quantiles all Variables
suppressMessages(lapply(names(df), gen_quantiles, df=df.grp.L1, prob=seq(0.1,0.9,0.10)) %>% reduce(full_join))

"NAs introduced by coercion"

quant.perc,S.country,vil.id,indi.id,sex,svymthRound,momEdu,wealthIdx,hgt,wgt,hgt0,wgt0,prot,cal,p.A.prot,p.A.nProt
10%,,3.0,203.2,,0,5.8,0.5,46.5,1352.76,46.7,2534.36,0.5,0.5,29.49,0.5
20%,,4.0,405.4,,0,6.8,0.5,47.6,1806.74,47.8,2695.9,0.5,0.5,198.86,0.5
30%,,6.0,607.6,,0,7.6,0.5,48.4,2214.4,48.4,2814.72,0.5,0.5,743.63,5.0
40%,,8.0,809.8,,0,8.5,1.0,49.0,2647.36,48.9,2938.16,0.5,0.5,1012.14,35.9
50%,,9.0,1012.0,,0,9.3,2.0,49.5,3048.1,49.4,3049.0,0.5,0.5,1243.85,120.7
60%,,13.0,1214.2,,0,10.1,2.9,50.1,3449.88,50.0,3153.24,0.5,3.7,1541.16,228.44
70%,,14.0,1416.4,,0,11.25,3.0,50.7,3831.0,50.4,3289.64,0.8,7.4,2345.28,262.18
80%,,17.0,1618.6,,0,12.6,4.0,51.4,4231.14,51.1,3435.58,1.3,11.3,4359.0,296.3
90%,,26.8,1820.8,,0,14.4,6.0,52.5,4638.08,52.1,3695.08,1.6,15.55,10748.95,365.36


### Cut Quantile Categorical Variables

Using the Quantiles we have generate, cut the continuous variables to generate categorical quantile variables in the full dataframe.

Note that we can only cut based on unique breaks, but sometimes quantile break-points are the same if some values are often observed, and also if there are too few observations with respect to quantile groups. 

To resolve this issue, we only look at unique quantiles. 

We need several support Functions:
1. support functions to generate suffix for quantile variables based on quantile cuts
2. support for labeling variables of resulting quantiles beyond bracketing

In [488]:
# Function Testing
arr.quantiles <- df.sliced.quantiles[[substitute('wealthIdx')]]
arr.quantiles
arr.sort.unique.quantiles <- sort(unique(df.sliced.quantiles[[substitute('wealthIdx')]]))
arr.sort.unique.quantiles
f_Q_label(arr.quantiles, arr.sort.unique.quantiles[1], seq.quantiles)
f_Q_label(arr.quantiles, arr.sort.unique.quantiles[2], seq.quantiles)
lapply(arr.sort.unique.quantiles[1:(length(arr.sort.unique.quantiles)-1)],
       f_Q_label,
       arr.quantiles=arr.quantiles,
       seq.quantiles=seq.quantiles)

In [489]:
# Generate Categorical Variables of Quantiles
vars.group_by <- c('indi.id')
vars.arrange <- c('indi.id', 'svymthRound')
vars.cts2quantile <- c('wealthIdx', 'hgt0', 'wgt0')
seq.quantiles <- c(0, 0.3333, 0.6666, 1.0)
df.cut <- df_cut_by_sliced_quantiles(df, vars.cts2quantile, seq.quantiles, vars.group_by, vars.arrange)
vars.quantile.cut <- df.cut$vars.quantile.cut
df.with.cut.quant <- df.cut$df.with.cut.quant
df.grp.L1 <- df.cut$df.grp.L1

Joining, by = "quant.perc"
Joining, by = "quant.perc"


In [490]:
# Cut Variables Generated
names(vars.quantile.cut)
summary(vars.quantile.cut)

                 wealthIdx_Qs0e1n3                      hgt0_Qs0e1n3  
 [0.5,3]; (1,2) of Qs0e1n3:24964   [40.8,48.6]; (1) of Qs0e1n3:10476  
 (3,14.1]; (3) of Qs0e1n3 :10116   (48.6,50.3]; (2) of Qs0e1n3:10080  
                                   (50.3,58.7]; (3) of Qs0e1n3: 9448  
                                   NA's                       : 5076  
                              wgt0_Qs0e1n3  
 [1.4e+03,2.86e+03]; (1) of Qs0e1n3 :10106  
 (2.86e+03,3.22e+03]; (2) of Qs0e1n3:10052  
 (3.22e+03,4.97e+03]; (3) of Qs0e1n3: 9832  
 NA's                               : 5090  

In [491]:
# options(repr.matrix.max.rows=50, repr.matrix.max.cols=20)
# df.with.cut.quant

### Individual Variables' Quantile Cuts Review Results

In [492]:
# Group By Results
f.count <- function(df, var.cts, seq.quantiles) {
    df %>% select(S.country, indi.id, svymthRound, matches(paste0(var.cts, collapse='|'))) %>%
        group_by(!!sym(f_var_rename(paste0(var.cts,'_q'), seq.quantiles))) %>%
        summarise_all(funs(n=n()))
}

In [493]:
# Full Panel Results
lapply(vars.cts2quantile, f.count, df=df.with.cut.quant, seq.quantiles=seq.quantiles)

wealthIdx_Qs0e1n3,S.country_n,indi.id_n,svymthRound_n,wealthIdx_n
"[0.5,3]; (1,2) of Qs0e1n3",24964,24964,24964,24964
"(3,14.1]; (3) of Qs0e1n3",10116,10116,10116,10116

hgt0_Qs0e1n3,S.country_n,indi.id_n,svymthRound_n,hgt0_n
"[40.8,48.6]; (1) of Qs0e1n3",10476,10476,10476,10476
"(48.6,50.3]; (2) of Qs0e1n3",10080,10080,10080,10080
"(50.3,58.7]; (3) of Qs0e1n3",9448,9448,9448,9448
,5076,5076,5076,5076

wgt0_Qs0e1n3,S.country_n,indi.id_n,svymthRound_n,wgt0_n
"[1.4e+03,2.86e+03]; (1) of Qs0e1n3",10106,10106,10106,10106
"(2.86e+03,3.22e+03]; (2) of Qs0e1n3",10052,10052,10052,10052
"(3.22e+03,4.97e+03]; (3) of Qs0e1n3",9832,9832,9832,9832
,5090,5090,5090,5090


In [494]:
# Results Individual Slice
lapply(vars.cts2quantile, f.count, 
       df=(df.with.cut.quant %>% group_by(!!!syms(vars.group_by)) %>% arrange(!!!syms(vars.arrange)) %>% slice(1L)), 
       seq.quantiles = seq.quantiles)

wealthIdx_Qs0e1n3,S.country_n,indi.id_n,svymthRound_n,wealthIdx_n
"[0.5,3]; (1,2) of Qs0e1n3",1461,1461,1461,1461
"(3,14.1]; (3) of Qs0e1n3",562,562,562,562

hgt0_Qs0e1n3,S.country_n,indi.id_n,svymthRound_n,hgt0_n
"[40.8,48.6]; (1) of Qs0e1n3",593,593,593,593
"(48.6,50.3]; (2) of Qs0e1n3",571,571,571,571
"(50.3,58.7]; (3) of Qs0e1n3",542,542,542,542
,317,317,317,317

wgt0_Qs0e1n3,S.country_n,indi.id_n,svymthRound_n,wgt0_n
"[1.4e+03,2.86e+03]; (1) of Qs0e1n3",569,569,569,569
"(2.86e+03,3.22e+03]; (2) of Qs0e1n3",567,567,567,567
"(3.22e+03,4.97e+03]; (3) of Qs0e1n3",569,569,569,569
,318,318,318,318


## Differential Quantiles for Different Variables Then Combine to Form New Groups
Collect together different quantile base variables and their percentile cuttings quantile rules. 

### Input Parameters

In [538]:
# Generate Categorical Variables of Quantiles
vars.group_by <- c('indi.id')
vars.arrange <- c('indi.id', 'svymthRound')

In [539]:
# Quantile Variables and Quantiles
vars.cts2quantile.wealth <- c('wealthIdx')
seq.quantiles.wealth <- c(0, .5, 1.0)
vars.cts2quantile.wgthgt <- c('hgt0', 'wgt0')
seq.quantiles.wgthgt <- c(0, .3333, 0.6666, 1.0)
drop.any.quantile.na <- TRUE
# collect to list
list.cts2quantile <- list(list(vars=vars.cts2quantile.wealth,
                               prob=seq.quantiles.wealth),
                          list(vars=vars.cts2quantile.wgthgt,
                               prob=seq.quantiles.wgthgt))

## Check if Within Group Variables Are The Same

Need to make sure quantile variables are unique within groups

In [540]:
vars.cts2quantile <- unlist(lapply(list.cts2quantile, function(elist) elist$vars))
df %>% group_by(!!!syms(vars.group_by)) %>% 
        mutate(quant_vars_paste = paste0(vars.cts2quantile, collapse=',')) %>%
        mutate(unique_wealth_in_group = n_distinct(quant_vars_paste)) %>%
        slice(1L) %>%
        ungroup() %>% 
        group_by(unique_wealth_in_group) %>%
        summarise(n=n())

unique_wealth_in_group,n
1,2023


### Keep only non-NA for all Quantile Variables

In [541]:
# Original dimensions
dim(df)
# All Continuous Variables from lists
vars.cts2quantile <- unlist(lapply(list.cts2quantile, function(elist) elist$vars))
vars.cts2quantile
# Keep only if not NA for all Quantile variables       
if (drop.any.quantile.na) {
    df.select <- df %>% drop_na(c(vars.group_by, vars.arrange, vars.cts2quantile))
}
dim(df.select)

### Apply Quantiles for Each Quantile Variable

In [542]:
# Dealing with a list of quantile variables
df.cut.wealth <- df_cut_by_sliced_quantiles(df.select, vars.cts2quantile.wealth, seq.quantiles.wealth, vars.group_by, vars.arrange)
summary(df.cut.wealth$vars.quantile.cut)
# summary((df.cut.wealth$df.with.cut.quant)[['wealthIdx_Qs0e1n2']])
# df.cut.wealth$df.with.cut.quant %>% filter(is.na(wealthIdx_Qs0e1n2))
# df.cut.wealth$df.with.cut.quant %>% filter(indi.id == 500)

                wealthIdx_Qs0e1n2
 [0.5,2]; (1) of Qs0e1n3 :15160  
 (2,14.1]; (2) of Qs0e1n3:14796  

In [543]:
df.cut.wgthgt <- df_cut_by_sliced_quantiles(df.select, vars.cts2quantile.wgthgt, seq.quantiles.wgthgt, vars.group_by, vars.arrange)
summary(df.cut.wgthgt$vars.quantile.cut)

Joining, by = "quant.perc"


                      hgt0_Qs0e1n3  
 [40.8,48.6]; (1) of Qs0e1n3:10444  
 (48.6,50.3]; (2) of Qs0e1n3:10080  
 (50.3,58.7]; (3) of Qs0e1n3: 9432  
                              wgt0_Qs0e1n3  
 [1.4e+03,2.86e+03]; (1) of Qs0e1n3 :10106  
 (2.86e+03,3.22e+03]; (2) of Qs0e1n3:10034  
 (3.22e+03,4.97e+03]; (3) of Qs0e1n3: 9816  

### Apply Quantiles Functionally

In [544]:
df_cut_by_sliced_quantiles_grps <- function(quantile.grp.list, df, vars.group_by, vars.arrange) {
    vars.cts2quantile <- quantile.grp.list$vars
    seq.quantiles <- quantile.grp.list$prob
    return(df_cut_by_sliced_quantiles(df, vars.cts2quantile, seq.quantiles, vars.group_by, vars.arrange))
}

In [545]:
# Apply function
df.cut.list <- lapply(list.cts2quantile, df_cut_by_sliced_quantiles_grps, 
                      df=df.select, vars.group_by=vars.group_by, vars.arrange=vars.arrange)

Joining, by = "quant.perc"


In [546]:
# Reduce Resulting Matrixes Together
df.with.cut.quant.all <- lapply(df.cut.list, function(elist) elist$df.with.cut.quant) %>% reduce(left_join)
dim(df.with.cut.quant.all)

Joining, by = c("S.country", "vil.id", "indi.id", "sex", "svymthRound", "momEdu", "wealthIdx", "hgt", "wgt", "hgt0", "wgt0", "prot", "cal", "p.A.prot", "p.A.nProt")


In [567]:
# Obrain Newly Created Quantile Group Variables
vars.quantile.cut.all <- unlist(lapply(df.cut.list, function(elist) names(elist$vars.quantile.cut)))
vars.quantile.cut.all

### Summarize by Groups

Summarize by all groups.

In [568]:
summary(df.with.cut.quant.all %>% select(one_of(vars.quantile.cut.all)))

                wealthIdx_Qs0e1n2                      hgt0_Qs0e1n3  
 [0.5,2]; (1) of Qs0e1n3 :15160   [40.8,48.6]; (1) of Qs0e1n3:10444  
 (2,14.1]; (2) of Qs0e1n3:14796   (48.6,50.3]; (2) of Qs0e1n3:10080  
                                  (50.3,58.7]; (3) of Qs0e1n3: 9432  
                              wgt0_Qs0e1n3  
 [1.4e+03,2.86e+03]; (1) of Qs0e1n3 :10106  
 (2.86e+03,3.22e+03]; (2) of Qs0e1n3:10034  
 (3.22e+03,4.97e+03]; (3) of Qs0e1n3: 9816  

In [569]:
# df.with.cut.quant.all %>%
#     group_by(!!!syms(vars.quantile.cut.all)) %>%
#     summarise_at(vars.cts2quantile, funs(mean, n()))

### Generate Joint Quantile Vars Unique Groups

In [570]:
var.qjnt.grp.idx <- 'group.index'
df.with.cut.quant.all <- df.with.cut.quant.all %>% mutate(!!var.qjnt.grp.idx := group_indices(., !!!syms(vars.quantile.cut.all)))

In [583]:
arr.group.idx <- t(sort(unique(df.with.cut.quant.all[[var.qjnt.grp.idx]])))
arr.group.idx

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18


In [575]:
df.with.cut.quant.all %>% group_by(!!!syms(vars.quantile.cut.all), !!sym(var.qjnt.grp.idx)) %>%
        summarise_at(vars.cts2quantile, funs(mean, n()))

wealthIdx_Qs0e1n2,hgt0_Qs0e1n3,wgt0_Qs0e1n3,group.index,wealthIdx_mean,hgt0_mean,wgt0_mean,wealthIdx_n,hgt0_n,wgt0_n
"[0.5,2]; (1) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",1,1.1078134,46.71157,2521.033,3302,3302,3302
"[0.5,2]; (1) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","(2.86e+03,3.22e+03]; (2) of Qs0e1n3",2,1.0743658,47.71108,3031.262,1498,1498,1498
"[0.5,2]; (1) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","(3.22e+03,4.97e+03]; (3) of Qs0e1n3",3,0.8926174,47.61074,3433.848,298,298,298
"[0.5,2]; (1) of Qs0e1n3","(48.6,50.3]; (2) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",4,1.2016949,49.36254,2713.513,1180,1180,1180
"[0.5,2]; (1) of Qs0e1n3","(48.6,50.3]; (2) of Qs0e1n3","(2.86e+03,3.22e+03]; (2) of Qs0e1n3",5,1.1319149,49.431,3037.172,1974,1974,1974
"[0.5,2]; (1) of Qs0e1n3","(48.6,50.3]; (2) of Qs0e1n3","(3.22e+03,4.97e+03]; (3) of Qs0e1n3",6,0.9644783,49.59836,3466.47,1706,1706,1706
"[0.5,2]; (1) of Qs0e1n3","(50.3,58.7]; (3) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",7,0.7647059,51.07412,2677.209,170,170,170
"[0.5,2]; (1) of Qs0e1n3","(50.3,58.7]; (3) of Qs0e1n3","(2.86e+03,3.22e+03]; (2) of Qs0e1n3",8,1.3049005,51.22358,3085.581,1306,1306,1306
"[0.5,2]; (1) of Qs0e1n3","(50.3,58.7]; (3) of Qs0e1n3","(3.22e+03,4.97e+03]; (3) of Qs0e1n3",9,0.8845411,52.04326,3699.735,3726,3726,3726
"(2,14.1]; (2) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",10,4.2859296,46.92462,2555.903,3582,3582,3582


In [574]:
df.with.cut.quant.all  %>% group_by(!!!syms(vars.group_by)) %>% arrange(!!!syms(vars.arrange)) %>% slice(1L) %>%
        group_by(!!!syms(vars.quantile.cut.all), !!sym(var.qjnt.grp.idx)) %>%
        summarise_at(vars.cts2quantile, funs(mean, n()))

wealthIdx_Qs0e1n2,hgt0_Qs0e1n3,wgt0_Qs0e1n3,group.index,wealthIdx_mean,hgt0_mean,wgt0_mean,wealthIdx_n,hgt0_n,wgt0_n
"[0.5,2]; (1) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",1,1.0899471,46.69735,2521.705,189,189,189
"[0.5,2]; (1) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","(2.86e+03,3.22e+03]; (2) of Qs0e1n3",2,1.0494253,47.71954,3031.062,87,87,87
"[0.5,2]; (1) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","(3.22e+03,4.97e+03]; (3) of Qs0e1n3",3,0.8611111,47.61111,3436.694,18,18,18
"[0.5,2]; (1) of Qs0e1n3","(48.6,50.3]; (2) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",4,1.1865672,49.36418,2714.403,67,67,67
"[0.5,2]; (1) of Qs0e1n3","(48.6,50.3]; (2) of Qs0e1n3","(2.86e+03,3.22e+03]; (2) of Qs0e1n3",5,1.1132743,49.4292,3037.689,113,113,113
"[0.5,2]; (1) of Qs0e1n3","(48.6,50.3]; (2) of Qs0e1n3","(3.22e+03,4.97e+03]; (3) of Qs0e1n3",6,0.9366337,49.59802,3469.666,101,101,101
"[0.5,2]; (1) of Qs0e1n3","(50.3,58.7]; (3) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",7,0.75,51.06,2681.61,10,10,10
"[0.5,2]; (1) of Qs0e1n3","(50.3,58.7]; (3) of Qs0e1n3","(2.86e+03,3.22e+03]; (2) of Qs0e1n3",8,1.2891892,51.22027,3085.723,74,74,74
"[0.5,2]; (1) of Qs0e1n3","(50.3,58.7]; (3) of Qs0e1n3","(3.22e+03,4.97e+03]; (3) of Qs0e1n3",9,0.8585586,52.04505,3705.783,222,222,222
"(2,14.1]; (2) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",10,4.2859296,46.92462,2555.903,199,199,199


### Change values Based on Index

Index from 1 to 18, change input values based on index

In [586]:
# arr.group.idx.subsidy <- arr.group.idx*2 - ((arr.group.idx)^2)*0.01
arr.group.idx.subsidy <- arr.group.idx*2 
df.with.cut.quant.all %>% 
        mutate(more_prot = prot + arr.group.idx.subsidy[!!sym(var.qjnt.grp.idx)]) %>% 
        group_by(!!!syms(vars.quantile.cut.all), !!sym(var.qjnt.grp.idx))  %>% 
        summarise_at(c('more_prot', 'prot'), funs(mean(., na.rm=TRUE)))

wealthIdx_Qs0e1n2,hgt0_Qs0e1n3,wgt0_Qs0e1n3,group.index,more_prot,prot
"[0.5,2]; (1) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",1,13.31523,11.31523
"[0.5,2]; (1) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","(2.86e+03,3.22e+03]; (2) of Qs0e1n3",2,16.29865,12.29865
"[0.5,2]; (1) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","(3.22e+03,4.97e+03]; (3) of Qs0e1n3",3,23.84177,17.84177
"[0.5,2]; (1) of Qs0e1n3","(48.6,50.3]; (2) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",4,21.74986,13.74986
"[0.5,2]; (1) of Qs0e1n3","(48.6,50.3]; (2) of Qs0e1n3","(2.86e+03,3.22e+03]; (2) of Qs0e1n3",5,21.7603,11.7603
"[0.5,2]; (1) of Qs0e1n3","(48.6,50.3]; (2) of Qs0e1n3","(3.22e+03,4.97e+03]; (3) of Qs0e1n3",6,27.83359,15.83359
"[0.5,2]; (1) of Qs0e1n3","(50.3,58.7]; (3) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",7,30.92424,16.92424
"[0.5,2]; (1) of Qs0e1n3","(50.3,58.7]; (3) of Qs0e1n3","(2.86e+03,3.22e+03]; (2) of Qs0e1n3",8,26.8604,10.8604
"[0.5,2]; (1) of Qs0e1n3","(50.3,58.7]; (3) of Qs0e1n3","(3.22e+03,4.97e+03]; (3) of Qs0e1n3",9,33.57108,15.57108
"(2,14.1]; (2) of Qs0e1n3","[40.8,48.6]; (1) of Qs0e1n3","[1.4e+03,2.86e+03]; (1) of Qs0e1n3",10,30.49532,10.49532
