# Test of propensity score matching

In [1]:
### Code to perform optimal full matching on Mahalanobis
### distance (numerical variables) with a propensity score
### caliper (numeric and categorical variables).

### REQUIREMENTS:
# R installation
# optmatch package in R
# ** install in R using install.packages('optmatch')
# rpy2
# ** installation instructions from https://pypi.python.org/pypi/rpy2

### Load python libraries
import rpy2.robjects as robjects

In [2]:
### Settings handed over to the R session
# Directory R switches to; the input CSV is read from it and the result is written to it
work_dir = "/data1/subtypes/test/"
# Name of the input CSV file inside work_dir
CSV_file = "schizo_matching_60vols_20151111.csv"
# CSV columns that must be treated as categorical (names as in the CSV header; assumed to contain no spaces)
categories = robjects.StrVector(["study", "sex", "sz"])
# Formula for the Mahalanobis distance (names as in the CSV header)
## Format: disease variable ~ covariate 1 + covariate 2 + ...
## Every covariate listed here must be NUMERIC
Mah_formula = "sz ~ age + FD"
# Formula for the propensity score (PSM) distance (names as in the CSV header)
## Format: disease variable ~ covariate 1 + covariate 2 + ...
PSM_formula = "sz ~ age + FD + study + sex"
# Caliper width (as a fraction of the SD of the propensity scores)
cal_width = 0.5

In [3]:
### Copy the settings into R's global environment
# Each Python value becomes an R variable of the same name, which the
# R script in the next cell reads directly.
for _r_name, _py_value in (
    ("work_dir", work_dir),
    ("CSV_file", CSV_file),
    ("categories", categories),
    ("Mah_formula", Mah_formula),
    ("cal_width", cal_width),
    ("PSM_formula", PSM_formula),
):
    robjects.globalenv[_r_name] = _py_value

In [4]:
# Run the matching in R
## The R script reads CSV_file from work_dir and writes a CSV file named
## matched_*CSV_file*
## to the same directory. It holds the input columns plus two extra columns:
##   match - the matched-set label returned by fullmatch (NA when unmatched)
##   keep  - 1 to keep the observation and 0 to leave it out.
## write.csv is called with its defaults, so R's row names are also written
## as a leading unnamed column.
## The R variables used below (work_dir, CSV_file, categories, Mah_formula,
## PSM_formula, cal_width) must already exist in R's global environment.
robjects.r('''
  # Load R libraries
  library(optmatch)

  # Set working directory
  setwd(work_dir)

  # Read CSV
  data <- read.csv(CSV_file)

  # Make all categories factors.
  # Loop over the column names themselves: 1:length(categories) would
  # yield c(1, 0) for an empty vector and index columns that do not exist.
  for (category in categories) {
    data[[category]] <- as.factor(data[[category]])
  }

  # Fit the propensity score model (logistic regression).
  # match_on() applied to a bare formula builds a Mahalanobis distance (its
  # default method), so the caliper has to be built from a fitted glm to
  # actually act on the propensity score. For a glm, match_on() rescales
  # the linear predictor by a pooled dispersion estimate, which puts
  # cal_width in units of the SD of the propensity score.
  ps_model <- glm(as.formula(PSM_formula), data = data, family = binomial())

  # Perform the matching: Mahalanobis distance on the numeric covariates,
  # restricted to pairs that fall within the propensity score caliper.
  Matching <- fullmatch(
    match_on(as.formula(Mah_formula), data = data) +
      caliper(match_on(ps_model, data = data), width = cal_width),
    data = data)

  # Make a data frame with a column with 1 to keep an observation and 0 to leave out
  save_data <- cbind(data, match = Matching)
  save_data$keep <- as.numeric(!is.na(save_data$match))
  write.csv(save_data, file = paste0("matched_", CSV_file))
''')


  res = super(Function, self).__call__(*new_args, **new_kwargs)

  res = super(Function, self).__call__(*new_args, **new_kwargs)


rpy2.rinterface.NULL