## Initialize shared variables

In [1]:
phenotype_col <- 'Factor.Value.Breast.cancer.diagnosis.'
case_label <- "Metastatic"
control_label <- "Early"

In [2]:
huna_init <- read.csv('/Users/morris/Box/Hall_Lab/Projects/HUNA/MTBLS424/MTBLS424_init.csv',
                      header=TRUE, check.names=FALSE)

nrow <- nrow(huna_init)
print(nrow)
print('assigned variables:')
for (i in 1:nrow){
    assign(toString(huna_init$type[i]), toString(huna_init$path[i]))
    print(toString(huna_init$type[i]))
}

[1] 14
[1] "assigned variables:"
[1] "setwd_path"
[1] "data_set_root_dir"
[1] "path_to_fid"
[1] "norm_bin_500_path"
[1] "norm_bin_5000_path"
[1] "bin500_for_sig_tests"
[1] "figure_path"
[1] "sample_labels_header"
[1] "stat_results_data_path"
[1] "BonSig_Cohen_results_data_path"
[1] "filtered_ROI_path"
[1] "sample_type"
[1] "rDolphin_nmr_input_path"
[1] "rDolphin_nmr_input_path_Early_vs_Late"


In [3]:
#save <- (paste(setdir_path,'/',data_set_root_dir, '/', data_set_root_dir, '_init.csv', sep=''))
#write.csv(huna_init_file, file= save, row.names=FALSE)

## Create MTBLS Metadata.csv for rDolphin

In [4]:
nmr_w_labels <- read.csv(file=bin500_for_sig_tests, header=TRUE, check.names=FALSE)
Sample <- as.data.frame(nmr_w_labels$X)
index_of_case <- which(nmr_w_labels[,phenotype_col] == case_label)
index_of_control <- which(nmr_w_labels[,phenotype_col] == control_label)
index_case_and_control <- c(index_of_case, index_of_control)
Individual <- as.data.frame(index_case_and_control)
nmr_case_control <- nmr_w_labels[index_case_and_control,]
nrow(nmr_case_control)

In [5]:
nmr_case_control[,phenotype_col] <- as.character(nmr_case_control[,phenotype_col])
sample_type_rdol <- sapply(nmr_case_control[,phenotype_col],switch,
                      "Metastatic"=1, "Early"=0)
sample_type_rdol <- as.data.frame(sample_type_rdol,)
nrow(sample_type_rdol)

In [6]:
df_metadata <- data.frame(Sample, Individual, sample_type_rdol)
colnames(df_metadata)[3] <- "Sample Type"
colnames(df_metadata)[2] <- "Individual"
colnames(df_metadata)[1] <- "Sample"
head(df_metadata)

Sample,Individual,Sample Type
<int>,<int>,<dbl>
15002,591,1
15003,592,1
15004,593,1
15005,594,1
15006,595,1
15007,596,1


In [7]:
if (grepl(' ', case_label, fixed = TRUE)){
    case_label <- gsub(" ", "_", case_label)
}
if (grepl('-', case_label, fixed = TRUE)){
    case_label <- gsub("-", "_", case_label)
}
if (grepl('-', case_label, fixed = TRUE)){
    control_label <- gsub("-", "_", control_label)
}
if (grepl(' ', control_label, fixed = TRUE)){
    control_label <- gsub(" ", "_", control_label)
}

In [8]:
metadata_rdolphin_path <- paste(setwd_path, data_set_root_dir, '/output/', data_set_root_dir,
                       '_metadata_', case_label, '_vs_', control_label, '.csv', sep='')
write.csv(df_metadata, file= metadata_rdolphin_path, row.names=FALSE)

In [9]:
sample_type <- tools::toTitleCase(sample_type)
bin_width <- as.numeric(substring((colnames(nmr_w_labels)[2]), 2)) - as.numeric(substring((colnames(nmr_w_labels)[3]), 2))

## Create Parameters.csv for rDolphin

In [10]:
# note to self, at the send of each step, open a txt file that contains the dir of the 

Parameter = c('nmr folder path','1D data index','proc_no','spectra dataset path (csv format)',
              'Metadata path (csv format)','ROI patterns file',
              'Normalization (0=No;1=Eretic; 2=TSP; 3=Creatinine; 4=Spectra Sum; 5=PQN)',
              'Alignment (0=No;1=Glucose; 2=TSP; 3=Formate)',
              'Suppression',
              'Spectrometer Frequency (MHz)',
              'Bucket resolution',
              'Biofluid',
              '2D-Path',
              'Specific program parameters')
Value = c('','','',rDolphin_nmr_input_path_Early_vs_Late, metadata_rdolphin_path, filtered_ROI_path,
          0,0,'',699.87,bin_width,
          sample_type, '', '')

In [11]:
rDolphin_nmr_input_path

In [12]:
df = data.frame(Parameter, Value)
parameters_rdolphin_path <- paste(setwd_path, data_set_root_dir, '/input/', data_set_root_dir,
                                  '_', case_label, '_vs_', control_label,'_rdolphin_parameters',
                                  '.csv'
                                  , sep='')
write.csv(df, file= parameters_rdolphin_path, row.names=FALSE)

## Create R script to Run rDolphin

In [13]:
one <- 'library(rDolphin)
print("rDolphin Loaded")
print("working directory set")
setwd("'
#the working directory would go here
two <- '")
'
three <- 'imported_data = import_data("'
# add the data_set_root_dir only
three_2 <- '/input/'
# add the data_set_root_dir only
four <- '_rdolphin_parameters.csv")
print("imported data successfully")
median_plot = median_plot(imported_data)
print("median plot processed successfully")
profiling_data = automatic_profiling(imported_data, imported_data$ROI_data)
print("*****************************")
print("*******Profiling Done********")
print("*****************************")'

five <- 'write_info("/'
# add the data_set_root_dir only

six <- '/output/rDolphin", profiling_data$final_output, imported_data$ROI_data)
print("writing info successfully")'

seven <- 'save(imported_data, profiling_data, file = "/'
# add the data_set_root_dir only

eight <- '/output/rDolphin/'
# add the data_set_root_dir only
nine <- '_Cohen_ROI_profiling.RData")
print("saved R session profile data.")'

In [14]:
rDolphin_Rscript <- paste(one, setwd_path, two, three, data_set_root_dir, three_2,
                          data_set_root_dir, '_', case_label, '_vs_', control_label,
                          four, five, data_set_root_dir, six,
                          seven, data_set_root_dir, eight,
                          data_set_root_dir, nine, sep='')

In [15]:
print(rDolphin_Rscript)

[1] "library(rDolphin)\nprint(\"rDolphin Loaded\")\nprint(\"working directory set\")\nsetwd(\"/Users/morris/Box/Hall_Lab/Projects/HUNA/\")\nimported_data = import_data(\"MTBLS424/input/MTBLS424_Metastatic_vs_Early_rdolphin_parameters.csv\")\nprint(\"imported data successfully\")\nmedian_plot = median_plot(imported_data)\nprint(\"median plot processed successfully\")\nprofiling_data = automatic_profiling(imported_data, imported_data$ROI_data)\nprint(\"*****************************\")\nprint(\"*******Profiling Done********\")\nprint(\"*****************************\")write_info(\"/MTBLS424/output/rDolphin\", profiling_data$final_output, imported_data$ROI_data)\nprint(\"writing info successfully\")save(imported_data, profiling_data, file = \"/MTBLS424/output/rDolphin/MTBLS424_Cohen_ROI_profiling.RData\")\nprint(\"saved R session profile data.\")"


In [16]:
Rscript_path <- paste(setwd_path, data_set_root_dir,'/scripts/', data_set_root_dir,
                      '_rDolphin_Script_',
                      case_label, '_vs_', control_label,'.R',
                      sep='')
print(Rscript_path)

[1] "/Users/morris/Box/Hall_Lab/Projects/HUNA/MTBLS424/scripts/MTBLS424_rDolphin_Script_Metastatic_vs_Early.R"


In [17]:
fileConn<-file(paste(Rscript_path,sep=''))
writeLines(c(rDolphin_Rscript), fileConn)
close(fileConn)