In [None]:
#!/usr/bin/env Rscript
# Usage: Rscript join_list_of_files.R data/list_q3.tsv > data/join_output.tsv

library(tidyverse)

# Keep only the trailing (user-supplied) command-line arguments.
args <- commandArgs(trailingOnly = TRUE)

# Exactly one argument is expected: the path of the file that lists the
# inputs. Anything else prints the usage text to stderr and exits with 1.
if (length(args) != 1) {
    usage_text <- c(
        "Usage: Rscript join_list_of_files.R <file_list>\n",
        "Example: Rscript join_list_of_files.R data/list_q3.tsv > output.tsv\n"
    )
    cat(usage_text, sep = "", file = stderr())
    quit(status = 1)
}

file_list_path <- args[[1]]

# Read the list of files to merge: one path per line.
file_paths <- read_lines(file_list_path)

# Strip surrounding whitespace and drop blank lines, so that a stray empty
# or whitespace-only line in the list is not handed to read_tsv() as a path.
file_paths <- trimws(file_paths)
file_paths <- file_paths[nzchar(file_paths)]

# Checked after filtering, so a list made only of blank lines is rejected too.
if (length(file_paths) == 0) {
    cat("Error: No files found in list\n", file = stderr())
    quit(status = 1)
}

cat(sprintf("Reading %d files to merge...\n", length(file_paths)), file = stderr())

# Load one TSV file with every column read as character, logging the path
# to stderr before reading.
read_input_table <- function(path) {
    cat(sprintf("  Reading: %s\n", path), file = stderr())
    read_tsv(
        path,
        col_types = cols(.default = col_character()),
        show_col_types = FALSE
    )
}

# Read every listed file, in list order, into a list of data frames.
data_list <- lapply(file_paths, read_input_table)

# The join key is the first column of the first file; every other file must
# contain a column with the same name.
key_column <- colnames(data_list[[1]])[1]
cat(sprintf("Joining on column: %s\n", key_column), file = stderr())

# When there is something to join, fail early and name the offending files
# instead of letting the join stop with an error that does not say which
# input lacks the key. data_list was built from file_paths in order, so the
# two are index-aligned. A single input is passed through without a join,
# so no check is needed in that case.
if (length(data_list) > 1) {
    has_key <- vapply(
        data_list,
        function(df) key_column %in% colnames(df),
        logical(1)
    )
    if (!all(has_key)) {
        cat(
            sprintf(
                "Error: Key column '%s' not found in: %s\n",
                key_column,
                paste(file_paths[!has_key], collapse = ", ")
            ),
            file = stderr()
        )
        quit(status = 1)
    }
}

# Inner-join all data frames pairwise, left to right, on the key column.
# Only keys present in every file survive.
merged_df <- reduce(data_list, inner_join, by = key_column)

cat(sprintf("Final result: %d rows, %d columns\n", nrow(merged_df), ncol(merged_df)), file = stderr())

# Write the merged table as TSV to standard output so the caller can
# redirect it to a file.
out_con <- stdout()
write_tsv(merged_df, file = out_con)