In [1]:
# Load necessary libraries
library(tidyverse)
library(ggplot2)
library(readr)
library(purrr)
library(dplyr)

── [1mAttaching core tidyverse packages[22m ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.2     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mggplot2  [39m 3.4.2     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.1     
── [1mConflicts[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to b

In [2]:
# Function to process one experiment
process_experiment <- function(experiment_name) {
  
  # Define the paths to the directories for the two species
  path_cs <- paste0(experiment_name, "/5s/objects/cs")
  path_cr <- paste0(experiment_name, "/5s/objects/cr")
  
  # Read all csv files and add a 'Species' column
  data_cr <- read_all_csvs(path_cr) %>% mutate(Species = 'cr')
  print(paste("Read", nrow(data_cr), "rows for species cr in", experiment_name))
  data_cs <- read_all_csvs(path_cs) %>% mutate(Species = 'cs')
  print(paste("Read", nrow(data_cs), "rows for species cs in", experiment_name))
  
  # Combine data
  all_data <- bind_rows(data_cr, data_cs)
  
  # Ensure the Species column is a factor for correct plotting
  all_data$Species <- as.factor(all_data$Species)
  
  # Rest of your existing code here...
  
  # List of species
  species <- c("cr", "cs")
  
  # Columns to save
  cols_to_save <- c("AreaShape_Area", "AreaShape_Perimeter", "AreaShape_Eccentricity", 
                    "AreaShape_MajorAxisLength", "AreaShape_MinorAxisLength")
    
  # For each species and each column, create a new data frame with unique paths as columns
  for (sp in species) {
    for (col in cols_to_save) {
      print(paste("Processing species", sp, "and column", col, "for experiment", experiment_name))
      # Subset data for the current species and column
      data_subset <- all_data %>%
        filter(Species == sp) %>%
        select(Path, Species, col) %>%
        group_by(Path, Species) %>%
        mutate(id_row = row_number()) %>% # Add unique ID within each group
        ungroup()
      
      # Pivot the data so each unique path becomes a column
      data_pivot <- data_subset %>% 
        pivot_wider(names_from = Path, values_from = col, 
                    id_cols = c("Species", "id_row"))  # Now we pivot also by id_row
      
      # Remove the id_row column from the resulting dataset
      data_pivot$id_row <- NULL
      
      # Create a filename
      filename <- paste0(experiment_name, "_", sp, "_", col, ".csv")
      print(paste("Writing file", filename))
      
      # Write the CSV file
      write.csv(data_pivot, file.path("output", filename), row.names = FALSE)
    }
  }
  
}


In [3]:
# Function to read all csv files in a given path.Adds a new column with the path
read_all_csvs <- function(path) {
  files <- list.files(path, full.names = TRUE, recursive = TRUE, pattern = "*chlamy.csv")
  
  # Use lapply to loop over each file
  all_data <- lapply(files, function(x) {
    # Check if the file has more than one row (i.e., it has data)
    if (length(readLines(x)) > 1) {
      # If the file has data, read it in
      data <- read_csv(x, col_types = cols())
      # Add the path and species information
      data <- mutate(data, Path = x)
      return(data)
    } else {
      # If the file has no data, return NULL
      NULL
    }
  })
  
  # Remove NULLs (empty data frames) from the list
  all_data <- all_data[!sapply(all_data, is.null)]
  
  # Combine all data frames in the list into one data frame
  all_data <- bind_rows(all_data)
  
  return(all_data)
}

In [4]:
# List of experiments
experiments <- c("230509_motility_pools_exp1", "230516_motility_pools_exp2", 
                 "230519_motility_pools_exp3", "230523_motility_pools_exp4")

In [5]:
# Iterate over the experiments
for (experiment_name in experiments) {
  process_experiment(experiment_name)
}

[1] "Read 3724 rows for species cr in 230509_motility_pools_exp1"
[1] "Read 2801 rows for species cs in 230509_motility_pools_exp1"
[1] "Processing species cr and column AreaShape_Area for experiment 230509_motility_pools_exp1"


“[1m[22mUsing an external vector in selections was deprecated in tidyselect 1.1.0.
[36mℹ[39m Please use `all_of()` or `any_of()` instead.
  # Was:
  data %>% select(col)

  # Now:
  data %>% select(all_of(col))

See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.”


[1] "Writing file 230509_motility_pools_exp1_cr_AreaShape_Area.csv"
[1] "Processing species cr and column AreaShape_Perimeter for experiment 230509_motility_pools_exp1"
[1] "Writing file 230509_motility_pools_exp1_cr_AreaShape_Perimeter.csv"
[1] "Processing species cr and column AreaShape_Eccentricity for experiment 230509_motility_pools_exp1"
[1] "Writing file 230509_motility_pools_exp1_cr_AreaShape_Eccentricity.csv"
[1] "Processing species cr and column AreaShape_MajorAxisLength for experiment 230509_motility_pools_exp1"
[1] "Writing file 230509_motility_pools_exp1_cr_AreaShape_MajorAxisLength.csv"
[1] "Processing species cr and column AreaShape_MinorAxisLength for experiment 230509_motility_pools_exp1"
[1] "Writing file 230509_motility_pools_exp1_cr_AreaShape_MinorAxisLength.csv"
[1] "Processing species cs and column AreaShape_Area for experiment 230509_motility_pools_exp1"
[1] "Writing file 230509_motility_pools_exp1_cs_AreaShape_Area.csv"
[1] "Processing species cs and column Area