In [1]:
import os
import pandas as pd

In [4]:
# Merge the per-sample Salmon `quant.sf` read counts (NumReads) into a single
# transcript-by-sample table and write it to CSV.
base_dir = "/Users/prasanthkumar/Desktop/Projects/RNA-Seq/quantification_1/salmon"

# Sort the sample directories: os.listdir() returns entries in arbitrary order,
# which would otherwise make the column order of the CSV differ between runs.
sample_dirs = sorted(
    os.path.join(base_dir, d)
    for d in os.listdir(base_dir)
    if os.path.isdir(os.path.join(base_dir, d))
)

# sample_id -> Series of NumReads indexed by transcript name.
merged_data = {}

for sample_dir in sample_dirs:
    sample_id = os.path.basename(sample_dir)
    quant_file = os.path.join(sample_dir, "quant.sf")

    if os.path.isfile(quant_file):
        try:
            df = pd.read_csv(quant_file, sep='\t', usecols=["Name", "NumReads"])

            # Key the counts by transcript name instead of by row position, so
            # samples are matched on "Name" when the table is assembled. A file
            # with a different row order or row count can then no longer be
            # silently paired with the wrong transcript names.
            merged_data[sample_id] = df.set_index("Name")["NumReads"]

            print(f"Successfully added {sample_id} to the merged dataset.")
        except Exception as e:
            # Best effort: report the unreadable sample and continue with the rest.
            print(f"Error processing {quant_file}: {e}")
    else:
        print(f"quant.sf file not found in {sample_dir}, skipping.")

# The DataFrame constructor aligns the per-sample Series on their shared index
# (the transcript names); a transcript absent from a sample shows up as NaN.
# The index is then turned back into the leading "Name" column.
merged_df = pd.DataFrame(merged_data).rename_axis("Name").reset_index()

# NOTE(review): the input is read from ".../quantification_1/salmon" but the
# result is written under ".../quantification/" - confirm this is intended.
output_file = "/Users/prasanthkumar/Desktop/Projects/RNA-Seq/quantification/merged_quant_counts.csv"
merged_df.to_csv(output_file, index=False)

print(f"Merged quant.sf data (raw counts) saved to {output_file}")

Successfully added SRR12375106 to the merged dataset.
Successfully added SRR12375101 to the merged dataset.
Successfully added SRR12375100 to the merged dataset.
Successfully added SRR12375093 to the merged dataset.
Successfully added SRR12375094 to the merged dataset.
Successfully added SRR12375095 to the merged dataset.
Successfully added SRR12375092 to the merged dataset.
Successfully added SRR12375102 to the merged dataset.
Successfully added SRR12375105 to the merged dataset.
Successfully added SRR12375104 to the merged dataset.
Successfully added SRR12375103 to the merged dataset.
Successfully added SRR12375097 to the merged dataset.
Successfully added SRR12375099 to the merged dataset.
Successfully added SRR12375098 to the merged dataset.
Successfully added SRR12375096 to the merged dataset.
Merged quant.sf data (raw counts) saved to /Users/prasanthkumar/Desktop/Projects/RNA-Seq/quantification/merged_quant_counts.csv


In [6]:
# Merge the per-sample kallisto `abundance.tsv` estimated counts (est_counts)
# into a single transcript-by-sample table and write it to CSV.
base_dir = "/Users/prasanthkumar/Desktop/Projects/RNA-Seq/quantification/kallisto_filtered"

# Sort the sample directories: os.listdir() returns entries in arbitrary order,
# which would otherwise make the column order of the CSV differ between runs.
sample_dirs = sorted(
    os.path.join(base_dir, d)
    for d in os.listdir(base_dir)
    if os.path.isdir(os.path.join(base_dir, d))
)

# sample_id -> Series of est_counts indexed by target_id.
merged_data = {}

for sample_dir in sample_dirs:
    sample_id = os.path.basename(sample_dir)
    abundance_file = os.path.join(sample_dir, "abundance.tsv")

    if os.path.isfile(abundance_file):
        try:
            df = pd.read_csv(abundance_file, sep='\t', usecols=["target_id", "est_counts"])

            # Key the counts by target_id instead of by row position, so samples
            # are matched on "target_id" when the table is assembled. A file
            # with a different row order or row count can then no longer be
            # silently paired with the wrong transcript ids.
            merged_data[sample_id] = df.set_index("target_id")["est_counts"]

            print(f"Successfully added {sample_id} to the merged dataset.")
        except Exception as e:
            # Best effort: report the unreadable sample and continue with the rest.
            print(f"Error processing {abundance_file}: {e}")
    else:
        print(f"abundance.tsv file not found in {sample_dir}, skipping.")

# The DataFrame constructor aligns the per-sample Series on their shared index
# (the target ids); a target absent from a sample shows up as NaN. The index is
# then turned back into the leading "target_id" column.
merged_df = pd.DataFrame(merged_data).rename_axis("target_id").reset_index()

output_file = "/Users/prasanthkumar/Desktop/Projects/RNA-Seq/quantification/Kallisto_filtered_merged_kallisto_counts.csv"
merged_df.to_csv(output_file, index=False)

print(f"Merged abundance.tsv data (raw counts) saved to {output_file}")

Successfully added SRR12375106 to the merged dataset.
Successfully added SRR12375101 to the merged dataset.
Successfully added SRR12375100 to the merged dataset.
Successfully added SRR12375093 to the merged dataset.
Successfully added SRR12375094 to the merged dataset.
Successfully added SRR12375095 to the merged dataset.
Successfully added SRR12375092 to the merged dataset.
Successfully added SRR12375102 to the merged dataset.
Successfully added SRR12375105 to the merged dataset.
Successfully added SRR12375104 to the merged dataset.
Successfully added SRR12375103 to the merged dataset.
Successfully added SRR12375097 to the merged dataset.
Successfully added SRR12375099 to the merged dataset.
Successfully added SRR12375098 to the merged dataset.
Successfully added SRR12375096 to the merged dataset.
Merged abundance.tsv data (raw counts) saved to /Users/prasanthkumar/Desktop/Projects/RNA-Seq/quantification/Kallisto_filtered_merged_kallisto_counts.csv
