In [1]:
import pandas as pd
import numpy as np

In [2]:
df1 = pd.read_table('benchmark_data/Chow_Rep1.genes.results')
df2 = pd.read_table('benchmark_data/Chow_Rep2.genes.results')
df3 = pd.read_table('benchmark_data/Chow_Rep3.genes.results')
df4 = pd.read_table('benchmark_data/HFD_Rep1.genes.results')
df5 = pd.read_table('benchmark_data/HFD_Rep2.genes.results')
df6 = pd.read_table('benchmark_data/HFD_Rep3.genes.results')

In [3]:
import pandas as pd

def merge_dataframes_on_column(dfs, join_column, columns_to_keep, column_name):
    """
    Merge multiple DataFrames on a specified join column and keep only specified columns.

    Args:
    dfs (list of pd.DataFrame): List of DataFrames to merge.
    join_column (str): Name of the column to join on.
    columns_to_keep (list of list of str): List of lists, where each sublist contains the names of the columns
                                           to keep from the corresponding DataFrame.

    Returns:
    pd.DataFrame: A DataFrame resulting from merging all input DataFrames on the join_column and 
                  keeping only the specified columns from each.
    """
    # Ensure columns_to_keep is valid
    if len(dfs) != len(columns_to_keep):
        raise ValueError("Each DataFrame must have a corresponding list of columns to keep.")

    # Reduce the dataframes to only the columns to keep and the join column
    reduced_dfs = [
        df[[join_column] + columns] for df, columns in zip(dfs, columns_to_keep)
        if join_column in df.columns and all(col in df.columns for col in columns)
    ]
    
    # Merge all dataframes on the join column
    merged_df = reduced_dfs[0]
    for df in reduced_dfs[1:]:
        merged_df = pd.merge(merged_df, df, on=join_column, how='inner')
    merged_df.columns = [merged_df.columns[0]] + column_name
    
    return merged_df

# Example usage:
# Assuming you have three DataFrames: df1, df2, df3
# You want to merge them on 'id' column and keep certain columns from each DataFrame:
# result_df = merge_dataframes_on_column([df1, df2, df3], 'id', [['name', 'age'], ['address'], ['salary']])


In [4]:
df_list = [df1, df2, df3, df4, df5, df6]

In [5]:
column_name = ['Chow_Rep1', 'Chow_Rep1', 'Chow_Rep1', 'HFD_Rep1', 'HFD_Rep1', 'HFD_Rep1']

In [6]:
merge_dataframes_on_column(df_list, 'gene_id', [['TPM'] for _ in df_list], column_name)

  merged_df = pd.merge(merged_df, df, on=join_column, how='inner')


Unnamed: 0,gene_id,Chow_Rep1,Chow_Rep1.1,Chow_Rep1.2,HFD_Rep1,HFD_Rep1.1,HFD_Rep1.2
0,ENSMUSG00000000001,36.15,34.04,38.34,31.80,43.21,37.89
1,ENSMUSG00000000003,0.00,0.00,0.00,0.00,0.00,0.00
2,ENSMUSG00000000028,0.69,2.01,0.55,0.88,0.64,0.92
3,ENSMUSG00000000031,0.00,0.23,0.00,0.13,0.12,0.19
4,ENSMUSG00000000037,0.00,0.02,0.00,0.00,0.06,0.00
...,...,...,...,...,...,...,...
39012,ENSMUSG00000099329,0.00,2.76,0.36,0.00,2.72,1.33
39013,ENSMUSG00000099330,0.00,0.00,0.00,0.00,0.00,0.00
39014,ENSMUSG00000099331,0.00,0.00,0.00,0.00,0.00,0.00
39015,ENSMUSG00000099332,0.00,0.00,0.00,0.00,0.00,0.00


In [7]:
import simple_teximport

In [8]:
file_list = ['benchmark_data/Chow_Rep1.genes.results', 'benchmark_data/Chow_Rep2.genes.results', 'benchmark_data/Chow_Rep3.genes.results', 'benchmark_data/HFD_Rep1.genes.results', 'benchmark_data/HFD_Rep2.genes.results', 'benchmark_data/HFD_Rep3.genes.results']

In [11]:
txi = simple_teximport.simple_texi(file_list, column_name)

  merged_df = pd.merge(merged_df, df, on=join_column, how='inner')
  merged_df = pd.merge(merged_df, df, on=join_column, how='inner')
  merged_df = pd.merge(merged_df, df, on=join_column, how='inner')


In [12]:
print(txi)

abundance information: [(39017, 7), Index(['gene_id', 'Chow_Rep1', 'Chow_Rep1', 'Chow_Rep1', 'HFD_Rep1',
       'HFD_Rep1', 'HFD_Rep1'],
      dtype='object')] 
 count information: [(39017, 7), Index(['gene_id', 'Chow_Rep1', 'Chow_Rep1', 'Chow_Rep1', 'HFD_Rep1',
       'HFD_Rep1', 'HFD_Rep1'],
      dtype='object')] 
 length information: [(39017, 7), Index(['gene_id', 'Chow_Rep1', 'Chow_Rep1', 'Chow_Rep1', 'HFD_Rep1',
       'HFD_Rep1', 'HFD_Rep1'],
      dtype='object')]
