In [None]:
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt

# Directory path where the .tsv files are located
directory_path = '/give_the_path_of_transcriptomic_data/'


def build_logfc_matrix(directory, p_cutoff=0.05, logfc_cutoff=1.0,
                       prefixes=('Y', 'Q'), sep=','):
    """Build a gene x sample matrix of significant logFC values.

    Every file in *directory* whose name ends in ``.tsv`` contributes one
    column, named after the part of the file name before the first
    underscore. Each file must provide the columns ``SystematicName``,
    ``logFC`` and ``P.Value``.

    Args:
        directory: Folder that holds the input files.
        p_cutoff: Rows are kept only when ``P.Value`` is below this value.
        logfc_cutoff: Rows are kept only when ``abs(logFC)`` exceeds this value.
        prefixes: Only gene names starting with one of these strings are kept.
        sep: Column delimiter handed to ``pandas.read_csv``. The default is a
            comma even though the files are named ``.tsv``; pass a tab
            character for genuinely tab-separated files.

    Returns:
        A DataFrame indexed by gene name (sorted) with one column per file
        (in sorted file-name order). A gene that is not significant in a
        given file gets 0 in that column. Genes that are not significant in
        any file are absent. The frame is empty when no ``.tsv`` file exists.

    Raises:
        ValueError: If two files would produce the same column name.
        KeyError: If a file lacks one of the required columns.
    """
    # Sorted so the column order does not depend on the directory listing order.
    tsv_files = sorted(name for name in os.listdir(directory) if name.endswith(".tsv"))
    print("TSV files: ", tsv_files)

    column_names = []
    column_values = []
    for file_name in tsv_files:
        # Use the first part of the filename before the first underscore as the column name
        df_name = os.path.splitext(file_name)[0].split('_')[0]
        print("DataFrame name: ", df_name)
        if df_name in column_names:
            raise ValueError(
                f"File {file_name!r} maps to column name {df_name!r}, "
                "which is already used by another file"
            )

        table = pd.read_csv(os.path.join(directory, file_name), sep=sep)

        # Keep rows where P.Value is below the cutoff and |logFC| is above the cutoff
        significant = table[
            (table['P.Value'] < p_cutoff) & (table['logFC'].abs() > logfc_cutoff)
        ]

        # A gene name may occur on several rows of one file. Joining on a
        # non-unique index would duplicate rows (and multiply them across
        # files), so repeated names are collapsed to their mean logFC.
        # groupby also discards rows whose gene name is missing.
        column_names.append(df_name)
        column_values.append(significant.groupby('SystematicName')['logFC'].mean())

    if not column_values:
        return pd.DataFrame()

    # The outer alignment yields the union of genes that are significant in at
    # least one file; the gaps (not significant in that file) become 0.
    merged = pd.concat(column_values, axis=1, keys=column_names).fillna(0)
    merged.index.name = None

    # Keep only gene names that start with one of the requested prefixes.
    # The index is stringified first so non-string names cannot break the mask.
    keep = merged.index.astype(str).str.startswith(tuple(prefixes))
    return merged[keep].sort_index()


def drop_constant_vectors(matrix):
    """Return *matrix* without rows and columns that hold a single repeated value.

    The correlation distance is undefined for a zero-variance vector, and the
    hierarchical clustering behind ``sns.clustermap`` rejects the resulting
    non-finite distances. Removing a column can make a row constant (and vice
    versa), so the trimming repeats until nothing more is removed.
    """
    while True:
        varying_rows = matrix.nunique(axis=1) > 1
        varying_cols = matrix.nunique(axis=0) > 1
        if varying_rows.all() and varying_cols.all():
            return matrix
        matrix = matrix.loc[varying_rows, varying_cols]


# The guard is true both in a notebook cell and when run as a script, so
# ``merged_df`` is still created as a global there.
if __name__ == "__main__":
    merged_df = build_logfc_matrix(directory_path)

    # Visualize the results as clustermap
    plot_df = drop_constant_vectors(merged_df)
    if plot_df.shape[0] < 2 or plot_df.shape[1] < 2:
        # Clustering needs at least two varying rows and two varying columns.
        print("Not enough varying genes/samples to draw a clustermap: ", plot_df.shape)
    else:
        sns.clustermap(plot_df, cmap='coolwarm', metric='correlation')
        plt.show()
