In [None]:
# UpSet plot of interval overlaps across 6 BED files

# Install required packages
!pip install pyranges upsetplot

# Imports
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import files
from upsetplot import UpSet, from_indicators
import pyranges as pr
from itertools import product

# Step 1: Upload BED files
# The keys of `uploaded` are used below both in the prompt text and as the
# paths handed to pd.read_csv.
print("📁 Please upload your 6 BED files")
uploaded = files.upload()

# Step 2: Read files into a dictionary with custom condition names
# condition_beds maps a user-chosen label -> PyRanges built from the first
# three tab-separated columns, named Chromosome / Start / End.
condition_beds = {}
for file in uploaded:
    condition_name = input(f"Enter a custom name for condition '{file}': ").strip()
    # A repeated label would overwrite an earlier entry and silently drop that
    # file from the plot; a blank label yields an unnamed set. Ask again.
    while not condition_name or condition_name in condition_beds:
        condition_name = input(
            f"⚠️ Name must be non-empty and unique. Enter a different name for '{file}': "
        ).strip()
    df = pd.read_csv(file, sep='\t', header=None, usecols=[0,1,2], names=['Chromosome', 'Start', 'End'])
    condition_beds[condition_name] = pr.PyRanges(df)

# Step 3: Create union of all ranges across all conditions
# PyRanges does not provide a `union` method, so chaining `.union(...)` fails
# with AttributeError. Instead, pool every condition's intervals into one frame
# and merge the overlapping ones into single union regions.
print("🔁 Computing overlaps (≥1bp)...")
pooled_df = pd.concat([gr.df for gr in condition_beds.values()], ignore_index=True)
all_ranges = pr.PyRanges(pooled_df).merge()  # Merge overlapping intervals

# Step 4: Create binary membership matrix
# Rows are the merged union regions; there is one boolean column per condition.
all_ranges_df = all_ranges.df[['Chromosome', 'Start', 'End']].copy()

# Build the (Chromosome, Start, End) key of every union region once, rather
# than re-deriving it row by row with DataFrame.apply for each condition.
region_keys = list(zip(all_ranges_df.Chromosome, all_ranges_df.Start, all_ranges_df.End))

membership = {}
for name, gr in condition_beds.items():
    # Union regions that overlap this condition's intervals.
    overlap = all_ranges.overlap(gr)
    overlap_df = overlap.df
    if overlap_df.empty:
        # An empty result may come back without Chromosome/Start/End columns,
        # so mark every region as absent instead of indexing into it.
        membership[name] = [False] * len(region_keys)
        continue
    overlap_keys = set(zip(overlap_df.Chromosome, overlap_df.Start, overlap_df.End))
    membership[name] = [key in overlap_keys for key in region_keys]

presence_matrix = pd.DataFrame(membership, index=all_ranges_df.index)

# Step 5: Create UpSet plot data
# Convert the boolean per-condition columns into the structure UpSet consumes.
upset_data = from_indicators(presence_matrix)

# Step 6: Customize plot title
plot_title = input("Enter a custom title for your UpSet plot: ")

# Step 7: Generate and show UpSet plot
# Work on an explicit figure handle rather than pyplot's implicit current figure.
# NOTE(review): upsetplot's default element sizing may resize the figure and
# override figsize — confirm if the rendered size matters.
fig = plt.figure(figsize=(12, 6))
upset = UpSet(upset_data, subset_size='count', show_counts=True)
upset.plot(fig=fig)
fig.suptitle(plot_title, fontsize=14)
fig.tight_layout()
plt.show()

In [None]:
# UpSet plot of interval overlaps across 5 BED files

# Install required packages
!pip install pyranges upsetplot

# Imports
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import files
from upsetplot import UpSet, from_indicators
import pyranges as pr

# Step 1: Upload BED files
# The keys of `uploaded` are used below both in the prompt text and as the
# paths handed to pd.read_csv.
print("📁 Please upload your 5 BED files")
uploaded = files.upload()

# Step 2: Read files into a dictionary with custom condition names
# condition_beds maps a user-chosen label -> PyRanges built from the first
# three tab-separated columns, named Chromosome / Start / End.
condition_beds = {}
for file in uploaded:
    condition_name = input(f"Enter a custom name for condition '{file}': ").strip()
    # A repeated label would overwrite an earlier entry and silently drop that
    # file from the plot; a blank label yields an unnamed set. Ask again.
    while not condition_name or condition_name in condition_beds:
        condition_name = input(
            f"⚠️ Name must be non-empty and unique. Enter a different name for '{file}': "
        ).strip()
    df = pd.read_csv(file, sep='\t', header=None, usecols=[0,1,2], names=['Chromosome', 'Start', 'End'])
    condition_beds[condition_name] = pr.PyRanges(df)

# Step 3: Combine all ranges from all conditions into one PyRanges object
# Pool every condition's intervals into one frame, then merge overlapping ones
# so each union region appears exactly once.
print("🔁 Computing overlaps (≥1bp)...")
pooled_df = pd.concat([gr.df for gr in condition_beds.values()], ignore_index=True)
all_ranges = pr.PyRanges(pooled_df).merge()  # Merge overlapping intervals

# Step 4: Create binary presence/absence matrix
# Rows are the merged union regions; there is one boolean column per condition.
all_ranges_df = all_ranges.df[['Chromosome', 'Start', 'End']].copy()

# Build the (Chromosome, Start, End) key of every union region once, rather
# than re-deriving it row by row with DataFrame.apply for each condition.
region_keys = list(zip(all_ranges_df.Chromosome, all_ranges_df.Start, all_ranges_df.End))

membership = {}
for name, gr in condition_beds.items():
    # Union regions that overlap this condition's intervals.
    overlap = all_ranges.overlap(gr)
    overlap_df = overlap.df
    if overlap_df.empty:
        # An empty result may come back without Chromosome/Start/End columns,
        # so mark every region as absent instead of indexing into it.
        membership[name] = [False] * len(region_keys)
        continue
    overlap_keys = set(zip(overlap_df.Chromosome, overlap_df.Start, overlap_df.End))
    membership[name] = [key in overlap_keys for key in region_keys]

presence_matrix = pd.DataFrame(membership, index=all_ranges_df.index)

# Step 5: Create UpSet data
# Convert the boolean per-condition columns into the structure UpSet consumes.
upset_data = from_indicators(presence_matrix)

# Step 6: Custom title
plot_title = input("Enter a custom title for your UpSet plot: ")

# Step 7: Generate and show the plot
# Work on an explicit figure handle rather than pyplot's implicit current figure.
# NOTE(review): upsetplot's default element sizing may resize the figure and
# override figsize — confirm if the rendered size matters.
fig = plt.figure(figsize=(12, 6))
upset = UpSet(upset_data, subset_size='count', show_counts=True)
upset.plot(fig=fig)
fig.suptitle(plot_title, fontsize=14)
fig.tight_layout()
plt.show()


In [None]:
# UpSet plot of interval overlaps across exactly 4 BED files

# Install required packages
!pip install pyranges upsetplot

# Imports
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import files
from upsetplot import UpSet, from_indicators
import pyranges as pr

# Step 1: Upload BED files
# The keys of `uploaded` are used below both in the prompt text and as the
# paths handed to pd.read_csv.
print("📁 Please upload your 4 BED files")
uploaded = files.upload()

# Check for exactly 4 files
if len(uploaded) != 4:
    raise ValueError("⚠️ Please upload exactly 4 BED files.")

# Step 2: Read files into a dictionary with custom condition names
# condition_beds maps a user-chosen label -> PyRanges built from the first
# three tab-separated columns, named Chromosome / Start / End.
condition_beds = {}
for file in uploaded:
    condition_name = input(f"Enter a custom name for condition '{file}': ").strip()
    # A repeated label would overwrite an earlier entry, leaving fewer than the
    # 4 validated conditions in the plot; a blank label yields an unnamed set.
    while not condition_name or condition_name in condition_beds:
        condition_name = input(
            f"⚠️ Name must be non-empty and unique. Enter a different name for '{file}': "
        ).strip()
    df = pd.read_csv(file, sep='\t', header=None, usecols=[0,1,2], names=['Chromosome', 'Start', 'End'])
    condition_beds[condition_name] = pr.PyRanges(df)

# Step 3: Combine all ranges from all conditions into one PyRanges object
# Pool every condition's intervals into one frame, then merge overlapping ones
# so each union region appears exactly once.
print("🔁 Computing overlaps (≥1bp)...")
pooled_df = pd.concat([gr.df for gr in condition_beds.values()], ignore_index=True)
all_ranges = pr.PyRanges(pooled_df).merge()  # Merge overlapping intervals

# Step 4: Create binary presence/absence matrix
# Rows are the merged union regions; there is one boolean column per condition.
all_ranges_df = all_ranges.df[['Chromosome', 'Start', 'End']].copy()

# Build the (Chromosome, Start, End) key of every union region once, rather
# than re-deriving it row by row with DataFrame.apply for each condition.
region_keys = list(zip(all_ranges_df.Chromosome, all_ranges_df.Start, all_ranges_df.End))

membership = {}
for name, gr in condition_beds.items():
    # Union regions that overlap this condition's intervals.
    overlap = all_ranges.overlap(gr)
    overlap_df = overlap.df
    if overlap_df.empty:
        # An empty result may come back without Chromosome/Start/End columns,
        # so mark every region as absent instead of indexing into it.
        membership[name] = [False] * len(region_keys)
        continue
    overlap_keys = set(zip(overlap_df.Chromosome, overlap_df.Start, overlap_df.End))
    membership[name] = [key in overlap_keys for key in region_keys]

presence_matrix = pd.DataFrame(membership, index=all_ranges_df.index)

# Step 5: Create UpSet data
# Convert the boolean per-condition columns into the structure UpSet consumes.
upset_data = from_indicators(presence_matrix)

# Step 6: Custom title
plot_title = input("Enter a custom title for your UpSet plot: ")

# Step 7: Generate and show the plot
# Work on an explicit figure handle rather than pyplot's implicit current figure.
# NOTE(review): upsetplot's default element sizing may resize the figure and
# override figsize — confirm if the rendered size matters.
fig = plt.figure(figsize=(12, 6))
upset = UpSet(upset_data, subset_size='count', show_counts=True)
upset.plot(fig=fig)
fig.suptitle(plot_title, fontsize=14)
fig.tight_layout()
plt.show()

In [None]:
# Install required packages
!pip install pyranges upsetplot

# Imports
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import files
from upsetplot import UpSet, from_indicators
import pyranges as pr

# Step 1: Upload BED files
# The keys of `uploaded` are used below both in the prompt text and as the
# paths handed to pd.read_csv.
print("📁 Please upload your 12 BED files")
uploaded = files.upload()

# Check for exactly 12 files
if len(uploaded) != 12:
    raise ValueError("⚠️ Please upload exactly 12 BED files.")

# Step 2: Read files into a dictionary with custom condition names
# condition_beds maps a user-chosen label -> PyRanges built from the first
# three tab-separated columns, named Chromosome / Start / End.
condition_beds = {}
for file in uploaded:
    condition_name = input(f"Enter a custom name for condition '{file}': ").strip()
    # A repeated label would overwrite an earlier entry, leaving fewer than the
    # 12 validated conditions in the plot; a blank label yields an unnamed set.
    while not condition_name or condition_name in condition_beds:
        condition_name = input(
            f"⚠️ Name must be non-empty and unique. Enter a different name for '{file}': "
        ).strip()
    df = pd.read_csv(file, sep='\t', header=None, usecols=[0,1,2], names=['Chromosome', 'Start', 'End'])
    condition_beds[condition_name] = pr.PyRanges(df)

# Step 3: Combine all ranges from all conditions into one PyRanges object
# Pool every condition's intervals into one frame, then merge overlapping ones
# so each union region appears exactly once.
print("🔁 Computing overlaps (≥1bp)...")
pooled_df = pd.concat([gr.df for gr in condition_beds.values()], ignore_index=True)
all_ranges = pr.PyRanges(pooled_df).merge()  # Merge overlapping intervals

# Step 4: Create binary presence/absence matrix
# Rows are the merged union regions; there is one boolean column per condition.
all_ranges_df = all_ranges.df[['Chromosome', 'Start', 'End']].copy()

# Build the (Chromosome, Start, End) key of every union region once, rather
# than re-deriving it row by row with DataFrame.apply for each of the 12
# conditions.
region_keys = list(zip(all_ranges_df.Chromosome, all_ranges_df.Start, all_ranges_df.End))

membership = {}
for name, gr in condition_beds.items():
    # Union regions that overlap this condition's intervals.
    overlap = all_ranges.overlap(gr)
    overlap_df = overlap.df
    if overlap_df.empty:
        # An empty result may come back without Chromosome/Start/End columns,
        # so mark every region as absent instead of indexing into it.
        membership[name] = [False] * len(region_keys)
        continue
    overlap_keys = set(zip(overlap_df.Chromosome, overlap_df.Start, overlap_df.End))
    membership[name] = [key in overlap_keys for key in region_keys]

presence_matrix = pd.DataFrame(membership, index=all_ranges_df.index)

# Step 5: Create UpSet data
# Convert the boolean per-condition columns into the structure UpSet consumes.
upset_data = from_indicators(presence_matrix)

# Step 6: Custom title and filename input
plot_title = input("Enter a custom title for your UpSet plot: ")
save_plot = input("💾 Do you want to save the plot as a high-res PNG? (yes/no): ").strip().lower()
wants_save = save_plot == "yes"  # only the exact answer "yes" turns saving on

# The filename is requested (and defined) only when saving was chosen.
if wants_save:
    filename = input("Enter filename to save (e.g., 'upset_plot.png'): ")

# Step 7: Generate and optionally save the plot
# Work on an explicit figure handle rather than pyplot's implicit current figure.
# NOTE(review): upsetplot's default element sizing may resize the figure and
# override figsize — confirm if the rendered size matters.
fig = plt.figure(figsize=(18, 8))  # Wider figure for more sets
upset = UpSet(upset_data, subset_size='count', show_counts=True)
upset.plot(fig=fig)
fig.suptitle(plot_title, fontsize=16)
fig.tight_layout()

# Save before plt.show() so the file is written from the fully drawn figure.
if wants_save:
    plt.savefig(filename, dpi=300, bbox_inches='tight', transparent=True)
    print(f"✅ Plot saved as '{filename}' (300 dpi)")

plt.show()
