# 1. Download DDEs for plotting

In [1]:
# import os

# url_j = "https://github.com/Daniel-Xkan/potts_model_test/raw/main/J.npy"
# url_seq = "https://github.com/Daniel-Xkan/potts_model_test/raw/main/in.reduce4.seq"
# url_consensus = "https://github.com/Daniel-Xkan/potts_model_test/raw/main/in.consensus.reduce4.seq"
# url_nl43 = "https://github.com/Daniel-Xkan/potts_model_test/raw/main/in.nl43.reduce4.seq"
# url_DDE = "https://github.com/Daniel-Xkan/potts_model_test/raw/main/kn.her2.all"

# # Suppress output and check for success
# if os.system(f"wget -q {url_j}") == 0:
#     print(f"Successfully downloaded {url_j}")
# else:
#     print(f"Failed to download {url_j}")

# if os.system(f"wget -q {url_seq}") == 0:
#     print(f"Successfully downloaded {url_seq}")
# else:
#     print(f"Failed to download {url_seq}")

# if os.system(f"wget -q {url_consensus}") == 0:
#     print(f"Successfully downloaded {url_consensus}")
# else:
#     print(f"Failed to download {url_consensus}")

# if os.system(f"wget -q {url_nl43}") == 0:
#     print(f"Successfully downloaded {url_nl43}")
# else:
#     print(f"Failed to download {url_nl43}")

# if os.system(f"wget -q {url_DDE}") == 0:
#     print(f"Successfully downloaded {url_DDE}")
# else:
#     print(f"Failed to download {url_DDE}")

# 2. Circos


## 2.1 Read in data from kn.her2.all

In [2]:
import pandas as pd
import numpy as np

# Load pairwise DDE values from kn.her2.all and keep only the strongest ones.
#
# Column 0 is read as the pair identifier (string) and column 1 as the DDE value.
# Produces:
#   dde_df          - all rows whose DDE value is numeric
#   filtered_dde_df - rows of dde_df with |DDE| above DDE_ABS_THRESHOLD
DDE_FILE = "kn.her2.all"
DDE_ABS_THRESHOLD = 4.5
_READ_OPTIONS = dict(header=None, usecols=[0, 1], names=['identifier', 'dde_value'], dtype={'identifier': str})

try:
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    dde_df = pd.read_csv(DDE_FILE, sep=r'\s+', **_READ_OPTIONS)
except Exception as e:
    print(f"Error reading file: {e}")
    print("Could not read the file with space separator. Please check the file format.")
    # Attempt to read with a different separator if space doesn't work
    try:
        dde_df = pd.read_csv(DDE_FILE, sep=',', **_READ_OPTIONS)
    except Exception as e_comma:
        print(f"Error reading file with comma separator: {e_comma}")
        print("Could not read the file with space or comma separators. Please check the file format.")
        # Re-raise instead of calling exit(): in a notebook exit() asks the kernel
        # to shut down, whereas an exception stops the run and shows the traceback.
        raise

# Coerce the DDE column to numeric and drop rows that could not be converted.
# Reassigning (rather than mutating in place) keeps the cell safe to re-run.
dde_df = (
    dde_df
    .assign(dde_value=pd.to_numeric(dde_df['dde_value'], errors='coerce'))
    .dropna(subset=['dde_value'])
)

# Apply the filter (absolute DDE above the threshold)
filtered_dde_df = dde_df[np.abs(dde_df['dde_value']) > DDE_ABS_THRESHOLD]

# Show the head of both frames and the row counts before/after filtering
print("Original Data Head:")
display(dde_df.head())

print(f"\nFiltered Data Head (abs(DDE) > {DDE_ABS_THRESHOLD}):")
display(filtered_dde_df.head())

print(f"\nOriginal number of interactions: {len(dde_df)}")
print(f"Number of interactions with abs(DDE) > {DDE_ABS_THRESHOLD}: {len(filtered_dde_df)}")

Original Data Head:


Unnamed: 0,identifier,dde_value
0,A1B-B2A,0.012168
1,A1B-B2C,-0.000908
2,A1B-B2D,0.000125
3,A1B-A3B,0.008785
4,A1B-A3C,0.008937



Filtered Data Head (abs(DDE) > 4.5):


Unnamed: 0,identifier,dde_value
11701,B6A-C7B,6.06472
13243,B6D-C7B,6.37774
216124,D119A-C122B,5.94553
239229,D138A-D148A,4.90052
241464,C140A-D148A,5.61299



Original number of interactions: 310077
Number of interactions with abs(DDE) > 4.5: 13


## 2.2 Output Circos links data to file
Write the prepared Circos links data to a TSV file (`circos_links.tsv`).

In [3]:
# Build Circos link records from filtered_dde_df and save them as a TSV file.
#
# Input : filtered_dde_df with columns 'identifier' (e.g. 'A1B-B2A') and 'dde_value'.
# Output: circos_links     - list of "region start end region start end dde=<value>" strings
#         circos_links.tsv - the same links, one per row, with a header and a link_id column.

# Placeholder region name (replace with your Circos region names)
CIRCOS_REGION = 'seq'


def _site_position(site_id):
    """Return the integer formed by all digit characters in ``site_id``.

    Raises:
        ValueError: if ``site_id`` contains no digit characters, or if the
            digit characters cannot be parsed by ``int()``.
    """
    digits = ''.join(filter(str.isdigit, site_id))
    if not digits:
        # A link at a made-up coordinate 0 would be misleading, so refuse instead.
        raise ValueError(f"no digits in {site_id!r}")
    return int(digits)


# Each record is (region1, start1, end1, region2, start2, end2, attributes).
link_records = []
for identifier, dde_value in zip(filtered_dde_df['identifier'], filtered_dde_df['dde_value']):
    try:
        parts = identifier.split('-')
        if len(parts) != 2:
            # Handle identifiers that don't split into two parts
            print(f"Identifier '{identifier}' does not have the expected format (e.g., 'A-B'). Skipping.")
            continue
        pos1_id, pos2_id = parts
        num_pos1 = _site_position(pos1_id)
        num_pos2 = _site_position(pos2_id)
    except ValueError:
        print(f"Could not parse numerical positions from identifier parts {pos1_id} or {pos2_id}. Skipping link.")
        continue
    except Exception as e:
        print(f"Error processing identifier '{identifier}': {e}. Skipping link.")
        continue

    # Circos link format: region start end region start end [attributes]
    # Each site is a single position, so start and end are the same number.
    link_records.append(
        (CIRCOS_REGION, num_pos1, num_pos1, CIRCOS_REGION, num_pos2, num_pos2, f"dde={dde_value}")
    )

# Space-separated link lines, kept for any later cell that uses them.
circos_links = [" ".join(map(str, record)) for record in link_records]

print(f"Prepared {len(circos_links)} links for Circos plotting.")

# Define the output file name for TSV
output_file_tsv = "circos_links.tsv"

# Write straight from the structured records rather than re-splitting the
# space-joined strings, so fields can never be mis-parsed on whitespace.
with open(output_file_tsv, "w") as f:
    f.write("link_id\tregion1\tstart1\tend1\tregion2\tstart2\tend2\tattributes\n")
    for i, record in enumerate(link_records, start=1):
        f.write("\t".join([f"link_{i}", *map(str, record)]) + "\n")

print(f"Circos links data saved to {output_file_tsv} in TSV format.")

Prepared 13 links for Circos plotting.
Circos links data saved to circos_links.tsv in TSV format.


# Generate the Circos plot with pyCircos

In [10]:
%matplotlib inline

In [11]:
import pycircos
import matplotlib.pyplot as plt
# Short aliases for the two pyCircos classes used in the cells below.
Garc, Gcircle = pycircos.Garc, pycircos.Gcircle

In [None]:
# Set chromosomes
# Add one arc per region referenced in circos_links.tsv.
#
# The TSV columns are: link_id, region1, start1, end1, region2, start2, end2, attributes.
# The last column holds "dde=<value>", not a length, so arc sizes are derived
# from the link coordinates instead of from line[-1].
circle = Gcircle(figsize=(8, 8))

# Largest coordinate seen for each region across both ends of every link.
region_sizes = {}
with open("circos_links.tsv") as f:
    f.readline()  # skip the header row
    for line in f:
        fields = line.rstrip("\n").split("\t")
        if len(fields) < 7:
            continue  # blank or malformed row
        _, region1, _, end1, region2, _, end2 = fields[:7]
        for region, end in ((region1, end1), (region2, end2)):
            region_sizes[region] = max(region_sizes.get(region, 0), int(end))

# NOTE(review): the arc size is the largest linked coordinate because the full
# sequence length is not available in this file - replace with the true length if known.
for name, length in region_sizes.items():
    arc = Garc(arc_id=name, size=length, interspace=2, raxis_range=(935,985), labelposition=80, label_visible=True)
    circle.add_garc(arc)

<Figure size 800x800 with 0 Axes>