In [6]:
from collections import defaultdict

# Function to process the clusters and group them based on shared IDs
def process_clusters(file_path):
    """Read blank-line-separated ID clusters and merge those that share IDs.

    The input file holds one ID per line; one or more blank lines separate
    consecutive clusters. Leading and trailing whitespace on each line is
    stripped. Clusters that share at least one ID -- directly or through a
    chain of other clusters -- end up in the same merged cluster.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of merged clusters, each a sorted list of unique ID strings.
        A merged cluster keeps the position of its earliest-read part.
    """
    # Parse the file into one set of IDs per cluster.
    clusters = []
    current_cluster = set()
    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if line:
                current_cluster.add(line)
            elif current_cluster:
                clusters.append(current_cluster)
                current_cluster = set()

    # The last cluster has no trailing blank line to flush it.
    if current_cluster:
        clusters.append(current_cluster)

    # Invariant: the sets in merged_clusters are pairwise disjoint.
    merged_clusters = []
    for cluster in clusters:
        target = None  # first existing group that overlaps this cluster
        survivors = []
        for existing in merged_clusters:
            if existing.isdisjoint(cluster):
                survivors.append(existing)
            elif target is None:
                existing |= cluster
                target = existing
                survivors.append(existing)
            else:
                # The incoming cluster bridges two groups that used to be
                # separate: fold the later one into the first instead of
                # leaving both in the result.
                target |= existing
        if target is None:
            survivors.append(cluster)
        merged_clusters = survivors

    # Return the merged clusters as a list of sorted lists
    return [sorted(cluster) for cluster in merged_clusters]

# Function to write the output in the same format as the input
def write_clusters(clusters, output_path):
    """Write clusters to a text file, one ID per line.

    Every cluster is followed by a single empty line, including the last
    one. The file at output_path is created or overwritten.

    Args:
        clusters: Iterable of clusters, each an iterable of IDs.
        output_path: Path of the file to write.
    """
    with open(output_path, 'w') as out:
        for members in clusters:
            out.writelines(f"{member}\n" for member in members)
            # Blank separator line after each cluster.
            out.write("\n")

# Main function to run the processing
def main(input_file, output_file):
    """Process the clusters in input_file and write them to output_file.

    Passes the result of process_clusters to write_clusters, then prints a
    confirmation message naming the output file.
    """
    write_clusters(process_clusters(input_file), output_file)
    print(f"Clusters have been written to {output_file}")

# Input and output file paths used for this run
input_file, output_file = '8clusters.txt', 'output.txt'

# Process the input file and write the result
main(input_file, output_file)


Clusters have been written to output.txt
