In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from itertools import combinations

# Step 1: Load the dataset
data = pd.read_csv("gene_expression_data.csv")  # Replace "gene_expression_data.csv" with the path to your dataset

# Step 2: Preprocess the data
# Remove any missing values and irrelevant features
data.dropna(inplace=True)
# Normalize the gene expression data
scaler = StandardScaler()
normalized_data = scaler.fit_transform(data.iloc[:, 1:])  # Assuming gene expression data starts from the second column

# Step 3: Apply CLIQUE clustering
def apply_clique_clustering(data, dimensions):
    clusters = []
    for dims in combinations(range(data.shape[1]), dimensions):
        # Perform clustering for each combination of dimensions
        clustering = AgglomerativeClustering(n_clusters=5, linkage='ward')  # You can adjust parameters as needed
        cluster_labels = clustering.fit_predict(data[:, dims])
        clusters.append(cluster_labels)
    return clusters

# Step 4: Identify key genes
def identify_key_genes(clusters):
    key_genes = set()
    for cluster_labels in clusters:
        unique_labels = set(cluster_labels)
        for label in unique_labels:
            # Get the genes belonging to each cluster
            cluster_genes = data.iloc[cluster_labels == label, 0]  # Assuming the first column contains gene names
            if len(cluster_genes) > 1:  # Considering clusters with more than one gene
                key_genes.update(cluster_genes)
    return key_genes

# Apply CLIQUE clustering for different dimensions
dimensions_list = [2, 3]  # Example: clustering in 2 and 3 dimensions
all_clusters = []
for dimensions in dimensions_list:
    clusters = apply_clique_clustering(normalized_data, dimensions)
    all_clusters.extend(clusters)

# Identify key genes across different dimensions
key_genes = identify_key_genes(all_clusters)

# Step 5: Output the key genes
print("Key Genes:")
for gene in key_genes:
    print(gene)


Key Genes:
ABCC8
AGPAT5_1
ABCD2
ADGB-DT
ADIPOR1
ACTL7A
ACBD6
ADH5
ABCC2
ACSS1
AKAP17BP
AAMDC
AKAP3
AGBL1-AS1
AKAP6
ABHD2
ADPGK-AS1
ADH7
AGER_3
ADGRA2
AGPAT1_1
ADCY4
ACADL
ADTRP
AIG1
ACTR3BP5_1
AGBL1
ADRB2
ABRA
ADAM2_1
ACAA2
AGER
ACTG2
ACTRT3
AASDH
AASDHPPT
AATK
ABR
ADCY6
ADAMTS13
ADAMTS17
ABI3
ADAMTS2_1
AKAP17A
ABCC10
ABCA13
ACSL4
AGGF1
AGL
ABCB5
AIM2
ACRV1
ABCC9
ABRAXAS2
ABCF1-DT
ACADS
ADAMTS4
ADCY9
ABCC11
AGAP12P_1
AKAP9
AKAP17A_1
AGBL2_1
ADAM18
ADGRB1
ADAMTSL3_1
ADH1A
ADRM1
ADAMTS13_1
AIF1
ACTA1
ADAM12
ACTL8
ACTR1B_1
ADCY8
AHRR
ABRACL
ADIPOR2_1
ADM5
AHSG
ABHD18
AKAP12
AGER_6
ABHD6
ABHD5
ACBD7
ACOT2
ADAR
ADAM23
ADGRG4
ACAP2
ADAMTS9
ACSM6
ABHD13
ACOT13
ACTN1-DT
AATBC
ABHD1
ADGRD1
ACTA2-AS1
ABCB11
ADAM7
ABCA5
ACSM5
AHNAK2
ADRA2B
AIRN
ADGRE3
AGTRAP
ACTN4_1
AGA-DT
ADRB1
AGXT2
ACOT1
ADCK1
AKIRIN1
ADIPOQ-AS1
ABCF1_5
AIDA
ABCC12
AGER_2
ABCB11_1
ADGRF4
ADAM6_1
ACTR3C
AIF1_1
ABCA10
ACSM1
ACSM4
ADSL
ACYP2
ADGRL2
ADAMTS9-AS1
ACYP1
ABCF1_3
ABHD16A_1
ACAD8
ACCSL
ABHD17B
ADGRA1-AS1
ACSF2
ADRB3
ABC