# In [None]:
import numpy as np
import networkx as nx
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize, OneHotEncoder
import tensorflow as tf
from tensorflow.keras import layers, Model
import community as community_louvain

# --- Paso 1: Algoritmo k-truss ---
def k_truss(graph, k):
    """Return a pruned copy of ``graph`` keeping only k-truss edges.

    An edge survives when its two endpoints share at least ``k - 2`` common
    neighbours. Removing edges can lower the support of other edges, so the
    pruning is repeated until a full pass removes nothing.

    Args:
        graph: Graph object providing ``copy``, ``edges``, ``neighbors`` and
            ``remove_edges_from``; it is not modified.
        k: Truss order; the required number of common neighbours is ``k - 2``.

    Returns:
        The pruned copy. Only edges are removed, so every node of the input
        is still present (possibly isolated).
    """
    pruned = graph.copy()
    min_support = k - 2
    while True:
        # Collect the weak edges first, then delete them in one batch, so the
        # graph is never mutated while its edges are being iterated.
        weak_edges = [
            (a, b)
            for a, b in pruned.edges()
            if len(set(pruned.neighbors(a)) & set(pruned.neighbors(b))) < min_support
        ]
        if not weak_edges:
            return pruned
        pruned.remove_edges_from(weak_edges)

# --- Paso 2: Autoencoder Variacional ---
class VariationalAutoencoder(Model):
    """Variational autoencoder with a reconstruction head and a classifier head.

    The encoder maps an input row to ``2 * latent_dim`` values that are split
    into the mean and log-variance of the latent distribution. A latent sample
    is then fed to two decoders: one rebuilds the input (sigmoid output) and
    one predicts a distribution over ``num_clusters`` classes (softmax output).
    """

    def __init__(self, original_dim, latent_dim, num_clusters):
        """Build the encoder and both decoder heads.

        Args:
            original_dim: Width of each input row (and of the reconstruction).
            latent_dim: Size of the latent vector.
            num_clusters: Number of classes produced by the classifier head.
        """
        super().__init__()
        latent_input_shape = (latent_dim,)

        # Encoder: the last layer emits mean and log-variance concatenated.
        encoder_layers = [
            layers.InputLayer(shape=(original_dim,)),
            layers.Dense(128, activation='relu'),
            layers.Dense(64, activation='relu'),
            layers.Dense(2 * latent_dim),
        ]
        self.encoder = tf.keras.Sequential(encoder_layers)

        # Reconstruction head: mirrors the encoder widths back to the input size.
        reconstruction_layers = [
            layers.InputLayer(shape=latent_input_shape),
            layers.Dense(64, activation='relu'),
            layers.Dense(128, activation='relu'),
            layers.Dense(original_dim, activation='sigmoid'),
        ]
        self.decoder_reconstruction = tf.keras.Sequential(reconstruction_layers)

        # Classification head: a single softmax layer on the latent sample.
        classification_layers = [
            layers.InputLayer(shape=latent_input_shape),
            layers.Dense(num_clusters, activation='softmax'),
        ]
        self.decoder_classification = tf.keras.Sequential(classification_layers)

    def call(self, x):
        """Run a forward pass.

        Returns:
            Tuple ``(reconstructed, classification, mean, logvar)``.
        """
        encoded = self.encoder(x)
        # First half of the columns is the mean, second half the log-variance.
        mean, logvar = tf.split(encoded, 2, axis=1)
        z = self.reparameterize(mean, logvar)
        return (
            self.decoder_reconstruction(z),
            self.decoder_classification(z),
            mean,
            logvar,
        )

    @staticmethod
    def reparameterize(mean, logvar):
        """Sample ``mean + std * noise`` with standard-normal ``noise``."""
        noise = tf.random.normal(shape=tf.shape(mean))
        std = tf.exp(0.5 * logvar)
        return mean + std * noise

def vae_loss(x, reconstructed, classification, mean, logvar, true_labels_onehot):
    """Return the scalar training loss of the two-headed VAE.

    The loss is the unweighted sum of three terms, each averaged over all of
    its elements:

    * binary cross-entropy between ``x`` and ``reconstructed``;
    * the KL term ``-0.5 * mean(1 + logvar - mean**2 - exp(logvar))``;
    * categorical cross-entropy between ``true_labels_onehot`` and
      ``classification``.

    Args:
        x: Input batch fed to the model.
        reconstructed: Output of the reconstruction head.
        classification: Output of the classification head.
        mean: Latent means produced by the encoder.
        logvar: Latent log-variances produced by the encoder.
        true_labels_onehot: One-hot reference labels, one row per sample.

    Returns:
        Scalar tensor with the combined loss.
    """
    losses = tf.keras.losses

    recon_term = tf.reduce_mean(losses.binary_crossentropy(x, reconstructed))

    kl_elements = 1 + logvar - tf.square(mean) - tf.exp(logvar)
    kl_term = -0.5 * tf.reduce_mean(kl_elements)

    class_term = tf.reduce_mean(
        losses.categorical_crossentropy(true_labels_onehot, classification)
    )

    return recon_term + kl_term + class_term

# --- Algoritmo CSEA ---
def csea_algorithm(graph, latent_dim=10, num_epochs=100, true_labels=None):
    """Cluster the nodes of ``graph`` with a VAE trained on k-truss similarity.

    Steps:
      1. Obtain one reference label per node (Louvain partition unless
         ``true_labels`` is given).
      2. Build a similarity matrix from the adjacency of the 3-truss of
         ``graph`` plus the identity, passed through scikit-learn
         ``normalize``.
      3. Train ``VariationalAutoencoder`` full-batch for ``num_epochs`` steps
         with ``vae_loss``.
      4. Run KMeans on the encoder's latent means.

    Args:
        graph: NetworkX graph whose node identifiers can be sorted.
        latent_dim: Requested latent size; capped at twice the number of
            distinct labels.
        num_epochs: Number of full-batch gradient steps.
        true_labels: Optional array-like with one label per node, given in
            sorted-node order (the row order of the similarity matrix).

    Returns:
        Tuple ``(cluster_labels, latent_features)``: the KMeans label of each
        node and the latent mean vectors, both in sorted-node order.

    Raises:
        ValueError: If ``true_labels`` does not contain exactly one label per
            node.
    """
    # A single node ordering is shared by labels, adjacency rows and output.
    node_order = sorted(graph.nodes())

    # Fall back to Louvain communities when no labels are supplied.
    if true_labels is None:
        partition = community_louvain.best_partition(graph, weight='weight')
        true_labels = [partition[node] for node in node_order]
    # Accept any array-like (list, tuple, column vector) rather than ndarray only.
    true_labels = np.asarray(true_labels).ravel()
    if true_labels.shape[0] != len(node_order):
        raise ValueError(
            f"true_labels has {true_labels.shape[0]} entries but the graph "
            f"has {len(node_order)} nodes"
        )

    classes, label_indices = np.unique(true_labels, return_inverse=True)
    num_clusters = len(classes)
    latent_dim = min(latent_dim, num_clusters * 2)

    # Step 1: k-truss adjacency (with self-similarity) as the input features.
    k_truss_graph = k_truss(graph, k=3)
    adjacency_matrix = nx.to_numpy_array(k_truss_graph, nodelist=node_order)
    similarity_matrix = normalize(adjacency_matrix + np.eye(adjacency_matrix.shape[0]))
    # Cast once instead of on every training step.
    features = similarity_matrix.astype(np.float32)

    # One-hot labels built with NumPy: columns follow the sorted unique
    # labels, and this avoids OneHotEncoder's `sparse` keyword, which was
    # removed in scikit-learn 1.4.
    true_labels_onehot = np.eye(num_clusters, dtype=np.float32)[label_indices.ravel()]

    # Build the VAE and train it full-batch.
    vae = VariationalAutoencoder(features.shape[1], latent_dim, num_clusters)
    optimizer = tf.keras.optimizers.Adam()

    for epoch in range(num_epochs):
        with tf.GradientTape() as tape:
            reconstructed, classification, mean, logvar = vae(features)
            loss = vae_loss(features, reconstructed, classification, mean, logvar, true_labels_onehot)
        grads = tape.gradient(loss, vae.trainable_variables)
        optimizer.apply_gradients(zip(grads, vae.trainable_variables))
        if epoch % 20 == 0:
            print(f"Epoch {epoch}: Loss = {loss.numpy():.4f}")

    # Latent embedding: use the encoder means only (no sampling noise).
    mean_logvar = vae.encoder(features)
    mean, _ = tf.split(mean_logvar, 2, axis=1)
    latent_features = mean.numpy()

    # KMeans on the embedding yields the final community assignment.
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(latent_features)

    return cluster_labels, latent_features

# --- USAGE EXAMPLE ---

# Toy graph: triangle 0-1-2 joined by the bridge (2, 3) to triangle 3-4-5.
edges = [
    (0, 1), (1, 2), (2, 0),
    (2, 3),
    (3, 4), (4, 5), (5, 3),
]
G = nx.Graph()
G.add_edges_from(edges)

# Run CSEA; labels and embedding rows come back in sorted-node order.
labels, embeddings = csea_algorithm(G, latent_dim=4, num_epochs=100)

print("\nEtiquetas de comunidades asignadas:")
for node, label in zip(sorted(G), labels):
    print(f"Nodo {node}: Comunidad {label}")

print("\nEmbedding latente de cada nodo:")
print(embeddings)
