In [1]:
import pandas as pd
import networkx as nx

# Load the author x keyword matrix. The first CSV column becomes the index,
# and each index label is treated as one author below.
df = pd.read_csv("Keywords/author_keyword_matrix.csv", index_col=0)

# No transpose is performed: rows stay as authors, columns as keywords.
# Missing cells are filled with 0 ("keyword not used by this author").
keyword_matrix = df.fillna(0)

# Undirected graph with one node per author.
G = nx.Graph()
authors = keyword_matrix.index.tolist()
G.add_nodes_from(authors)

# Edge weight between two authors = sum of BOTH authors' values over the
# keywords where both have a value > 0.
#
# All pairs are computed in one matrix product instead of doing four
# DataFrame.loc row lookups per pair inside the O(n^2) loop:
#   positive[i, k]  = values[i, k] if values[i, k] > 0 else 0
#   one_sided[i, j] = sum_k positive[i, k] * present[j, k]
#   weights[i, j]   = one_sided[i, j] + one_sided[j, i]
# For integer-valued counts this is exactly the per-pair sum; for fractional
# values the result can differ only by floating-point summation order.
values = keyword_matrix.to_numpy()
present = values > 0
positive = values * present
one_sided = positive @ present.astype(values.dtype).T
pair_weights = one_sided + one_sided.T

# Visit each unordered pair once (j > i) and connect authors that share at
# least one keyword.
for i, author1 in enumerate(authors):
    for j in range(i + 1, len(authors)):
        weight = pair_weights[i, j]
        if weight > 0:
            G.add_edge(author1, authors[j], weight=weight)

In [16]:
import pandas as pd
import networkx as nx
import os
from pyvis.network import Network
from glob import glob
import time

# vis.js options applied to every rendered network (previously duplicated
# verbatim in the DBSCAN branch and in the generic branch).
VIS_OPTIONS = """
{
  "nodes": {
    "font": { "size": 16, "face": "Tahoma" },
    "shape": "dot"
  },
  "edges": {
    "color": { "inherit": true },
    "smooth": false
  },
  "interaction": {
    "hover": true,
    "tooltipDelay": 200
  },
  "physics": {
    "barnesHut": {
      "gravitationalConstant": -12000,
      "springLength": 250,
      "springConstant": 0.02,
      "damping": 0.6
    },
    "minVelocity": 0.75,
    "stabilization": { "iterations": 250 }
  }
}
"""


def _write_cluster_network(graph, cluster_by_author, filename, title):
    """Render `graph` as a pyvis HTML page coloured by cluster.

    Parameters
    ----------
    graph : networkx graph
        Nodes become pyvis nodes; each edge's ``weight`` attribute (default 1)
        becomes the pyvis edge ``value``.
    cluster_by_author : dict
        Maps node -> cluster label. Nodes missing from the mapping get -1.
    filename : str
        Output HTML path; its parent directory is created if needed.
    title : str
        Heading text injected right after the opening ``<body>`` tag.
    """
    net = Network(height='700px', width='100%', directed=False)
    net.barnes_hut()
    net.set_options(VIS_OPTIONS)

    for node in graph.nodes():
        cluster = cluster_by_author.get(node, -1)
        net.add_node(
            node,
            label=node,
            title=f"{node} (Grupo {cluster})",
            group=int(cluster) if cluster != -1 else -1,
            size=20
        )

    for u, v, data in graph.edges(data=True):
        net.add_edge(u, v, value=int(data.get('weight', 1)))

    os.makedirs(os.path.dirname(filename), exist_ok=True)
    net.write_html(filename)

    # pyvis writes the page itself, so the heading is added by re-reading the
    # file and patching it afterwards.
    with open(filename, "r", encoding="utf-8") as f:
        page = f.read()

    heading = f"<h2 style='text-align:center; font-family:Tahoma;'>{title}</h2>\n"
    # count=1: only the first "<body>" (the real tag) receives the heading.
    page = page.replace("<body>", f"<body>\n{heading}", 1)

    with open(filename, "w", encoding="utf-8") as f:
        f.write(page)


# One CSV per clustering model; adjust the pattern if the files live elsewhere.
clustering_files = glob("Keywords/Clusters/*.csv")

# NOTE: `G` is the author graph built in an earlier cell; that cell must have
# been executed before this one.
for file in clustering_files:
    # NOTE(review): the reason for this 1 s pause per file is not evident from
    # this cell — confirm whether it is still needed.
    time.sleep(1)
    base_name = os.path.splitext(os.path.basename(file))[0]
    # The model name is the part of the file name before the first underscore.
    model_name = base_name.split('_')[0].lower()
    clustering_df = pd.read_csv(file)

    if model_name == "dbscan":
        # DBSCAN results are keyed by `eps`; the cluster count is derived from
        # the labels, ignoring -1.
        for eps_value in clustering_df["eps"].unique():
            subset = clustering_df[clustering_df["eps"] == eps_value]
            cluster_dict = dict(zip(subset["author"], subset["cluster"]))
            n_clusters = len(set(c for c in subset["cluster"] if c != -1))

            # NOTE(review): the file name depends only on the cluster count, so
            # two eps values producing the same count overwrite each other.
            # Left unchanged to keep existing output paths stable.
            filename = f"HTML/dbscan_k{n_clusters}.html"
            title = f"Red de Coocurrencias de Palabras Clave (DBSCAN, k = {n_clusters})"
            _write_cluster_network(G, cluster_dict, filename, title)

    else:
        # Other models store the requested cluster count in `k` or, failing
        # that, in `n_clusters`.
        k_column = clustering_df['k'] if 'k' in clustering_df.columns else clustering_df['n_clusters']
        for k_value in k_column.unique():
            subset = clustering_df[k_column == k_value]
            cluster_dict = dict(zip(subset['author'], subset['cluster']))

            filename = f"HTML/{model_name}_k{k_value}.html"
            title = f"Red de Coocurrencias de Palabras Clave ({model_name}, k = {k_value})"
            _write_cluster_network(G, cluster_dict, filename, title)

print("✅ Redes generadas en la carpeta 'HTML'.")

✅ Redes generadas en la carpeta 'HTML'.
