In [2]:
import igraph as ig
import networkx as nx
import os

In [44]:
# Directory containing the `.txt` edge-list files; the cells below build one
# graph per file found here.
data_path = '../data/hashtags/edges/'
# os.listdir returns entries in arbitrary order. Sorting makes the graph ids
# assigned later via enumerate(edge_files) reproducible across runs/machines.
edge_files = sorted(file for file in os.listdir(data_path) if file.endswith('.txt'))

### Generate gSpan format
```
t # 0
v 1 a
v 2 b
e 1 2 a
```

In [50]:
# Build a gSpan-format graph database: one "t # <id>" record per edge file,
# restricted to the largest connected component of that file's graph.
# Fragments are collected in a list and joined once at the end instead of
# growing a string with repeated `+=`.
gspan_lines = []
for i, edge_file in enumerate(edge_files):
    edge_file_path = os.path.join(data_path, edge_file)
    with open(edge_file_path, 'r') as f:
        edges = f.readlines()

    users = []
    for edge in edges:
        edge = edge.strip()
        if not edge:
            # A blank line would break the two-way unpacking below.
            continue
        u, v = edge.split(',')
        if 'None' in v:
            # The target endpoint is missing; drop the edge.
            continue
        # Each endpoint is a '/'-separated string; the third component from
        # the end is used as the node name (presumably the user id of a
        # URL-like path -- confirm against the data).
        u_user = u.split('/')[-3]
        v_user = v.split('/')[-3]
        users.append((u_user, v_user))

    g = ig.Graph.TupleList(users, directed=False)
    g = g.components().giant()

    # Every vertex and edge is written with the same label 0, so the records
    # carry structure only.
    gspan_lines.append(f't # {i}\n')
    gspan_lines.extend(f'v {vertex.index} 0\n' for vertex in g.vs)
    gspan_lines.extend(f'e {link.source} {link.target} 0\n' for link in g.es)

output_string = ''.join(gspan_lines)

In [51]:
# Persist the gSpan database built in the previous cell.
with open('lcc.txt', 'w') as out_file:
    out_file.write(output_string)

#### Repeat for the 5 largest connected components

In [46]:
# Build a gSpan-format graph database: one "t # <id>" record per edge file,
# restricted to the union of that file's five largest connected components.
gspan_lines = []
for i, edge_file in enumerate(edge_files):
    edge_file_path = os.path.join(data_path, edge_file)
    with open(edge_file_path, 'r') as f:
        edges = f.readlines()

    users = []
    for edge in edges:
        edge = edge.strip()
        if not edge:
            # A blank line would break the two-way unpacking below.
            continue
        u, v = edge.split(',')
        if 'None' in v:
            # The target endpoint is missing; drop the edge.
            continue
        # Each endpoint is a '/'-separated string; the third component from
        # the end is used as the node name (presumably the user id of a
        # URL-like path -- confirm against the data).
        u_user = u.split('/')[-3]
        v_user = v.split('/')[-3]
        users.append((u_user, v_user))

    # Rebuild the graph from THIS file's edges. Without this line the cell
    # silently reuses whatever `g` was left in the kernel by another cell.
    g = ig.Graph.TupleList(users, directed=False)

    components = g.components()
    largest_components = sorted(components, key=len, reverse=True)[:5]
    largest_components_nodes = [v for c in largest_components for v in c]
    # The induced subgraph renumbers its vertices from 0, giving contiguous
    # ids for the records written below.
    g_sub = g.induced_subgraph(largest_components_nodes)

    # Emit the subgraph (not the full graph). Every vertex and edge is
    # written with the same label 0.
    gspan_lines.append(f't # {i}\n')
    gspan_lines.extend(f'v {vertex.index} 0\n' for vertex in g_sub.vs)
    gspan_lines.extend(f'e {link.source} {link.target} 0\n' for link in g_sub.es)

output_string = ''.join(gspan_lines)

In [48]:
# Persist the top-5-components gSpan database built in the previous cell.
with open('lcc_top5.txt', 'w') as out_file:
    out_file.write(output_string)

#### Plot frequent subgraphs

In [53]:
def _build_subgraph(vertex_ids, edge_list, support):
    """Create an undirected igraph graph for one parsed pattern.

    Parameters
    ----------
    vertex_ids : set of int
        Vertex ids seen on the pattern's ``v`` lines. Only the count is used,
        so the ids are assumed to be 0..n-1 (TODO confirm for the miner's
        output format).
    edge_list : list of (int, int)
        Source/target vertex ids from the pattern's ``e`` lines.
    support : int or None
        Support value read from the pattern's ``t`` header; stored as the
        graph attribute ``"support"``.

    Returns
    -------
    igraph.Graph
    """
    graph = ig.Graph(directed=False)
    graph.add_vertices(len(vertex_ids))
    # One bulk call instead of one add_edge call per edge.
    graph.add_edges(edge_list)
    graph["support"] = support
    return graph


with open('lcc.txt.fp', 'r') as f:
    subgraph_string = f.read()

# Split into lines for parsing
lines = subgraph_string.strip().split('\n')

# Parser state for the pattern currently being read
graphs = []
vertices_set = set()
edges = []
current_support = None

for line in lines:
    parts = line.split()
    if not parts:
        # Skip empty AND whitespace-only lines (the latter would otherwise
        # raise IndexError on parts[0]).
        continue
    record_type = parts[0]

    if record_type == 't':  # Header line: start of a new pattern
        if vertices_set or edges:
            # Flush the pattern collected so far.
            graphs.append(_build_subgraph(vertices_set, edges, current_support))

        # Reset for the new pattern
        vertices_set = set()
        edges = []
        # The fifth whitespace-separated field is read as the support
        # (header presumably looks like "t # <id> * <support>").
        current_support = int(parts[4])

    elif record_type == 'v':  # Vertex line: "v <id> <label>"
        vertices_set.add(int(parts[1]))

    elif record_type == 'e':  # Edge line: "e <source> <target> <label>"
        edges.append((int(parts[1]), int(parts[2])))

# Flush the final pattern, which has no following header to trigger it
if vertices_set or edges:
    graphs.append(_build_subgraph(vertices_set, edges, current_support))

In [48]:
def graph_to_tikz(G, pos):
    """Render a graph as a ``tikzpicture`` using ``\\Vertex`` / ``\\Edge`` macros.

    Parameters
    ----------
    G : graph object exposing ``edges()``
        ``edges()`` must yield ``(u, v)`` pairs; each becomes one ``\\Edge``.
    pos : mapping
        Maps each node to an ``(x, y)`` pair; each entry becomes one
        ``\\Vertex`` with coordinates rounded to two decimals.

    Returns
    -------
    str
        The LaTeX source, without a trailing newline.
    """
    vertex_rows = [
        f"    \\Vertex[x={x:.2f}, y={y:.2f}]{{{node}}}\n"
        for node, (x, y) in pos.items()
    ]
    edge_rows = [f"    \\Edge({u})({v})\n" for u, v in G.edges()]

    # Vertices are declared before the edges that reference them.
    return "".join(
        ["\\begin{tikzpicture}\n", *vertex_rows, *edge_rows, "\\end{tikzpicture}"]
    )

In [36]:
def graph_to_tikz_formatted(G, pos, minipage_width=0.23, horizontal_space=0.02):
    """Render a graph as a ``tikzpicture`` wrapped in a ``minipage``.

    Parameters
    ----------
    G : graph object exposing ``edges()``
        ``edges()`` must yield ``(u, v)`` pairs; each becomes one ``\\Edge``.
    pos : mapping
        Maps each node to an ``(x, y)`` pair; each entry becomes one
        ``\\Vertex`` with coordinates rounded to two decimals.
    minipage_width : float
        Width of the minipage as a fraction of ``\\textwidth``.
    horizontal_space : float
        Width of the trailing ``\\hspace`` as a fraction of ``\\textwidth``.

    Returns
    -------
    str
        The LaTeX source, ending with the ``\\hspace`` (no trailing newline).
    """
    chunks = [
        f"\\begin{{minipage}}{{{minipage_width}\\textwidth}}\n",
        "\\begin{tikzpicture}\n",
    ]

    # One \Vertex per positioned node, then one \Edge per graph edge.
    for node, (x, y) in pos.items():
        chunks.append(f"    \\Vertex[x={x:.2f}, y={y:.2f}]{{{node}}}\n")
    for u, v in G.edges():
        chunks.append(f"    \\Edge({u})({v})\n")

    chunks.append("\\end{tikzpicture}\n")
    chunks.append("\\end{minipage}\n")
    # Trailing spacer so consecutive minipages do not touch.
    chunks.append(f"\\hspace{{{horizontal_space}\\textwidth}}")

    return "".join(chunks)

In [73]:
def graph_to_tikz_support(G, pos, support, minipage_width=0.23, horizontal_space=0.02):
    """Render a graph in a ``minipage`` with its support printed underneath.

    Parameters
    ----------
    G : graph object exposing ``edges()``
        ``edges()`` must yield ``(u, v)`` pairs; each becomes one ``\\Edge``.
    pos : mapping
        Maps each node to an ``(x, y)`` pair; each entry becomes one
        ``\\Vertex`` with coordinates rounded to two decimals.
    support : object
        Value printed after ``$s$:`` below the picture.
    minipage_width : float
        Width of the minipage as a fraction of ``\\textwidth``.
    horizontal_space : float
        Width of the trailing ``\\hspace`` as a fraction of ``\\textwidth``.

    Returns
    -------
    str
        The LaTeX source, ending with the ``\\hspace`` (no trailing newline).
    """
    header = (
        f"\\begin{{minipage}}{{{minipage_width}\\textwidth}}\n"
        "\\begin{tikzpicture}\n"
    )
    vertex_rows = "".join(
        f"    \\Vertex[x={x:.2f}, y={y:.2f}]{{{node}}}\n"
        for node, (x, y) in pos.items()
    )
    edge_rows = "".join(f"    \\Edge({u})({v})\n" for u, v in G.edges())
    footer = (
        "\\end{tikzpicture}\n"
        # Line break, then the support label below the picture.
        f"\\\\ $s$: {support}\n"
        "\\end{minipage}\n"
        # Trailing spacer so consecutive minipages do not touch.
        f"\\hspace{{{horizontal_space}\\textwidth}}"
    )
    return header + vertex_rows + edge_rows + footer


In [74]:
# Only patterns whose support is strictly greater than this are rendered.
MIN_SUPPORT = 20

# Render each sufficiently frequent pattern as a minipage and wrap them all in
# one LaTeX `center` environment.
tikz_blocks = []
for g in graphs:
    support = g['support']
    if support <= MIN_SUPPORT:
        continue
    g_nx = g.to_networkx()
    # Layout coordinates become the \Vertex positions in the TikZ output.
    pos = nx.kamada_kawai_layout(g_nx, scale=0.5)
    tikz_blocks.append(
        graph_to_tikz_support(g_nx, pos, support, minipage_width=0.18, horizontal_space=0.01)
    )

full_string = '\\begin{center}\n' + ''.join(tikz_blocks) + '\\end{center}'
# print (not rich display) so the raw LaTeX can be copied verbatim.
print(full_string)

\begin{center}
\begin{minipage}{0.18\textwidth}
\begin{tikzpicture}
    \Vertex[x=0.00, y=0.00]{0}
\end{tikzpicture}
\\ $s$: 30
\end{minipage}
\hspace{0.01\textwidth}\begin{minipage}{0.18\textwidth}
\begin{tikzpicture}
    \Vertex[x=0.50, y=0.00]{0}
    \Vertex[x=-0.50, y=-0.00]{1}
    \Edge(0)(1)
\end{tikzpicture}
\\ $s$: 30
\end{minipage}
\hspace{0.01\textwidth}\begin{minipage}{0.18\textwidth}
\begin{tikzpicture}
    \Vertex[x=0.50, y=0.29]{0}
    \Vertex[x=-0.00, y=0.00]{1}
    \Vertex[x=-0.50, y=-0.29]{2}
    \Edge(0)(1)
    \Edge(1)(2)
\end{tikzpicture}
\\ $s$: 29
\end{minipage}
\hspace{0.01\textwidth}\begin{minipage}{0.18\textwidth}
\begin{tikzpicture}
    \Vertex[x=0.50, y=0.50]{0}
    \Vertex[x=0.16, y=0.17]{1}
    \Vertex[x=-0.17, y=-0.16]{2}
    \Vertex[x=-0.50, y=-0.50]{3}
    \Edge(0)(1)
    \Edge(1)(2)
    \Edge(2)(3)
\end{tikzpicture}
\\ $s$: 25
\end{minipage}
\hspace{0.01\textwidth}\begin{minipage}{0.18\textwidth}
\begin{tikzpicture}
    \Vertex[x=0.36, y=0.50]{0}
    \V

### Generate nel format
```
v 1
v 2
e 1 2
g graph 1
x 0
```

In [43]:
# Build a nel-format graph database: for each edge file, the largest connected
# component is written as "v"/"e" lines followed by a "g <name> <n>" line and
# an "x 0" line. Ids are 1-based in this format.
nel_lines = []
for i, edge_file in enumerate(edge_files):
    edge_file_path = os.path.join(data_path, edge_file)
    with open(edge_file_path, 'r') as f:
        edges = f.readlines()

    users = []
    for edge in edges:
        edge = edge.strip()
        if not edge:
            # A blank line would break the two-way unpacking below.
            continue
        u, v = edge.split(',')
        if 'None' in v:
            # The target endpoint is missing; drop the edge.
            continue
        # Each endpoint is a '/'-separated string; the third component from
        # the end is used as the node name (presumably the user id of a
        # URL-like path -- confirm against the data).
        u_user = u.split('/')[-3]
        v_user = v.split('/')[-3]
        users.append((u_user, v_user))

    g = ig.Graph.TupleList(users, directed=False)
    # get largest connected component
    g = g.components().giant()

    # igraph indices are 0-based; shift by one for the 1-based output.
    nel_lines.extend(f'v {vertex.index + 1}\n' for vertex in g.vs)
    nel_lines.extend(f'e {link.source + 1} {link.target + 1}\n' for link in g.es)
    # The graph name is the part of the file name before the first underscore.
    nel_lines.append(f'g {edge_file.split("_")[0]} {i+1}\n')
    nel_lines.append('x 0\n\n')

# Drop the blank line after the final record but keep a single trailing newline.
output_string = ''.join(nel_lines).strip() + '\n'


In [44]:
# Persist the nel-format database built in the previous cell.
with open('lolxd.nel', 'w') as out_file:
    out_file.write(output_string)