In [12]:
import plotly.express as px
import pandas as pd
from pandas import DataFrame
from pandas.core.interchange.dataframe_protocol import DataFrame
from copy import copy
import plotly.graph_objects as go
import pandas as pd
import networkx as nx
import plotly.graph_objects as go

dataset: DataFrame = pd.read_pickle('./transform/data/metadata.pickle')

In [13]:
semester_df = copy(dataset)

semester_df['dc.date.issued'] = pd.to_datetime(semester_df['dc.date.issued'], errors='coerce')
semester_df['Ano'] = semester_df['dc.date.issued'].dt.year
semester_df['Semestre'] = semester_df['dc.date.issued'].dt.month.apply(lambda x: 1 if x <= 6 else 2)


aggregated = semester_df.groupby(['Ano', 'Semestre']).agg(
    Quantidade=('dc.title', 'count'),
    Títulos=('dc.title', lambda titles: '<br>'.join(titles))
).reset_index()

pivot_df = aggregated.pivot(index='Ano', columns='Semestre', values='Quantidade').fillna(0).reset_index()

tooltip_df = aggregated.pivot(index='Ano', columns='Semestre', values='Títulos').fillna('Sem dados').reset_index()

fig_semester_bars = go.Figure()

for semestre in [1, 2]:
    fig_semester_bars.add_trace(go.Bar(
        x=pivot_df['Ano'],
        y=pivot_df[semestre],
        name=f"Semestre {semestre}",
        text=pivot_df[semestre],
        hoverinfo='text',
        hovertext=tooltip_df[semestre]))

fig_semester_bars.update_layout(
    barmode='group',
    title='Quantidade de Trabalhos por Semestre',
    xaxis_title='Ano',
    yaxis_title='Quantidade de Trabalhos',
    legend_title='Semestre',
    xaxis=dict(tickmode='linear')
)

fig_semester_bars.show()


In [14]:
network_dataset = copy(dataset)

advisor_heights = network_dataset['dc.contributor.advisor'].value_counts().reset_index()
advisor_heights.columns = ['id', 'count']
advisor_heights['group'] = 1

subject_heights = network_dataset.explode('dc.subject')['dc.subject'].value_counts().reset_index()
subject_heights.columns = ['id', 'count']
subject_heights['group'] = 0

edges = network_dataset.explode('dc.subject')[['dc.subject', 'dc.contributor.advisor']]
edges.columns = ['source', 'target']
edges_weighted = edges.groupby(['source', 'target']).size().reset_index(name='weight')

nodes = pd.concat([advisor_heights, subject_heights], ignore_index=True)
node_attributes = nodes.set_index('id').to_dict(orient='index')


G = nx.from_pandas_edgelist(edges_weighted, source='source', target='target', edge_attr='weight')
nx.set_node_attributes(G, node_attributes)

In [15]:
pos = nx.spring_layout(G, seed=42)

# Extraindo posições e atributos para o plotly
x_nodes = [pos[node][0] for node in G.nodes()]
y_nodes = [pos[node][1] for node in G.nodes()]
node_colors = ['red' if node_attributes[node]['group'] == 1 else 'blue' for node in G.nodes()]
node_labels = list(G.nodes())

# Criando as arestas com posições
edge_x = []
edge_y = []
edge_widths = []
for edge in G.edges(data=True):
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None])  # Para desconectar as arestas
    edge_y.extend([y0, y1, None])
    edge_widths.append(edge[2]['weight'])

# Criando o traçado das arestas
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=1, color='gray'),  # As larguras serão ajustadas a seguir
    hoverinfo='none',
    mode='lines'
)

# Criando o traçado dos nós
node_trace = go.Scatter(
    x=x_nodes,
    y=y_nodes,
    mode='markers+text',
    text=node_labels,
    textposition='top center',
    marker=dict(
        size=20,
        color=node_colors,
        line=dict(width=2, color='black')
    ),
    hoverinfo='text'
)

# Configurando o layout
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    title='Orientadores e Temas',
                    titlefont_size=16,
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=40),
                    xaxis=dict(showgrid=False, zeroline=False),
                    yaxis=dict(showgrid=False, zeroline=False)
                ))

fig.show()

In [16]:
# Filtrar nós com mais de 2 conexões
filtered_nodes = [node for node in G.nodes() if G.degree(node) > 2]

# Filtrar arestas que conectam os nós filtrados
filtered_edges = [(u, v, data) for u, v, data in G.edges(data=True) if u in filtered_nodes and v in filtered_nodes]

# Gerar um subgrafo com os nós e arestas filtrados
subgraph = G.subgraph(filtered_nodes)

# Atualizar as posições com base no subgrafo
pos = nx.spring_layout(subgraph, seed=42)

# Extraindo posições e atributos para o Plotly
x_nodes = [pos[node][0] for node in subgraph.nodes()]
y_nodes = [pos[node][1] for node in subgraph.nodes()]
node_colors = ['red' if node_attributes[node]['group'] == 1 else 'blue' for node in subgraph.nodes()]
node_labels = list(subgraph.nodes())

# Criando as arestas com posições
edge_x = []
edge_y = []
for edge in subgraph.edges(data=True):
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None])  # Para desconectar as arestas
    edge_y.extend([y0, y1, None])

# Criando o traçado das arestas
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=1, color='gray'),
    hoverinfo='none',
    mode='lines'
)

# Criando o traçado dos nós
node_trace = go.Scatter(
    x=x_nodes,
    y=y_nodes,
    mode='markers+text',
    text=node_labels,
    textposition='top center',
    marker=dict(
        size=20,
        color=node_colors,
        line=dict(width=2, color='black')
    ),
    hoverinfo='text'
)

# Configurando o layout
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    title='Orientadores e Temas (Nós com mais de 2 conexões)',
                    titlefont_size=16,
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=40),
                    xaxis=dict(showgrid=False, zeroline=False),
                    yaxis=dict(showgrid=False, zeroline=False)
                ))

fig.show()


In [17]:
import networkx as nx
import pandas as pd
import plotly.graph_objects as go

# Filtrar temas com weight > 2
filtered_nodes = [
    node for node, attr in node_attributes.items()
    if attr['group'] == 0 and attr['count'] > 2 or attr['group'] == 1
]

# Criar um subgrafo com os nós e arestas filtrados
filtered_edges = [
    (u, v, d) for u, v, d in G.edges(data=True) if u in filtered_nodes and v in filtered_nodes
]

# Criar o novo grafo filtrado
filtered_graph = nx.Graph()
filtered_graph.add_edges_from(filtered_edges)

# Adicionar atributos de nós ao subgrafo
nx.set_node_attributes(filtered_graph, {
    node: attr for node, attr in node_attributes.items() if node in filtered_nodes
})

# Gerar posições
pos = nx.spring_layout(filtered_graph, seed=42, k=10)

# Definir tamanho dos nós proporcional ao peso
node_sizes = [
    10 + 2 * attr['count'] for node, attr in filtered_graph.nodes(data=True)
]

# Definir cores dos nós com base no grupo
node_colors = [
    'red' if attr['group'] == 1 else 'blue'
    for node, attr in filtered_graph.nodes(data=True)
]

# Criar as posições para arestas
edge_x = []
edge_y = []
for edge in filtered_graph.edges(data=True):
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

# Criar traçado das arestas
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=1, color='gray'),
    hoverinfo='none',
    mode='lines'
)

# Criar traçado dos nós
x_nodes = [pos[node][0] for node in filtered_graph.nodes()]
y_nodes = [pos[node][1] for node in filtered_graph.nodes()]
node_labels = list(filtered_graph.nodes())

node_trace = go.Scatter(
    x=x_nodes,
    y=y_nodes,
    mode='markers+text',
    text=node_labels,
    textposition='top center',
    marker=dict(
        size=node_sizes,
        color=node_colors,
        line=dict(width=2, color='black')
    ),
    hoverinfo='text'
)

# Configurar o layout e exibir o gráfico
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    title='Orientadores e Temas (Filtrados por Weight)',
                    titlefont_size=16,
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=40),
                    xaxis=dict(showgrid=False, zeroline=False),
                    yaxis=dict(showgrid=False, zeroline=False)
                ))

fig.show()


In [18]:
import plotly.graph_objects as go
import forceatlas2
# Gerando posições com ForceAtlas2
positions = forceatlas2.forceatlas2_networkx_layout(G, pos=None)

# Extraindo coordenadas
x_nodes = [positions[node][0] for node in G.nodes()]
y_nodes = [positions[node][1] for node in G.nodes()]
node_labels = list(G.nodes())

# Criando traçado de nós
node_trace = go.Scatter(
    x=x_nodes,
    y=y_nodes,
    mode="markers+text",
    text=node_labels,
    marker=dict(size=10, color="blue"),
    hoverinfo="text"
)

# Criando arestas
edge_x = []
edge_y = []
for edge in G.edges():
    x0, y0 = positions[edge[0]]
    x1, y1 = positions[edge[1]]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color="gray"),
    hoverinfo="none",
    mode="lines"
)

# Configurando e exibindo o gráfico
fig = go.Figure(data=[edge_trace, node_trace])
fig.update_layout(title="ForceAtlas2 Layout", showlegend=False)
fig.show()

AttributeError: module 'networkx' has no attribute 'to_numpy_matrix'

In [19]:
import pandas as pd
import networkx as nx
from fa2 import ForceAtlas2
import plotly.graph_objects as go

# 1. Carregar os dados
# Supondo que 'network_dataset' seja um DataFrame já definido anteriormente
# com as colunas 'dc.contributor.advisor' e 'dc.subject'.

# Processar os nós (orientadores e temas)
advisor_heights = network_dataset['dc.contributor.advisor'].value_counts().reset_index()
advisor_heights.columns = ['id', 'count']
advisor_heights['group'] = 1  # Grupo 1 para orientadores

subject_heights = network_dataset.explode('dc.subject')['dc.subject'].value_counts().reset_index()
subject_heights.columns = ['id', 'count']
subject_heights['group'] = 0  # Grupo 0 para temas

# Criar as arestas
edges = network_dataset.explode('dc.subject')[['dc.subject', 'dc.contributor.advisor']]
edges.columns = ['source', 'target']
edges_weighted = edges.groupby(['source', 'target']).size().reset_index(name='weight')

# Criar os nós
nodes = pd.concat([advisor_heights, subject_heights], ignore_index=True)
node_attributes = nodes.set_index('id').to_dict(orient='index')

# Criar o grafo a partir das arestas
G = nx.from_pandas_edgelist(edges_weighted, source='source', target='target', edge_attr='weight')
nx.set_node_attributes(G, node_attributes)

# 2. Filtrar apenas nós de temas com peso > 2 e seus orientadores conectados
filtered_nodes = [
    node for node, attr in node_attributes.items()
    if (attr['group'] == 0 and attr['count'] > 2) or attr['group'] == 1
]

filtered_edges = [
    (u, v, d) for u, v, d in G.edges(data=True) if u in filtered_nodes and v in filtered_nodes
]

filtered_graph = nx.Graph()
filtered_graph.add_edges_from(filtered_edges)

# Adicionar atributos de volta ao subgrafo
nx.set_node_attributes(filtered_graph, {
    node: attr for node, attr in node_attributes.items() if node in filtered_nodes
})

# 3. Gerar o layout ForceAtlas2
forceatlas2 = ForceAtlas2(
    outboundAttractionDistribution=True,
    linLogMode=False,
    adjustSizes=True,
    scalingRatio=2.0,
    gravity=1.0,
    barnesHutOptimize=True,
    barnesHutTheta=1.2,
    iterations=2000
)

adj_matrix = nx.to_numpy_array(filtered_graph)
positions = forceatlas2.forceatlas2_networkx_layout(filtered_graph, pos=None)

# 4. Preparar os dados para Plotly
x_nodes = [positions[node][0] for node in filtered_graph.nodes()]
y_nodes = [positions[node][1] for node in filtered_graph.nodes()]
node_sizes = [10 + 2 * node_attributes[node]['count'] for node in filtered_graph.nodes()]
node_colors = ['red' if node_attributes[node]['group'] == 1 else 'blue' for node in filtered_graph.nodes()]
node_labels = list(filtered_graph.nodes())

# Arestas
edge_x = []
edge_y = []
for edge in filtered_graph.edges(data=True):
    x0, y0 = positions[edge[0]]
    x1, y1 = positions[edge[1]]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

# Traçados para Plotly
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color='gray'),
    hoverinfo='none',
    mode='lines'
)

node_trace = go.Scatter(
    x=x_nodes,
    y=y_nodes,
    mode='markers+text',
    text=node_labels,
    textposition='top center',
    marker=dict(
        size=node_sizes,
        color=node_colors,
        line=dict(width=2, color='black')
    ),
    hoverinfo='text'
)

# 5. Configurar e exibir o gráfico
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    title="ForceAtlas2 Layout (Orientadores e Temas)",
                    titlefont_size=16,
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=40),
                    xaxis=dict(showgrid=False, zeroline=False),
                    yaxis=dict(showgrid=False, zeroline=False)
                ))

fig.show()

ModuleNotFoundError: No module named 'fa2'