# Creación de grafos
Funcionalidades para transformar los archivos ".csv" dados en grafos guardados en archivos con la extensión ".gexf".

Dos tipos de modos: 
* MODE = "user": red de usuarios unidos si han usado uno (o más) hashtags
* MODE = "hashtag": red de hashtags unidos si han sido usados por uno (o más) usuarios

In [13]:
import networkx as nx
import pandas as pd
from tqdm import tqdm
import os

In [23]:
data_folder = "data/"
graphs_folder_base = "graphs2/nodes_"

In [22]:
def create_bipartite_graph(df, graphs_folder, MANIFESTACION):
    df_h = df["hour"].unique()
    print("Creando redes bipartitas, manifestación seleccionada:", MANIFESTACION, "número de horas: ", len(df_h))
    G = nx.Graph()
    for hour in tqdm(df_h):
        df_hour = df[(df["hour"] == hour)]
        G = nx.from_pandas_edgelist(df_hour, source="user", target="hashtag", edge_attr="weight")
        nx.write_gexf(G, graphs_folder + str(hour) + ".gexf")

In [24]:
def create_graphs(node_criteria, edge_criteria, df, graphs_folder):
    df_h = df["hour"].unique()
    print("Creando redes de", node_criteria, "unidos si comparten uno o más", edge_criteria, ", manifestación seleccionada:", MANIFESTACION, "número de horas: ", len(df_h))
    for hour in tqdm(df_h):
        G = nx.Graph()
        df_hour = df[(df["hour"] == hour)]
        df_nodes = df_hour[node_criteria].unique()
        G.add_nodes_from(df_nodes)
        for node in df_nodes:
            # Seleccionamos las filas del dataframe con el usuario/hashtag sobre el que iteramos
            df_node_edge = df_hour.loc[df_hour[node_criteria] == node]
            # Seleccionamos tantos hashtags/usuarios como haya que haya compartido el usuario/hasthag respectivamente
            df_node_edge = df_node_edge[edge_criteria]
            for edge in df_node_edge:
                df_edge = df_hour.loc[df_hour[edge_criteria] == edge]
                df_edge = df_edge[node_criteria]
                for nd in df_edge:
                    if nd != node:
                        if G.has_edge(node, nd):
                            G[node][nd]["weight"] += 1
                        else:
                            G.add_edge(node, nd, weight = 1)
        nx.write_gexf(G, graphs_folder + str(hour) + ".gexf")

## MANIESTACIÓN 9N

In [17]:
# Seleccionamos el fichero correspondiente a la manifestación social de la que queremos sacar la red
MANIFESTACION = "9n"
file_selected = data_folder + MANIFESTACION + '.txt'
df_9n = pd.read_csv(file_selected, sep=' ')

### MODE HASHTAGS AS NODES

In [18]:
graphs_folder = graphs_folder_base  + "hashtag" + '/' + MANIFESTACION + '/'
create_graphs("hashtag", "user", df_9n, graphs_folder)

[437040 437036 437002 436994 437006 437037 436995 437034 437007 437022
 437015 437021 437061 437078 437038 437020 437023 437035 437018 437068
 437003 436989 437024 437025 437055 437059 437060 436986 437047 437027
 436987 437056 437065 437057 437026 437030 436998 436988 437062 437042
 437005 437041 437074 437052 436985 437012 437029 437017 437039 436992
 437019 437011 436984 437010 437063 437033 437069 436996 436991 437050
 437066 437079 436990 437028 437051 437075 437046 437031 437014 436993
 437053 437032 437016 437054 437076 437064 437077 437049 436999 437073
 437000 437004 437044 437048 437071 437072 437009 437013 437058 437070
 437008 437067 437043 437045 436997 437001 436983]
Creando redes de hashtag unidos si comparten uno o más user , manifestación seleccionada: 9n número de horas:  97


100%|██████████| 97/97 [00:34<00:00,  2.79it/s]


### MODE USERS AS NODES

In [19]:
graphs_folder = graphs_folder_base  + "user" + '/' + MANIFESTACION + '/'
create_graphs("user", "hashtag", df_9n, graphs_folder)

[437040 437036 437002 436994 437006 437037 436995 437034 437007 437022
 437015 437021 437061 437078 437038 437020 437023 437035 437018 437068
 437003 436989 437024 437025 437055 437059 437060 436986 437047 437027
 436987 437056 437065 437057 437026 437030 436998 436988 437062 437042
 437005 437041 437074 437052 436985 437012 437029 437017 437039 436992
 437019 437011 436984 437010 437063 437033 437069 436996 436991 437050
 437066 437079 436990 437028 437051 437075 437046 437031 437014 436993
 437053 437032 437016 437054 437076 437064 437077 437049 436999 437073
 437000 437004 437044 437048 437071 437072 437009 437013 437058 437070
 437008 437067 437043 437045 436997 437001 436983]
Creando redes de user unidos si comparten uno o más hashtag , manifestación seleccionada: 9n número de horas:  97


100%|██████████| 97/97 [01:16<00:00,  1.27it/s]


### MODE BIPARTITE GRAPH

In [25]:
graphs_folder = graphs_folder_base + "bipartite/" + MANIFESTACION + '/'
create_bipartite_graph(df_9n, graphs_folder, "9n")

Creando redes bipartitas, manifestación seleccionada: 9n número de horas:  97


100%|██████████| 97/97 [00:05<00:00, 18.21it/s]


## MANIFESTACION NAT

In [26]:
# Seleccionamos el fichero correspondiente a la manifestación social de la que queremos sacar la red
MANIFESTACION = "nat"
file_selected = data_folder + MANIFESTACION + '.txt'
df_nat = pd.read_csv(file_selected, sep=' ')

### MODE HASHTAGS AS NODES

In [27]:
graphs_folder = graphs_folder_base  + "hashtag" + '/' + MANIFESTACION + '/'
create_graphs("hashtag", "user", df_nat, graphs_folder)

[429569 429665 429543 429639 429663 429711 429591 429759 429735 429615
 429567 429687 429783 429788 429781 429667 429758 429642 429546 429590
 429592 429596 429612 429634 429671 429622 429672 429673 429640 429625
 429686 429575 429670 429635 429767 429638 429669 429696 429684 429601
 429620 429624 429627 429646 429650 429664 429619 429742 429621 429708
 429729 429757 429769 429710 429733 429766 429715 429768 429734 429613
 429637 429644 429668 429674 429695 429744 429675 429782 429623 429784
 429552 429722 429550 429690 429786 429762 429610 429531 429756 429577
 429609 429585 429753 429682 429752 429778 429681 429559 429532 429787
 429614 429764 429717 429736 429760 429714 429636 429689 429761 429584
 429685 429662 429540 429698 429688 429598 429578 429645 429712 429566
 429739 429725 429716 429583 429576 429699 429718 429580 429666 429649
 429599 429700 429605 429721 429773 429572 429571 429626 429741 429749
 429737 429594 429555 429723 429588 429589 429738 429754 429691 429755
 42959

100%|██████████| 260/260 [00:24<00:00, 10.40it/s]


### MODE USERS AS NODES

In [28]:
graphs_folder = graphs_folder_base  + "user" + '/' + MANIFESTACION + '/'
create_graphs("user", "hashtag", df_nat, graphs_folder)

[429569 429665 429543 429639 429663 429711 429591 429759 429735 429615
 429567 429687 429783 429788 429781 429667 429758 429642 429546 429590
 429592 429596 429612 429634 429671 429622 429672 429673 429640 429625
 429686 429575 429670 429635 429767 429638 429669 429696 429684 429601
 429620 429624 429627 429646 429650 429664 429619 429742 429621 429708
 429729 429757 429769 429710 429733 429766 429715 429768 429734 429613
 429637 429644 429668 429674 429695 429744 429675 429782 429623 429784
 429552 429722 429550 429690 429786 429762 429610 429531 429756 429577
 429609 429585 429753 429682 429752 429778 429681 429559 429532 429787
 429614 429764 429717 429736 429760 429714 429636 429689 429761 429584
 429685 429662 429540 429698 429688 429598 429578 429645 429712 429566
 429739 429725 429716 429583 429576 429699 429718 429580 429666 429649
 429599 429700 429605 429721 429773 429572 429571 429626 429741 429749
 429737 429594 429555 429723 429588 429589 429738 429754 429691 429755
 42959

100%|██████████| 260/260 [00:37<00:00,  6.99it/s]


### MODE BIPARTITE GRAPH

In [30]:
graphs_folder = graphs_folder_base + "bipartite/" + MANIFESTACION + '/'
create_bipartite_graph(df_nat, graphs_folder, "nat")

Creando redes bipartitas, manifestación seleccionada: nat número de horas:  260


100%|██████████| 260/260 [00:04<00:00, 54.18it/s]
