In [4]:
import networkx as nx
import pandas as pd
import get_dataremodelled as gdr

In [5]:
# Gender table: keep only the first row seen for each value of `Name`.
gender = pd.read_csv('gendered_titles_final.csv').drop_duplicates(subset='Name')

# Revision tables, one CSV per language suffix (en, es, de, fr).
revisions = pd.read_csv('Data/revisions_en.csv')
revisions_es = pd.read_csv('Data/revisions_es.csv')
revisions_de = pd.read_csv('Data/revisions_de.csv')
revisions_fr = pd.read_csv('Data/revisions_fr.csv')

# Community tables; these files are semicolon-separated.
communities_en = pd.read_csv('all_communities_infomap.csv', sep=';')
communities_es = pd.read_csv('all_communities_infomap_es.csv', sep=';')
communities_de = pd.read_csv('all_communities_infomap_de.csv', sep=';')
communities_fr = pd.read_csv('all_communities_infomap_fr.csv', sep=';')

In [4]:
def _community_lookup(communities, year, min_size):
    """Map each community member to its 'C<Community ID>' label.

    Keeps rows whose ``Year`` equals ``year`` and whose ``Size`` is strictly
    greater than ``min_size``, splits ``Members`` on ``', '`` and returns a
    ``{member: label}`` dict. When a member appears in several retained rows,
    the first row (in table order) wins.
    """
    selected = communities[(communities['Year'] == year) & (communities['Size'] > min_size)]
    members = (
        selected[['Members', 'Community ID']]
        # Build a new frame instead of assigning into the filtered slice.
        .assign(Members=lambda table: table['Members'].str.split(', '))
        .explode('Members')
        # A missing `Members` value stays NaN after the split; it names no node.
        .dropna(subset=['Members'])
        .drop_duplicates(subset='Members')
    )
    return dict(zip(members['Members'], 'C' + members['Community ID'].astype(str)))


def prepgraph(df, gender, communities_en, communities_other, year=2024, min_size=50):
    """Build a directed link graph for one year and annotate its nodes.

    Parameters
    ----------
    df : DataFrame
        Revision table; passed through ``gdr.prep_data`` and ``gdr.prep_links``.
        After that step it must expose the columns ``Year``, ``Name``, ``Link``
        and ``added_or_removed``.
    gender : DataFrame
        Table with ``Name`` and ``gender`` columns. Every node of the resulting
        graph must appear in it.
    communities_en, communities_other : DataFrame
        Community tables with ``Year``, ``Size``, ``Members`` (names joined by
        ``', '``) and ``Community ID`` columns.
    year : int, default 2024
        Year used to filter both the link rows and the community rows.
    min_size : int, default 50
        Only communities with ``Size`` strictly greater than this are used.

    Returns
    -------
    networkx.DiGraph
        One edge ``Name -> Link`` per row whose status is ``'added'`` or
        ``'unchanged'`` (self-links skipped). Each node carries ``CommunityEN``
        and ``CommunityLang`` (``'C<id>'`` or the string ``'None'``) and
        ``gender`` (``'f'`` if the gender table says ``'f'``, otherwise ``'m'``).

    Raises
    ------
    IndexError
        If a node of the graph has no row in ``gender``.
    """
    links = gdr.prep_data(df, gender)
    links = gdr.prep_links(links, gender)
    links = links[links['Year'] == year]

    # One set replaces the two full-frame filters that were run for every row.
    known_names = set(links['Name'])

    G = nx.DiGraph()
    for source, target, status in zip(links['Name'], links['Link'], links['added_or_removed']):
        if status not in ('added', 'unchanged') or target == source:
            continue
        G.add_node(source)
        # NOTE(review): `source` always comes from the `Name` column, so the
        # second operand is always true and every qualifying row yields an
        # edge. If only targets that have their own rows were meant to be
        # linked, this should be `target in known_names` alone -- confirm.
        if target in known_names or source in known_names:
            G.add_edge(source, target)  # also creates `target` if it is new

    en_labels = _community_lookup(communities_en, year, min_size)
    other_labels = _community_lookup(communities_other, year, min_size)

    # First row per name wins, matching a positional "first match" lookup.
    gender_rows = gender.drop_duplicates(subset='Name')
    gender_by_name = dict(zip(gender_rows['Name'], gender_rows['gender']))

    missing = [node for node in G.nodes() if node not in gender_by_name]
    if missing:
        # IndexError is the type the previous positional lookup raised.
        raise IndexError(
            f"{len(missing)} graph node(s) have no row in `gender`, e.g. {missing[:5]!r}"
        )

    for node in G.nodes():
        attrs = G.nodes[node]
        attrs['CommunityEN'] = en_labels.get(node, 'None')
        attrs['CommunityLang'] = other_labels.get(node, 'None')
        attrs['gender'] = 'f' if gender_by_name[node] == 'f' else 'm'
    return G


In [5]:
# English graph: the English community table is passed for both community
# arguments, so CommunityEN and CommunityLang are derived from the same table.
G_en = prepgraph(
    df=revisions,
    gender=gender,
    communities_en=communities_en,
    communities_other=communities_en,
)


In [4]:
# Build one graph per language and write each to a GEXF file right after it is
# built. `communities_en` is always the first community table; the second one
# is the table of the language being processed.

# English
G_en = prepgraph(
    df=revisions,
    gender=gender,
    communities_en=communities_en,
    communities_other=communities_en,
)
nx.write_gexf(G_en, "English.gexf")

# Spanish
G_es = prepgraph(
    df=revisions_es,
    gender=gender,
    communities_en=communities_en,
    communities_other=communities_es,
)
nx.write_gexf(G_es, "Spanish.gexf")

# German
G_de = prepgraph(
    df=revisions_de,
    gender=gender,
    communities_en=communities_en,
    communities_other=communities_de,
)
nx.write_gexf(G_de, "German.gexf")

# French
G_fr = prepgraph(
    df=revisions_fr,
    gender=gender,
    communities_en=communities_en,
    communities_other=communities_fr,
)
nx.write_gexf(G_fr, "French.gexf")

In [None]:
# Small graph for growth example: eight nodes V1..V8, seven directed edges,
# and a string `color` attribute on every node.
G_small = nx.DiGraph()

# Edges are added in this exact order, which fixes node and edge ordering.
for _tail, _head in [
    ('V1', 'V8'),
    ('V1', 'V2'),
    ('V2', 'V3'),
    ('V2', 'V4'),
    ('V4', 'V7'),
    ('V3', 'V6'),
    ('V2', 'V5'),
]:
    G_small.add_edge(_tail, _head)

for _node, _color in {
    'V1': '1',
    'V2': '1',
    'V3': '2',
    'V4': '2',
    'V5': '3',
    'V6': '3',
    'V7': '4',
    'V8': '5',
}.items():
    G_small.nodes[_node]['color'] = _color

nx.write_gexf(G_small, "small_example.gexf")