## Journal network

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import networkx as nx
import scipy.stats as stats
import pickle

from itertools import chain, combinations
from scipy.cluster.hierarchy import dendrogram
from unidecode import unidecode
from collections import Counter
from operator import itemgetter

### Loading data

In [None]:
# Locations of the Excel workbooks used throughout the notebook.
authorsDataPath = "magazine/data/authorsData.xlsx"
combinedWorksPath = "magazine/data/combinedWorks.xlsx"
workPerAuthorPath = "magazine/data/workPerAuthor.xlsx"
authorsAndWorksPath = "magazine/data/authorsAndWorks.xlsx"

# Read each workbook into its own DataFrame (same order as the paths above),
# using pandas' default read_excel settings.
authorsData, combinedWorks, workPerAuthor, authorsAndWorks = map(
    pd.read_excel,
    (authorsDataPath, combinedWorksPath, workPerAuthorPath, authorsAndWorksPath),
)

# Preview the first rows of the author/work table.
authorsAndWorks.head(15)

In [None]:
# Count the rows recorded under each journal title, order the journals from
# most to fewest rows, and display the 50 largest.
(
    authorsAndWorks
    .groupby('Source title')
    .size()
    .reset_index(name='Num of work per Source title')
    .sort_values(by='Num of work per Source title', ascending=False)
    .head(50)
)

### Forming a network of journals

In [None]:
# Removing surrounding blank characters from the journal titles.
# The vectorised `.str` accessor leaves missing titles as NaN, whereas calling
# `x.strip()` on a float NaN raises AttributeError.
authorsAndWorks["Source title"] = authorsAndWorks["Source title"].str.strip()

# Network labels: one node per distinct, non-missing journal title
networkLabels = authorsAndWorks['Source title'].dropna().unique()
print(f"Number of journals is: {len(networkLabels)}")

magazineNetwork = nx.DiGraph()

# Adding nodes (journals)
magazineNetwork.add_nodes_from(networkLabels)

# Adding edges with weights: every author who appears in two different
# journals adds 1 to the weight of the edge between that pair of journals.
#
# The journals of each author are sorted before pairing so that a given pair
# always maps to the same directed edge (smaller title -> larger title).
# Without a canonical order the direction depends on the row order of each
# author's works, so the count for one pair can be split between (A, B) and
# (B, A); a later `to_undirected()` then keeps only one of the two partial
# weights instead of their sum.
for _, group_data in authorsAndWorks.groupby('Author'):
    journals = sorted(group_data['Source title'].dropna().unique())
    # combinations() yields nothing when the author has fewer than 2 journals
    for source, target in combinations(journals, 2):
        if magazineNetwork.has_edge(source, target):
            # If the edge already exists, increase the weight
            magazineNetwork.edges[source, target]['weight'] += 1
        else:
            # If the edge does not exist, add it with weight 1
            magazineNetwork.add_edge(source, target, weight=1)

print(magazineNetwork.edges.data("weight"))
print(magazineNetwork.number_of_nodes())
print(magazineNetwork.number_of_edges())

# Persist the network so it can be reloaded without rebuilding it.
nx.write_gml(magazineNetwork, "magazine/models/journal-networks.gml")

## 2. Basic characterization of modeled networks
### 2.1 Clustering coefficient

In [None]:
# Undirected version of the journal network; the component and clustering
# statistics below are computed on it.
H = magazineNetwork.to_undirected()

print("Number of connected components: " , nx.number_connected_components(H))

# Connected components as node sets, ordered from largest to smallest.
connected_componnets = list(nx.connected_components(H))
connected_componnets.sort(key=len, reverse=True)

print("Size of connected components:")
for component_size in map(len, connected_componnets):
    print(component_size , "  ")

# Average of the per-node coefficients next to the transitivity of the graph.
average_cc_unweighted = nx.average_clustering(H)
transitivity_value = nx.transitivity(H)
print( "Average clustering coefficient: " , average_cc_unweighted, ", Global clustering coefficient:" , transitivity_value )

### 2.2 Local clustering coefficient

In [None]:
# Weighted local clustering coefficient of every journal (node) in H.
local_clustering = nx.clustering(H, weight = "weight")

# Take keys and values from the mapping directly instead of unpacking
# `zip(*items())`, which raises ValueError when the graph has no nodes.
author = tuple(local_clustering.keys())
clustering_coef = tuple(local_clustering.values())

# Keep only the journals whose coefficient is strictly positive for the table.
no_zero = [(auth, cc)  for auth, cc in zip(author, clustering_coef) if cc > 0]

# Sorted ascending by coefficient; the original row labels are preserved.
df = pd.DataFrame(no_zero, columns = ["Source title", "Cc"]).sort_values('Cc')

# `default=0` keeps the cell runnable on an empty graph.
max_local_degree_of_clustering = max(clustering_coef, default=0)

# Mean of the same weighted coefficients used for the maximum, the table and
# the histogram, so that all figures reported in this cell describe one
# distribution (zeros included). Previously the average was computed without
# weights while everything else in the cell was weighted.
average_degree_of_clustering = (
    sum(clustering_coef) / len(clustering_coef) if clustering_coef else 0
)

print(f"Max local cc: {max_local_degree_of_clustering}")
print(f"Average cc: {average_degree_of_clustering}")
print("Non-zero local degrees of clustering:")
print(df)

# Distribution of the weighted local coefficients over all journals.
plt.hist(clustering_coef, bins=10, edgecolor='black', color = "green")

plt.title('Local Clustering Coefficient Histogram')
plt.xlabel('Local Clustering Coefficient')
plt.ylabel('Frequency')

plt.savefig('magazine/images/local_cc_mag_network.png')
plt.show()

### 2.3 Assortative mixing

In [None]:
def plot_deg_corr(g, xscale = "linear", yscale = "linear",
                  save_path = 'magazine/images/mag_net_asortativity_mix.png'):
    """Scatter-plot the degrees found at the two ends of every edge of ``g``.

    Each edge (u, v) contributes two points, (deg u, deg v) and
    (deg v, deg u), so the cloud is symmetric about the diagonal.

    Parameters
    ----------
    g : graph
        Object exposing ``edges()`` and ``degree()`` the way networkx graphs do.
    xscale, yscale : str
        Axis scales passed to matplotlib (e.g. "linear" or "log").
    save_path : str or None
        Where the figure is written before being shown. Pass ``None`` to skip
        saving. Defaults to the path this notebook has always used.
    """
    # Look every degree up once instead of once per edge endpoint.
    degree_of = dict(g.degree())

    first = []
    second = []
    for u, v in g.edges():
        first.extend((degree_of[u], degree_of[v]))
        second.extend((degree_of[v], degree_of[u]))

    fig = plt.figure(1)
    ax = fig.gca()

    ax.set_xlabel('degree')
    ax.set_xscale(xscale)
    ax.set_ylabel('degree')
    ax.set_yscale(yscale)

    # An edgeless graph has no degrees to take a maximum of; leave the default
    # limits in that case instead of raising ValueError from max().
    if first:
        ax.set_xlim(1, max(first))
        ax.set_ylim(1, max(second))

    ax.scatter(first, second, marker='.')
    if save_path is not None:
        fig.savefig(save_path)
    plt.show()

# Degree-degree scatter for the undirected journal network, on linear axes
# (spelled out here; these are also the function's defaults).
plot_deg_corr(H, xscale="linear", yscale="linear")

# Degree assortativity coefficient of the same graph.
assortativity = nx.degree_assortativity_coefficient(H)
print(f"Mixing assortativeness coefficient: {assortativity}")