# In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import pyvis

# Read the alliance edge list and build the graph: one edge per row,
# joining the 'Side_a' and 'Side_b' columns.
df2 = pd.read_csv('Alliances.csv')
G = nx.from_pandas_edgelist(df2, source='Side_a', target='Side_b')

# Static overview of the whole network, drawn with matplotlib.
node_style = {'with_labels': True, 'node_size': 300, 'node_color': 'orange'}
edge_style = {'width': 2, 'edge_color': 'brown'}

plt.figure(figsize=(20, 20), dpi=100)
pos = nx.kamada_kawai_layout(G)
nx.draw(G, pos=pos, **node_style)
# Second pass draws the edges again, thicker and in brown.
nx.draw_networkx_edges(G, pos=pos, **edge_style)
plt.savefig("Network_Edges.jpg")


#Using the Pyvis library for better visualization.
from pyvis.network import Network

# Interactive HTML rendering: each node is sized by its degree centrality.
centrality = nx.degree_centrality(G)
net = Network(
    notebook=True,
    height="500px",
    width="1000px",
    bgcolor="#222222",
    font_color="white",
)

# Centrality values are scaled by 400 to get a visible node size;
# adjust the scale factor as needed.
for node in centrality:
    net.add_node(node, size=centrality[node] * 400)

for source, target in G.edges():
    net.add_edge(source, target)

net.show("network.html")

# Rank the nodes under three centrality measures, highest score first.
# Every *_sort list holds (node, score) pairs.

# Degree centrality
degree = nx.degree_centrality(G)
deg_sort = [(n, degree[n]) for n in sorted(degree, key=degree.get, reverse=True)]
deg_sort

# Betweenness centrality
bet = nx.betweenness_centrality(G)
bet_sort = [(n, bet[n]) for n in sorted(bet, key=bet.get, reverse=True)]
bet_sort

# Closeness centrality
close = nx.closeness_centrality(G)
close_sort = [(n, close[n]) for n in sorted(close, key=close.get, reverse=True)]
close_sort

#Now I will implement different prediction algorithms. 
#First I will start with the Jaccard Coefficient

import random
from sklearn import metrics
from sklearn.metrics import roc_auc_score

# Link prediction with the Jaccard coefficient.
# Evaluation: 100 times, hide a random 10% of the edges, score every
# non-edge of the reduced graph, and measure with ROC AUC how well the
# scores separate the hidden edges from the true non-edges.
proportion_edges = .1
auc_jaccard_list = []

# random.sample() requires a sequence; an EdgeView is rejected on
# Python >= 3.11, so the edges are materialised once up front.
all_edges = list(G.edges())
n_holdout = int(proportion_edges * G.number_of_edges())

for _ in range(100):
    edge_subset = random.sample(all_edges, n_holdout)
    # G is undirected and the predictor may yield a pair as (v, u) rather
    # than (u, v); frozensets make the membership test orientation-free
    # (and O(1) instead of a list scan).
    held_out = {frozenset(edge) for edge in edge_subset}

    # Create a copy of the graph and remove the held-out edges
    G_train = G.copy()
    G_train.remove_edges_from(edge_subset)

    # Make predictions using the Jaccard coefficient
    pred_jaccard = list(nx.jaccard_coefficient(G_train))
    score_jaccard, label_jaccard = zip(
        *[(s, frozenset((u, v)) in held_out) for (u, v, s) in pred_jaccard]
    )

    # Compute the ROC curve and ROC AUC score for the Jaccard coefficient
    fpr_jaccard, tpr_jaccard, _thresholds = metrics.roc_curve(label_jaccard, score_jaccard)
    auc_jaccard = roc_auc_score(label_jaccard, score_jaccard)
    auc_jaccard_list.append(auc_jaccard)

# Best AUC over the 100 runs
print(np.max(auc_jaccard_list))


# Now I implement the Adamic-Adar index: it measures how strongly two
# vertices are tied through their shared neighbors. For a pair of vertices
# it is the sum, over their common neighbors, of 1 / log(degree of that
# neighbor).
# Evaluation is the same hold-out scheme: 100 times, hide a random 10% of
# the edges, score all non-edges of the reduced graph, compute the ROC AUC.
proportion_edges = .1
auc_adamic_list = []

# random.sample() requires a sequence; an EdgeView is rejected on
# Python >= 3.11, so the edges are materialised once up front.
all_edges = list(G.edges())
n_holdout = int(proportion_edges * G.number_of_edges())

for _ in range(100):
    edge_subset = random.sample(all_edges, n_holdout)
    # G is undirected and the predictor may yield a pair as (v, u) rather
    # than (u, v); frozensets make the membership test orientation-free
    # (and O(1) instead of a list scan).
    held_out = {frozenset(edge) for edge in edge_subset}

    # Create a copy of the graph and remove the held-out edges
    G_train = G.copy()
    G_train.remove_edges_from(edge_subset)

    pred_adamic = list(nx.adamic_adar_index(G_train))
    score_adamic, label_adamic = zip(
        *[(s, frozenset((u, v)) in held_out) for (u, v, s) in pred_adamic]
    )

    # Compute the ROC curve and ROC AUC score for the Adamic-Adar index
    fpr_adamic, tpr_adamic, _thresholds = metrics.roc_curve(label_adamic, score_adamic)
    auc_adamic = roc_auc_score(label_adamic, score_adamic)
    auc_adamic_list.append(auc_adamic)

# Print results (best AUC over the 100 runs)
print(np.max(auc_adamic_list))

# Now I compute the Resource Allocation index. It measures the fraction of
# a resource (for example information) that one node can send to another
# node through their common neighbors.
# Evaluation is the same hold-out scheme: 100 times, hide a random 10% of
# the edges, score all non-edges of the reduced graph, compute the ROC AUC.
proportion_edges = .1
auc_ra_list = []

# random.sample() requires a sequence; an EdgeView is rejected on
# Python >= 3.11, so the edges are materialised once up front.
all_edges = list(G.edges())
n_holdout = int(proportion_edges * G.number_of_edges())

for _ in range(100):
    edge_subset = random.sample(all_edges, n_holdout)
    # G is undirected and the predictor may yield a pair as (v, u) rather
    # than (u, v); frozensets make the membership test orientation-free
    # (and O(1) instead of a list scan).
    held_out = {frozenset(edge) for edge in edge_subset}

    # Create a copy of the graph and remove the held-out edges
    G_train = G.copy()
    G_train.remove_edges_from(edge_subset)

    pred_ra = list(nx.resource_allocation_index(G_train))
    score_ra, label_ra = zip(
        *[(s, frozenset((u, v)) in held_out) for (u, v, s) in pred_ra]
    )

    # Compute the ROC curve and ROC AUC score for the Resource Allocation index
    fpr_ra, tpr_ra, _thresholds = metrics.roc_curve(label_ra, score_ra)
    auc_ra = roc_auc_score(label_ra, score_ra)
    auc_ra_list.append(auc_ra)

# Print results (best AUC over the 100 runs)
print(np.max(auc_ra_list))

# Now I compute the Preferential Attachment score.
# Evaluation is the same hold-out scheme: 100 times, hide a random 10% of
# the edges, score all non-edges of the reduced graph, compute the ROC AUC.
proportion_edges = .1
auc_pref_list = []

# random.sample() requires a sequence; an EdgeView is rejected on
# Python >= 3.11, so the edges are materialised once up front.
all_edges = list(G.edges())
n_holdout = int(proportion_edges * G.number_of_edges())

for _ in range(100):
    edge_subset = random.sample(all_edges, n_holdout)
    # G is undirected and the predictor may yield a pair as (v, u) rather
    # than (u, v); frozensets make the membership test orientation-free
    # (and O(1) instead of a list scan).
    held_out = {frozenset(edge) for edge in edge_subset}

    # Create a copy of the graph and remove the held-out edges
    G_train = G.copy()
    G_train.remove_edges_from(edge_subset)

    pred_pref = list(nx.preferential_attachment(G_train))
    score_pref, label_pref = zip(
        *[(s, frozenset((u, v)) in held_out) for (u, v, s) in pred_pref]
    )

    # Compute the ROC curve and ROC AUC score for Preferential Attachment
    fpr_pref, tpr_pref, _thresholds = metrics.roc_curve(label_pref, score_pref)
    auc_pref = roc_auc_score(label_pref, score_pref)
    auc_pref_list.append(auc_pref)

# Print results (best AUC over the 100 runs)
print(np.max(auc_pref_list))

# Now I compare the ROC curves of all the algorithms.
# The fpr_*/tpr_* arrays hold the curve of the LAST sampling run of each
# evaluation loop, so each legend entry reports the AUC of that same run
# (auc_ra, auc_adamic, auc_jaccard, auc_pref). The previously referenced
# secondtop_* names were never defined and raised a NameError.
plt.figure(0).clf()
RA = round(auc_ra, 3)
Adamic = round(auc_adamic, 3)
Jaccard = round(auc_jaccard, 3)
Pref = round(auc_pref, 3)
plt.title("Comparison of AUC scores")
plt.plot(fpr_pref, tpr_pref, label="Preferential Attachment, AUC =" + str(Pref))
plt.plot(fpr_adamic, tpr_adamic, label="Adamic Adar Coefficient, AUC =" + str(Adamic))
plt.plot(fpr_ra, tpr_ra, label="Resource Allocation, AUC =" + str(RA))
plt.plot(fpr_jaccard, tpr_jaccard, label="Jaccard Coefficient, AUC =" + str(Jaccard))

plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.legend(loc=0)  # loc=0 lets matplotlib pick the best legend position
plt.savefig('AUC scores.png')

#Next, I store all my predictions in .csv files
import csv


def _write_predictions(path, rows):
    """Write every (u, v, score) tuple in *rows* as one CSV line to *path*."""
    # newline='' hands line-ending control to the csv module, as the csv
    # documentation requires; the explicit encoding keeps non-ASCII node
    # names independent of the platform default.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        csv.writer(f, lineterminator='\n').writerows(rows)


# Each index is computed on the complete graph G, i.e. for every pair of
# nodes that is not connected yet.
pred_ra_complete = list(nx.resource_allocation_index(G))
_write_predictions('ra_predictions.csv', pred_ra_complete)

pred_jaccard = list(nx.jaccard_coefficient(G))
_write_predictions('Jaccard_predictions.csv', pred_jaccard)

pred_adamic = list(nx.adamic_adar_index(G))
_write_predictions('Adamic_predictions.csv', pred_adamic)

pred_pref_attach = list(nx.preferential_attachment(G))
_write_predictions('Pref_attachment_predictions.csv', pred_pref_attach)


# Now I plot the degree histogram and rank plots.
# Node degrees, largest first.
degree_sequence = sorted((d for n, d in G.degree()), reverse=True)
dmax = max(degree_sequence)

fig = plt.figure("Degree of a random graph", figsize=(12, 12))
# NOTE(review): only the bottom row of this 2x4 grid is populated, so the
# top half of the figure stays empty - confirm that this is intended.
axgrid = fig.add_gridspec(2, 4)

# Bottom-left: degree of each node against its rank.
ax1 = fig.add_subplot(axgrid[1:, :2])
ax1.plot(degree_sequence, "b-", marker="^")
ax1.set_title("Degree Rank Plot")
ax1.set_ylabel("Degree")
ax1.set_xlabel("Rank")

# Bottom-right: number of nodes per distinct degree value.
ax2 = fig.add_subplot(axgrid[1:, 2:])
ax2.bar(*np.unique(degree_sequence, return_counts=True))
ax2.set_title("Degree histogram")
ax2.set_xlabel("Degree")
ax2.set_ylabel("# of Nodes")

# Apply the layout BEFORE saving; otherwise the file on disk is written
# without the tight-layout adjustment.
fig.tight_layout()
fig.savefig("Descriptive.png")
plt.show()

# Finally, I plot the predicted edges for each algorithm.
# Each CSV is read as an edge list with a "Weight" column that is attached
# to the edges as an attribute.
E = pd.read_csv("Alliances_with_Jaccard_pred.csv")
S = nx.from_pandas_edgelist(E, 'Side_a', 'Side_b', edge_attr="Weight")

F = pd.read_csv("Alliances_with_RA_pred.csv")
F = nx.from_pandas_edgelist(F, "Side_a", "Side_b", edge_attr="Weight")

H = pd.read_csv("Alliances_with_Adamic_pred.csv")
H = nx.from_pandas_edgelist(H, "Side_a", "Side_b", edge_attr="Weight")

J = pd.read_csv("Alliances_with_Pref_pred.csv")
J = nx.from_pandas_edgelist(J, "Side_a", "Side_b", edge_attr="Weight")

# NOTE(review): T is not used anywhere in the visible code - confirm it can go.
T = nx.grid_2d_graph(4, 4)
plt.figure(figsize=(50, 50), dpi=200)

# (subplot slot, graph, panel title, layout function) for the 2x2 grid.
panels = [
    (221, S, "Jaccard Index Predictions", nx.kamada_kawai_layout),
    (222, F, "Resource Allocation Index Predictions", nx.kamada_kawai_layout),
    (223, H, "Adamic Adar Index Predictions", nx.kamada_kawai_layout),
    (224, J, "Preferential Attachment Score Predictions", nx.spring_layout),
]

for slot, graph, title, layout in panels:
    plt.subplot(slot)
    plt.margins(x=.1)
    ax = plt.gca()
    ax.set_title(title, fontsize=30)
    pos = layout(graph)
    # `edges` and `colors` were previously undefined (NameError). They are
    # now built per graph so that each edge is coloured by its own "Weight".
    # NOTE(review): assumes "Weight" is what distinguishes predicted from
    # existing edges (numeric values go through matplotlib's colormap) -
    # confirm against the CSV files.
    edges = list(graph.edges())
    colors = [graph[u][v]["Weight"] for u, v in edges]
    nx.draw(graph, pos, node_size=500, node_color='orange', with_labels=True,
            edgelist=edges, edge_color=colors, width=1)

plt.savefig('All_four_predicted_networks')



