# Network Analysis Group Report

In [None]:
#Importing libraries
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import networkx.algorithms.community as nx_comm
import matplotlib.colors as mcolors
import statistics as st
import cdlib
from cdlib import algorithms, readwrite
from IPython.core.interactiveshell import InteractiveShell
# Have IPython display the value of every top-level expression in a cell,
# not only the last one; later cells contain several bare expressions in a row.
InteractiveShell.ast_node_interactivity = 'all'

## Cleaning Data

In [None]:
# Load the raw edge-list file into a dataframe.
# header = None: no row is used as column names, so columns get integer
# labels (column 0 is the one split into node columns further down).
df = pd.read_csv('com-amazon.ungraph.txt', header = None)

In [None]:
# Preview the first rows of the raw frame
df.head()

In [None]:
# Ignore the first four rows of unwanted data.
# Take an explicit copy: the frame is extended with new columns and has a
# column dropped later on, and doing that on a bare slice of the original
# frame triggers pandas' SettingWithCopyWarning (view-vs-copy ambiguity).
df = df.iloc[4:].copy()

In [None]:
# Preview the frame after removing the leading rows
df.head()

In [None]:
# Print pandas' summary of the frame (index range, columns, dtypes, memory use)
df.info()

In [None]:
# Split the single text column (label 0) on the tab character into two new
# columns, Node1 and Node2. expand = True makes str.split return one column
# per piece instead of a column of lists.
df[["Node1", "Node2"]] = df[0].str.split("\t", expand = True)

In [None]:
# Preview the frame with the new Node1 / Node2 columns
df.head()

In [None]:
# Drop the original combined column (label 0) in place; only Node1 and Node2
# were added above, so those are the columns that remain from this notebook's edits
df.drop(0,axis=1, inplace = True)

In [None]:
# Preview the cleaned two-column frame
df.head()

In [None]:
# Write the processed data to a csv file.
# NOTE(review): index = None is falsy and presumably suppresses the index
# column (the file is re-read below and used via Node1/Node2 only);
# index=False is the documented spelling - confirm before changing.
df.to_csv("co_purchase_data.csv", index = None)

In [None]:
# Read the processed edge list back from disk; the first line of the csv is
# used as the header (read_csv default), giving the Node1 / Node2 columns
data = pd.read_csv("co_purchase_data.csv")

In [None]:
# Preview the re-loaded edge list
data.head()

In [None]:
# (rows, columns) of the full edge list
data.shape

In [None]:
# Keep only the first 10000 rows (edges) as the working subset
data_subset = data[:10000]

In [None]:
# (rows, columns) of the chosen subset
print("Shape of chosen subset : ",data_subset.shape)

In [None]:
# Preview the subset
data_subset.head()

In [None]:
# Write the subset to its own csv file.
# NOTE(review): index = None is falsy and presumably suppresses the index
# column; index=False is the documented spelling - confirm before changing.
data_subset.to_csv("co_purchased_data_subset_10000.csv", index = None)

## Visualizing the Network

In [None]:
# Read the subset file; note this rebinds `data`, which previously held the
# full edge list
data=pd.read_csv("co_purchased_data_subset_10000.csv")

In [None]:
# Build the undirected co-purchase graph from the edge list and draw it.
# from_pandas_edgelist reads the two columns directly and adds one edge per
# row, in row order. This replaces a row-by-row DataFrame.iterrows() loop,
# which is much slower and yields numpy scalars as node keys.
plt.figure(figsize=(25,25))
G1 = nx.from_pandas_edgelist(data, source='Node1', target='Node2')
nx.draw(G1, with_labels = False, node_size = 25, node_color = 'Green', font_size = 10)

## Analyzing the Network

In [None]:
# Size of the network: node count and edge count of G1
print("Number of Nodes in the network : ", G1.number_of_nodes())
print("Number of Edges in the network : ", G1.number_of_edges())

In [None]:
# Print every node of G1 as a list
print("List of Nodes : ",list(G1.nodes))

In [None]:
# Print every edge of G1 as a list of (node, node) tuples
print("List of Edges : ",list(G1.edges))

In [None]:
# Adjacency matrix of G1.
# todense() expands it to a full nodes x nodes matrix for printing, so memory
# use grows with the square of the node count.
Adj_mat = nx.adjacency_matrix(G1)
print("ADJACENCY MATRIX (A) : \n\n",Adj_mat.todense())

In [None]:
# Dimensions of the adjacency matrix
print("Shape of adjacency matrix : ",Adj_mat.shape)

In [None]:
# Check whether a path exists between each of two node pairs.
# Both calls are bare expressions; each result is shown in the cell output.
nx.has_path(G1, 10, 80558)
nx.has_path(G1, 500, 168895)

In [None]:
# Shortest paths starting from a given source node (no target is given, so
# the call is not restricted to a single destination)
print("Shortest paths from source node 1 :")
nx.shortest_path(G1, source = 1)
print("\nShortest paths from source node 500 : ")
nx.shortest_path(G1, source = 500)

In [None]:
# Shortest paths ending at a given target node (no source is given, so the
# call is not restricted to a single origin)
print("Shortest paths to target node 94156 : ")
nx.shortest_path(G1, target = 94156)
print("\nShortest paths to target node 447165 : ")
nx.shortest_path(G1, target = 447165)

In [None]:
# Shortest paths with neither source nor target fixed, i.e. for every node
# of the network
print("Shortest paths for each node in the network : ")
nx.shortest_path(G1)

In [None]:
# Shortest-path lengths with neither source nor target fixed; the result is
# converted to a dict so it is displayed in full
print("Length of the shortest paths for each node : ")
dict(nx.shortest_path_length(G1))

In [None]:
# Print whether G1 is connected
print(nx.is_connected(G1))

In [None]:
# Check whether the network is planar.
# check_planarity returns a pair; only its first element (the planarity
# flag) is printed here.
print(nx.check_planarity(G1)[0])

In [None]:
# Number of connected components in the network
print("Number of components in the network : ",nx.number_connected_components(G1))

In [None]:
# Density of the network as computed by networkx
print("Density of the network : ",nx.density(G1))

In [None]:
# Degree of each node in the network; d1 is kept for the mean-degree
# computation in the next cell
d1 = G1.degree()
print("Degree of each node in the network : \n",d1)

In [None]:
# Mean degree of the network: the sum of all node degrees divided by the
# number of NODES. Dividing by the number of edges instead always gives
# exactly 2, because every edge adds 2 to the degree sum.
degree_by_node = dict(d1)
n1 = len(degree_by_node)  # number of nodes
mean_degree = sum(degree_by_node.values()) / n1
print("Mean Degree of the network : ",mean_degree)

In [None]:
# Degree sequence in descending order, followed by the distinct degree values
# and how many nodes have each of them (np.unique with return_counts=True
# returns the pair (values, counts))
degree_sequence_G1 = sorted(dict(G1.degree()).values(), reverse=True)
degree_unique_G1 = np.unique(degree_sequence_G1, return_counts=True)
print("Unique degree values and its count : \n", degree_unique_G1)

In [None]:
# Plot the degree frequency distribution as a bar chart.
# degree_unique_G1 is the (values, counts) pair from np.unique.
unique_degrees, degree_counts = degree_unique_G1
max_degree = int(unique_degrees.max())

# One slot per degree from 0 to the maximum; degrees that never occur keep a
# frequency of 0 so the x-axis has no gaps.
degree_plot = list(range(max_degree + 1))
freq_plot = [0] * (max_degree + 1)
# Pair each degree with its count directly instead of searching for its
# position in a freshly built list on every iteration.
for degree, count in zip(unique_degrees, degree_counts):
    freq_plot[degree] = count

d = {'degree': degree_plot,
     'frequency': freq_plot}
df_G1 = pd.DataFrame(data=d)

fig, axes = plt.subplots(figsize=(35,10),nrows=1, ncols=1)

df_G1.plot.bar(x='degree', y='frequency', ax=axes)
axes.title.set_text("Degree distribution of G1")

In [None]:
# Closeness centrality of every node; the call is a bare expression, so the
# result is shown as the cell output
print("Closeness Centrality : ")
nx.closeness_centrality(G1)

In [None]:
# Betweenness centrality of every node; the call is a bare expression, so
# the result is shown as the cell output
print("Betweenness Centrality : ")
nx.betweenness_centrality(G1)

In [None]:
# Enumerate all cliques: the result of enumerate_all_cliques is materialised
# into a list and then displayed
enumerate_cliques = list(nx.enumerate_all_cliques(G1))
enumerate_cliques

In [None]:
# Find the cliques reported by nx.find_cliques.
# Enumerate them once and reuse the list for both the count and the listing,
# instead of running the enumeration twice.
Cliques = list(nx.find_cliques(G1))
print("Number of cliques : ",len(Cliques))
print("Cliques in network : ",Cliques)

In [None]:
# Core number of each node in the network; the call is a bare expression,
# so the result is shown as the cell output
print("Core number of each node : ")
nx.core_number(G1)

In [None]:
# Nodes in the main core of the network: k_core is called without a k, and
# list() over the returned graph yields its nodes
print("Nodes in the main core : ",list(nx.k_core(G1)))

In [None]:
# Main component and the nodes in it.
# nx.k_components maps each connectivity level k to the list of node sets
# that form the k-components at that level. The "main" level is the largest
# k, and its node sets are looked up by that key. Taking max() over the
# dict's values would compare lists of sets with subset ordering, which does
# not select the entry for the largest k.
k_comp = nx.k_components(G1)
k_comp_dict = dict(k_comp)
main_k = max(k_comp_dict)
print("Main component : ",main_k)
print("Nodes in main component : ",k_comp_dict[main_k])

In [None]:
# Average clustering coefficient: per-node clustering values from networkx
# (all nodes, unweighted), averaged with statistics.mean
cluster_coef = nx.clustering(G1)
per_node_clustering = cluster_coef.values()
print("Average clustering coefficient : ", st.mean(per_node_clustering))

## Finding communities using networkx community algorithm

In [None]:
# Detect communities with networkx's greedy modularity routine, then compute
# the modularity of that partition. `communities` is reused by the cells
# that build the community-level graph.
communities = list(nx_comm.greedy_modularity_communities(G1))
max_modularity=nx_comm.modularity(G1,communities)
print("Number of communities in the network: ",len(communities))
print("Modularity of the network : ",max_modularity)

In [None]:
# Build the node list and edge list of a graph in which every community is
# collapsed into a single node.

# neighbors[i]: every node adjacent (in G1) to at least one member of
# community i. Members are iterated directly rather than being re-converted
# to a list and indexed once per member.
neighbors = []
for community in communities:
    adjacent_nodes = set()
    for member in community:
        adjacent_nodes.update(G1.neighbors(member))
    neighbors.append(adjacent_nodes)

# Indicator[i][j]: how many nodes of community j are adjacent to community i.
Indicator = [
    [len(adjacent_nodes.intersection(community)) for community in communities]
    for adjacent_nodes in neighbors
]

# One node per community; communities i < j are linked when at least one
# node of j is adjacent to i.
nodeslist = list(range(len(communities)))
edgeslist = [
    (i, j)
    for i in range(len(communities))
    for j in range(i + 1, len(communities))
    if Indicator[i][j] != 0
]

In [None]:
# Community-level graph: one node per community, one edge per linked pair
G2 = nx.Graph()
G2.add_nodes_from(nodeslist)
G2.add_edges_from(edgeslist)

# Draw on the current figure, then enlarge that figure
figure = plt.gcf()
g2_draw_options = {
    'with_labels': True,
    'node_color': '#32A891',
    'node_size': 350,
    'font_weight': 'bold',
}
nx.draw(G2, **g2_draw_options)
figure.set_size_inches(40, 40)

## Girvan-Newman algorithm to detect communities

In [None]:
# Girvan-Newman algorithm: the returned object is iterated (and thereby
# consumed) by the counting loop in the next cell
partition=nx_comm.girvan_newman(G1)

In [None]:
# Count how many items the Girvan-Newman iterator yields and print that
# count (this consumes the iterator)
length_of_communities = sum(1 for _ in partition)
print(length_of_communities)
    

## InfoMap Algorithm to detect communities

In [None]:
# Run cdlib's infomap community detection on G1
coms = algorithms.infomap(G1)
# Persist the detected communities to coms.csv via cdlib's writer
readwrite.write_community_csv(coms, path="coms.csv")

In [None]:
# Read the communities file back (header=None: no row is treated as column
# names) and display the frame
coms_df=pd.read_csv("coms.csv", header=None)
coms_df

## Visualizing Communities

In [None]:
# Node IDs of the two communities to inspect.
community_nodes1 = {2215, 223703, 413504, 540136}

# The literal must be live code: leaving the assignment without a right-hand
# side is a SyntaxError that prevents the whole cell from running.
community_nodes2 = {
    673, 1457, 1476, 2229, 2492, 3237, 4596, 18489, 25769, 43132, 46910, 48954, 61562, 65912, 80630,
    90707, 99110, 99366, 101152, 102047, 102541, 102544, 114017, 118741, 123959, 125365, 131706, 132938,
    136011, 139245, 141445, 142654, 145550, 148243, 149529, 149806, 156902, 167634, 178113, 179435, 179896,
    181892, 184678, 188041, 195711, 200015, 200740, 206060, 209890, 214516, 220266, 220371, 221758, 221810,
    225879, 234108, 244470, 244877, 248495, 254401, 276692, 276751, 279859, 280431, 294844, 320219, 324873,
    334239, 336977, 342906, 353653, 355473, 359473, 359758, 374219, 375146, 382287, 384396, 389377, 390909,
    393252, 404870, 414868, 416882, 436846, 438623, 438726, 444083, 450006, 452164, 462824, 468285, 479212,
    480692, 482469, 494913, 501444, 502784, 508812, 509779, 513668, 514735, 522958, 532299, 539923,
}

# Edges of the network whose two endpoints both belong to the community
community_edges1 = []
community_edges2 = []

# A single pass over the network's edges fills both lists; each list keeps
# the order in which G1 reports its edges.
for u, v in G1.edges():
    if u in community_nodes1 and v in community_nodes1:
        community_edges1.append((u, v))
    if u in community_nodes2 and v in community_nodes2:
        community_edges2.append((u, v))

# Print the edges present in each community
print("Edges present in the community 1:", community_edges1)
print("\nEdges present in the community 2:", community_edges2)

In [None]:
# Graph of community 1, built from its internal edges only
G3 = nx.Graph()
G3.add_edges_from(community_edges1)

figure = plt.gcf()
# One colour per node.
# NOTE(review): this assumes G3 ends up with exactly four nodes - confirm.
g3_node_colors = ['skyblue', 'cyan', 'magenta', 'peachpuff']
nx.draw(
    G3,
    with_labels=True,
    node_color=g3_node_colors,
    node_size=3000,
    font_weight='bold',
)

In [None]:
# Graph of community 2, built from its internal edges only
G4 = nx.Graph()
G4.add_edges_from(community_edges2)

figure = plt.gcf()
g4_draw_options = {
    'with_labels': False,
    'node_color': '#32A891',
    'node_size': 50,
    'font_weight': 'bold',
}
nx.draw(G4, **g4_draw_options)
# Optional enlargement, currently disabled:
#figure.set_size_inches(40, 40)