PART 3

In [40]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


Q1: Centrality Calculation Matrix 

Calculate the following centrality measures for ALL nodes:
1. Degree Centrality
2. Betweenness Centrality
3. Closeness Centrality
   
Deliverables:
• Create a pandas DataFrame with Node ID and all three centrality measures.
• All centrality values must be normalized to a 0-1 scale for comparison.
• Identify the top 5 nodes for each centrality measure.

In [41]:
# Edge list of the Karate Club network analysed in this part.
# Each tuple (u, v) is one undirected edge between integer node IDs; the
# graph built from it below reports 34 nodes and 78 edges.
karate_edges = [
    (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (0, 10), (0, 11), (0, 12), (0, 13), (0, 17), (0, 19), (0, 21), (0, 31),
    (1, 2), (1, 3), (1, 7), (1, 13), (1, 17), (1, 19), (1, 21), (1, 30),
    (2, 3), (2, 7), (2, 8), (2, 9), (2, 13), (2, 27), (2, 28), (2, 32),
    (3, 7), (3, 12), (3, 13),
    (4, 6), (4, 10),
    (5, 6), (5, 10), (5, 16),
    (6, 16),
    (8, 30), (8, 32), (8, 33),
    (9, 33),
    (13, 33),
    (14, 32), (14, 33),
    (15, 32), (15, 33),
    (18, 32), (18, 33),
    (19, 33),
    (20, 32), (20, 33),
    (22, 32), (22, 33),
    (23, 25), (23, 27), (23, 29), (23, 32), (23, 33),
    (24, 25), (24, 27), (24, 31),
    (25, 31),
    (26, 29), (26, 33),
    (27, 33),
    (28, 31), (28, 33),
    (29, 32), (29, 33),
    (30, 32), (30, 33),
    (31, 32), (31, 33),
    (32, 33)
]

In [42]:
# Build the undirected Karate Club graph in one call; nodes are created
# implicitly, in the order they first appear in the edge list.
G = nx.Graph()
G.add_edges_from(karate_edges)

# Sanity check on the size of the graph.
print(G.number_of_nodes())
print(G.number_of_edges())

34
78


In [43]:
# Centrality scores for every node of G, each returned as a {node: score} dict.
# networkx reports all three on a 0-1 scale; the betweenness normalization is
# spelled out because the task requires comparable, normalized values.
ct = nx.degree_centrality(G)
bw = nx.betweenness_centrality(G, normalized=True)
cl = nx.closeness_centrality(G)

# Node IDs 0..n-1, sized from the graph instead of a hard-coded 34.
# Assumes the node IDs are the contiguous integers 0..n-1, as in karate_edges.
vertex = np.arange(G.number_of_nodes())


In [44]:
# Order each centrality dict by node ID so every column lines up row-by-row
# with `vertex`. Looking the scores up by key removes the hard-coded list
# length and the reliance on node IDs being valid list positions.
ct_g = [ct[node] for node in vertex]
bw_g = [bw[node] for node in vertex]
cl_g = [cl[node] for node in vertex]

# One row per node: its ID followed by the three centrality measures.
df = pd.DataFrame({'Nodes': vertex,
                   'Degree': ct_g,
                   'Betweenness': bw_g,
                   'Closeness': cl_g})

In [46]:
# Rank nodes from highest to lowest degree centrality and show the top five IDs.
df_sorted = df.sort_values('Degree', ascending=False)
top_nodes = df_sorted[['Nodes']].head(5)
print("Top 5 nodes for Degree is :\n", top_nodes)

Top 5 nodes for Degree is :
     Nodes
33     33
0       0
32     32
2       2
1       1


In [58]:
# Rank nodes from highest to lowest betweenness centrality and show the top five IDs.
df_sorted = df.sort_values('Betweenness', ascending=False)
top_nodes = df_sorted[['Nodes']].head(5)
print("Top 5 nodes for Betweenness is :\n", top_nodes)

Top 5 nodes for Betweenness is :
     Nodes
0       0
33     33
32     32
2       2
31     31


In [59]:
# Rank nodes from highest to lowest closeness centrality and show the top five IDs.
df_sorted = df.sort_values('Closeness', ascending=False)
top_nodes = df_sorted[['Nodes']].head(5)
print("Top 5 nodes for Closeness is :\n", top_nodes)

Top 5 nodes for Closeness is :
     Nodes
0       0
2       2
33     33
31     31
13     13


Q2: Create a Centrality Correlation Scatter Plot

Visualize the relationship between different centrality measures to understand how
they correlate in the Karate Club network. Create a scatter plot that shows the
relationship between Degree Centrality and Betweenness Centrality.
Requirements:
• X-axis: Degree Centrality values
• Y-axis: Betweenness Centrality values
• Professional formatting: Title, axis labels, grid
    
Note: For solving question 2 you should have completed Question 1 and have:
• A pandas DataFrame (centrality df) with all centrality measures
• Top 5 nodes identified for each centrality measure

In [None]:
# Degree vs. betweenness centrality, one point per node.
fig, ax = plt.subplots()

# The label gives the legend an entry; calling legend() with no labelled
# artists only produces a warning and an empty box.
ax.scatter(ct_g, bw_g, label='Nodes')
ax.set_xlabel('Degree Centrality Value')
ax.set_ylabel('Betweenness Centrality Values')
ax.set_title('Centrality Correlation Scatter Plot')
ax.legend()
ax.grid(True)
plt.show()

PART 1

Q1 
Task: Actor Collaboration Strengths (Node & Edge Weights)

Print the number of nodes and edges in a graph built as follows:
• Nodes represent actors who have acted in films with an IMDB rating between
8.5 and 9.
• Edges represent movie collaborations between two actors (added if they have
a film in common).
• Edge weights are increased by 1 for every movie collaboration, representing
the strength of actor collaborations.

Workflow :
1. Filter rows with “Rating” between 8.5 and 9.
2. Form all pairs of actors belonging to a particular film.
3. Add the pairs as edges to an initially empty graph (created using networkx).
4. Increase the ’weight’ property of an edge by 1, every time the corresponding
pair is encountered.

In [127]:
# Load the actor/film table from the working directory. Later cells read its
# 'Rating', 'Film' and 'Actor' columns.
df_actor = pd.read_csv('actorfilms.csv')


In [128]:
# Keep rows whose rating lies strictly between 8.5 and 9 (both ends excluded).
# NOTE(review): the task text says "between 8.5 and 9" - confirm whether
# ratings of exactly 8.5 or 9 are meant to be included.
df_actor = df_actor.loc[df_actor['Rating'].gt(8.5) & df_actor['Rating'].lt(9)]


In [129]:
# Map every film title to the list of actors credited in it.
grouped_actors = df_actor.groupby('Film')

res = {film: list(cast['Actor']) for film, cast in grouped_actors}



In [78]:
from itertools import combinations

# Collaboration graph: one node per actor, one edge per pair of actors that
# share a film, with 'weight' counting how many films the pair has in common.
# A film with a single listed actor yields no pairs, so it adds no node.
actor = nx.Graph()

for cast in res.values():
    for first, second in combinations(cast, 2):
        if actor.has_edge(first, second):
            # Pair seen before in another film: strengthen the existing edge.
            actor[first][second]['weight'] += 1
        else:
            actor.add_edge(first, second, weight=1)

In [81]:

# Number of actors (nodes) in the collaboration graph.
actor.number_of_nodes()

292

In [82]:
# Number of distinct actor pairs (edges) in the collaboration graph.
actor.number_of_edges()

2154

Q2 Task: Collaboration Network (Graph) Visualization

Visualize the collaboration network with:
• Only those edges and their respective nodes that have an edge weight of 2
or above (stronger collaborations only).
• Nodes should be labelled by the actor names.
Workflow :
1. Form a new graph by selecting those edges with a weight greater than 1.
2. Print the number of nodes and edges of the above subgraph
3. Nodes, Edges, and node labels can be drawn using the library functions from
networkx and matplotlib.

In [None]:
# Keep only the stronger collaborations: edges whose weight is 2 or more,
# together with the actors they connect.
reduced_actor = nx.Graph()
reduced_actor.add_weighted_edges_from(
    (u, v, attrs['weight'])
    for u, v, attrs in actor.edges(data=True)
    if attrs['weight'] > 1
)

# Seeded spring layout so the drawing is the same on every run.
fig, ax = plt.subplots(figsize=(25, 25))
pos = nx.spring_layout(reduced_actor, seed=25, k=2.5)

nx.draw_networkx_nodes(reduced_actor, pos, ax=ax)
nx.draw_networkx_edges(reduced_actor, pos, ax=ax)
# With no explicit labels, each node is labelled with its ID (the actor name).
nx.draw_networkx_labels(reduced_actor, pos, ax=ax)
plt.show()

In [88]:
# Number of actors left after keeping only edges with weight > 1.
reduced_actor.number_of_nodes()

35

In [89]:
# Number of collaborations with weight > 1.
reduced_actor.number_of_edges()

305

PART 2

Q1: Total Users and Trust Relationships
Task:
1. Load the dataset into Python.
2. Create a graph using the dataset.
3. Print the total number of users (nodes) and trust relationships (edges) in the
entire network.

In [2]:
# Load the Epinions trust edge list: integer node IDs, lines starting with
# '%' skipped as comments, no edge attributes parsed.
# NOTE(review): without create_using this builds an undirected graph, so the
# direction of each trust relationship is dropped - confirm that is intended.
Network = nx.read_edgelist(
    "soc-Epinions1.txt",
    comments='%',
    nodetype=int,
    data=False,
)

user_count = Network.number_of_nodes()
trust_count = Network.number_of_edges()
print("Total Number of Users :", user_count)
print("Total Number of trust relationships :", trust_count)

NameError: name 'nx' is not defined

2. Q: Random 1000 Users from total number of users (nodes) – With and
Without Isolated Users
We want to study a small random sample of 1000 users:

    2. (a): Select 1000 random users and create a subgraph.
• Print the number of users (nodes) and trust relationships (edges).
• Plot the subgraph.

In [103]:
import random

# Draw the 1000-user sample from a dedicated, seeded generator so that the
# subgraph (and every result derived from it) is identical on each run.
rng = random.Random(25)
random_nodes = rng.sample(list(Network.nodes()), 1000)

# Induced subgraph: the sampled users plus only the edges between them.
sub_Network = Network.subgraph(random_nodes)

print("Total Number of Users :" , sub_Network.number_of_nodes())
print("Total Number of trust relationships :" , sub_Network.number_of_edges())

Total Number of Users : 1000
Total Number of trust relationships : 71


In [None]:
# Draw the sampled subgraph, isolated users included, with a seeded layout.
fig, ax = plt.subplots(figsize=(25, 25))
pos = nx.spring_layout(sub_Network, seed=25, k=2.5)

nx.draw_networkx_nodes(sub_Network, pos, ax=ax)
nx.draw_networkx_edges(sub_Network, pos, ax=ax)
plt.show()

2(b): Remove isolated users (users with no trust relationships).
• Print the number of users (nodes) and trust relationships (edges) after removal.
• Plot the subgraph again to compare.

In [112]:
# Users in the sample that have no trust relationship inside the sample.
nodes = list(nx.isolates(sub_Network))

# sub_Network is a subgraph view and cannot be modified in place, so copy it
# into an independent graph before dropping the isolated users.
unfrozen_graph = nx.Graph(sub_Network)
unfrozen_graph.remove_nodes_from(nodes)

# Required deliverable: size of the subgraph after removing isolated users.
print("Users after removing isolated users :", unfrozen_graph.number_of_nodes())
print("Trust relationships after removing isolated users :", unfrozen_graph.number_of_edges())

In [None]:
# Draw the sample again after the isolated users have been removed, using the
# same layout settings as the earlier plot for comparison.
fig, ax = plt.subplots(figsize=(25, 25))
pos = nx.spring_layout(unfrozen_graph, seed=25, k=2.5)

nx.draw_networkx_nodes(unfrozen_graph, pos, ax=ax)
nx.draw_networkx_edges(unfrozen_graph, pos, ax=ax)
plt.show()

Q3: Similarity Between Users (Jaccard Coefficient) Given two users A and B
in a social network:
Let N(A) be the set of users that A trusts & N(B) be the set of users that B trusts.
The Jaccard coefficient is calculated as:
$$J(A, B) = \frac{|N(A) \cap N(B)|}{|N(A) \cup N(B)|}$$
It measures how similar two users are based on the overlap of their friends.
Task:
1. Using the random 1000-user subgraph (before isolated nodes are removed),
calculate the Jaccard similarity for each pair of connected users.
2. Print the top 5 pairs of users with the highest similarity scores.

In [124]:
# Score only pairs of users that are actually connected. With ebunch=None,
# networkx scores every NON-adjacent pair instead, which is the opposite of
# what the task asks for. Self-loops are skipped because a user paired with
# themselves would trivially score 1.0.
connected_pairs = [(u, v) for u, v in sub_Network.edges() if u != v]
ls = nx.jaccard_coefficient(sub_Network, ebunch=connected_pairs)

# Each item is (u, v, score); order from most to least similar.
preds = sorted(ls, key=lambda x: x[2], reverse=True)

In [1]:
# Five highest-scoring (user, user, similarity) triples.
preds[:5]

NameError: name 'preds' is not defined