## Import related libraries

In [1]:
import csv
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from itertools import combinations
from operator import itemgetter
from collections import Counter
%matplotlib inline

In [2]:
# Load the actor/film table from the CSV file in the working directory
index = pd.read_csv("Filmandactor.csv")

# Preview the first five rows
index.head()

Unnamed: 0,Actor,Film,Year,Genre,Gender,Races,Debut year
0,Amai Kamarudin,7 Hari Kisah Cinta Langkawi,2019,Comedy,Male,Malay,2008
1,Josiah Hogan,7 Hari Kisah Cinta Langkawi,2019,Comedy,Male,Malay,2013
2,Shaharuddin Thamby,7 Hari Kisah Cinta Langkawi,2019,Comedy,Male,Malay,1989
3,Sharifah Shahirah,7 Hari Kisah Cinta Langkawi,2019,Comedy,Female,Malay,1993
4,Chi Azim,7 Hari Kisah Cinta Langkawi,2019,Comedy,Male,Malay,2019


## Calculate the total number of films for each actor

In [3]:
# Count the rows per actor, then turn the counts into a one-column frame
# ('Total film') whose index is named 'Actor'
actor_counts = index['Actor'].value_counts()
new_index = actor_counts.rename_axis('Actor').to_frame('Total film')

# Preview the first five rows
new_index.head()

Unnamed: 0_level_0,Total film
Actor,Unnamed: 1_level_1
Pekin Ibrahim,16
Namron,12
Soffi Jikan,10
Remy Ishak,10
Zizan Razak,9


In [4]:
# Attach the per-actor 'Total film' count to every row of the original table
index = index.merge(new_index, how='outer', on='Actor')

# Preview the first five rows
index.head()

Unnamed: 0,Actor,Film,Year,Genre,Gender,Races,Debut year,Total film
0,Amai Kamarudin,7 Hari Kisah Cinta Langkawi,2019,Comedy,Male,Malay,2008,2
1,Amai Kamarudin,Temuan Takdir,2016,Crime,Male,Malay,2008,2
2,Josiah Hogan,7 Hari Kisah Cinta Langkawi,2019,Comedy,Male,Malay,2013,6
3,Josiah Hogan,KL Special Force,2018,Action,Male,Malay,2013,6
4,Josiah Hogan,KL Vampires,2019,Action,Male,Malay,2013,6


## Creating nodes and edges from our data

In [5]:
def get_combinations(group):
    """Return every unordered pair of distinct actors found in ``group``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to one group (one film when used with
        ``groupby('Film')``); only the 'Actor' column is read.

    Returns
    -------
    pandas.DataFrame
        One row per pair with columns 'from' and 'to'. The two names in a
        row are sorted, so the same pair always has the same orientation.
        The frame is empty (but keeps both columns) when the group has
        fewer than two distinct actors.
    """
    # Missing names are dropped because sorting a str/NaN pair raises TypeError.
    # Repeated names are dropped so a duplicated row cannot create a self-pair
    # (A, A) or count the same pair more than once within a single group.
    actors = group['Actor'].dropna().drop_duplicates().tolist()
    pairs = [sorted(pair) for pair in combinations(actors, 2)]
    return pd.DataFrame(pairs, columns=['from', 'to'])

# Build the co-appearance edge list.
# Step 1: for each film, list every pair of its actors. Only the 'Actor' column
# is handed to get_combinations, so the grouping column 'Film' is not part of
# the applied frame (newer pandas deprecates including it).
edges = index.groupby('Film')[['Actor']].apply(get_combinations)
# Step 2: count how often each (from, to) pair occurs across films -> 'weight'
edges = edges.groupby(['from', 'to']).size().to_frame('weight').reset_index()
#edges.to_csv('Actor_edges.csv', index=False) #save into different file
edges.head()

Unnamed: 0,from,to,weight
0,A. Galak,AC Mizal,1
1,A. Galak,Achey Bocey,1
2,A. Galak,Along Eyzendy,1
3,A. Galak,Atu Zero,1
4,A. Galak,Azlee Senario,1


In [6]:
# Build the node table: keep the first row of each actor and remove the
# film-level columns. The columns are dropped by name instead of by position,
# and errors='ignore' makes re-running this cell a no-op; the positional,
# in-place version deleted Gender, Races and Debut year on a second run.
index = (
    index
    .drop_duplicates(subset="Actor", keep="first")
    .drop(columns=['Film', 'Year', 'Genre'], errors='ignore')
)

#save to new file
#index.to_csv('Actor_nodes.csv', index=False) 
index.head()

Unnamed: 0,Actor,Gender,Races,Debut year,Total film
0,Amai Kamarudin,Male,Malay,2008,2
2,Josiah Hogan,Male,Malay,2013,6
8,Shaharuddin Thamby,Male,Malay,1989,5
13,Sharifah Shahirah,Female,Malay,1993,2
15,Chi Azim,Male,Malay,2019,1


## Create network from dataframe

In [7]:
# Graph with one node per actor and one 'weight'-carrying edge per row of `edges`
G = nx.from_pandas_edgelist(edges, source='from', target='to', edge_attr='weight')

## Set node attributes

In [8]:
# Map each actor to a dict of its remaining columns, then attach that dict
# to the node of the same name
actor_attributes = index.set_index('Actor').to_dict(orient='index')
nx.set_node_attributes(G, actor_attributes)

In [9]:
# Spot-check the attributes on the first few nodes only; listing every node
# floods the cell output
list(G.nodes(data=True))[:10]

[('A. Galak',
  {'Gender': 'Male', 'Races': 'Malay', 'Debut year': 1981, 'Total film': 3}),
 ('AC Mizal',
  {'Gender': 'Male', 'Races': 'Malay', 'Debut year': 1999, 'Total film': 1}),
 ('Achey Bocey',
  {'Gender': 'Male', 'Races': 'Malay', 'Debut year': 2014, 'Total film': 3}),
 ('Along Eyzendy',
  {'Gender': 'Male', 'Races': 'Malay', 'Debut year': 2003, 'Total film': 6}),
 ('Atu Zero',
  {'Gender': 'Male', 'Races': 'Others', 'Debut year': 2014, 'Total film': 1}),
 ('Azlee Senario',
  {'Gender': 'Male', 'Races': 'Malay', 'Debut year': 1999, 'Total film': 1}),
 ("Dato' Jalaluddin Hassan",
  {'Gender': 'Male', 'Races': 'Malay', 'Debut year': 1994, 'Total film': 9}),
 ("Dato' Rahim Razali",
  {'Gender': 'Male', 'Races': 'Malay', 'Debut year': 1978, 'Total film': 4}),
 ('Fizz Fairuz',
  {'Gender': 'Male', 'Races': 'Malay', 'Debut year': 2006, 'Total film': 5}),
 ('Hafiz Bahari',
  {'Gender': 'Male', 'Races': 'Malay', 'Debut year': 2012, 'Total film': 3}),
 ('Nabila Huda',
  {'Gender': 'Fem

## Network info

In [10]:
# nx.info() was removed in NetworkX 3.0, so the same summary is printed by hand
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print("Name:", G.name)
print("Type:", type(G).__name__)
print("Number of nodes:", node_count)
print("Number of edges:", edge_count)
if node_count:  # skip the average on an empty graph to avoid dividing by zero
    degree_total = sum(deg for _, deg in G.degree())
    print("Average degree: {:.4f}".format(degree_total / node_count))
print("Network density:", nx.density(G))

Name: 
Type: Graph
Number of nodes: 695
Number of edges: 4190
Average degree:  12.0576
Network density: 0.01737399705595754


## Calculate centralities and set as attributes

In [11]:
# Per-node measures, each returned as a {node: value} mapping
betweenness_dict = nx.betweenness_centrality(G)
eigenvector_dict = nx.eigenvector_centrality(G)
closeness_dict = nx.closeness_centrality(G)
ndegree_dict = dict(G.degree())                 # plain degree (number of neighbours)
degree_dict = dict(G.degree(weight="weight"))   # degree summed over edge 'weight'

# Store every measure on the graph as a node attribute
for attribute_name, node_values in (
    ('betweenness', betweenness_dict),
    ('eigenvector', eigenvector_dict),
    ('closeness', closeness_dict),
    ('degree', ndegree_dict),
    ('weighted_degree', degree_dict),
):
    nx.set_node_attributes(G, node_values, attribute_name)


## Top 10 nodes in each centrality

In [12]:
# Rank (node, degree) pairs from the highest to the lowest plain degree
sorted_degree = sorted(ndegree_dict.items(), key=lambda pair: pair[1], reverse=True)

print("Top 10 nodes by degree:")
for entry in sorted_degree[:10]:
    print(entry)

Top 10 nodes by degree:
('Namron', 79)
('Pekin Ibrahim', 77)
('Remy Ishak', 63)
("Dato' Jalaluddin Hassan", 62)
('Soffi Jikan', 62)
('Zizan Razak', 55)
('Faizal Hussein', 54)
('Kazar Saisi', 53)
('Nora Danish', 50)
('Kamarool Yusoff', 49)


In [13]:
# Rank (node, weighted degree) pairs from the highest to the lowest value
sorted_weighted_degree = sorted(degree_dict.items(), key=lambda pair: pair[1], reverse=True)

print("Top 10 nodes by weighted degree:")
for entry in sorted_weighted_degree[:10]:
    print(entry)

Top 10 nodes by weighted degree:
('Pekin Ibrahim', 91)
('Namron', 85)
('Remy Ishak', 69)
('Soffi Jikan', 68)
("Dato' Jalaluddin Hassan", 63)
('Zizan Razak', 60)
('Faizal Hussein', 60)
('Nora Danish', 58)
('Kamarool Yusoff', 57)
('Kazar Saisi', 55)


In [14]:
# Rank (node, betweenness) pairs from the highest to the lowest value
sorted_betweenness = sorted(betweenness_dict.items(), key=lambda pair: pair[1], reverse=True)

print("Top 10 nodes by betweenness centrality:")
for entry in sorted_betweenness[:10]:
    print(entry)

Top 10 nodes by betweenness centrality:
('Namron', 0.059373294748224115)
('Pekin Ibrahim', 0.05462922047006967)
('Remy Ishak', 0.054465631211705605)
('Nur Fathiah Diaz', 0.05035326505067139)
("Dato' Jalaluddin Hassan", 0.04794044899510774)
('Fadlan Hazim', 0.04415906304491798)
('Kazar Saisi', 0.03983286419437152)
('Soffi Jikan', 0.03708206352161522)
('Faizal Hussein', 0.03016197383200122)
('Ruminah Sidek', 0.02892156370264485)


In [15]:
# Rank (node, closeness) pairs from the highest to the lowest value
sorted_closeness = sorted(closeness_dict.items(), key=lambda pair: pair[1], reverse=True)

print("Top 10 nodes by closeness:")
for entry in sorted_closeness[:10]:
    print(entry)

Top 10 nodes by closeness:
('Pekin Ibrahim', 0.42646540002933375)
('Namron', 0.4175627553149167)
('Soffi Jikan', 0.4167208949211366)
('Remy Ishak', 0.4120137485997683)
('Faizal Hussein', 0.41174016709339395)
('Saiful Apek', 0.40661028960173856)
("Dato' Jalaluddin Hassan", 0.4029114305670249)
('Nora Danish', 0.4013467259822986)
('Kamarool Yusoff', 0.4003103238493553)
('Siti Saleha', 0.3979978765357197)


In [16]:
# Rank (node, eigenvector centrality) pairs from the highest to the lowest value
sorted_eigenvector = sorted(eigenvector_dict.items(), key=lambda pair: pair[1], reverse=True)

print("Top 10 nodes by eigenvector centrality:")
for entry in sorted_eigenvector[:10]:
    print(entry)

Top 10 nodes by eigenvector centrality:
('Namron', 0.19967464625422612)
('Pekin Ibrahim', 0.19325222727853142)
('Soffi Jikan', 0.17659694908808757)
('Remy Ishak', 0.15371201717162086)
('Faizal Hussein', 0.15352673581744974)
('Nora Danish', 0.13550428606091336)
('Wan Hanafi Su', 0.13228510156119083)
("Dato' Jalaluddin Hassan", 0.13224347484391571)
('Shaheizy Sam', 0.12794724902302476)
('Zizan Razak', 0.127328916943698)
