## Task 1: Centrality Extension

In [None]:
import pandas as pd
import networkx as nx

# Load the raw tables: one row per (company, director) seat, plus the
# per-director details that later cells read.
df_directorships = pd.read_csv("company_directorships.csv")
df_details = pd.read_csv("director-details.csv")

# Build the bipartite company-director graph. Rows missing either name are
# skipped so that a NaN can never become a node linking unrelated companies.
seats = df_directorships.dropna(subset=["company_name", "director_name"])
B = nx.Graph()
B.add_edges_from(zip(seats.company_name, seats.director_name))

# Project onto the director side: two directors are adjacent when they share
# at least one company in B.
directors = seats.director_name.unique()
G = nx.bipartite.projected_graph(B, directors)

# Degree and eigenvector centrality are computed on the full director network.
degree_centrality = nx.degree_centrality(G)
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000)

# Closeness is only reported for the 100 highest-degree directors, which keeps
# the cost at one traversal per listed director. It is evaluated on the full
# graph G (via `u=`) rather than on the induced subgraph: inside the subgraph,
# every path that passes through a director outside the top 100 would vanish,
# and the scores would not be comparable with the full-graph degree and
# eigenvector columns shown next to them.
top_100 = sorted(degree_centrality, key=degree_centrality.get, reverse=True)[:100]
G_sampled = G.subgraph(top_100)
closeness_centrality = {d: nx.closeness_centrality(G, u=d) for d in top_100}

# One row per sampled director, ordered from highest to lowest degree centrality.
centrality_df = pd.DataFrame({
    "Director": list(G_sampled.nodes),
    "Degree Centrality": [degree_centrality[d] for d in G_sampled.nodes],
    "Eigenvector Centrality": [eigenvector_centrality[d] for d in G_sampled.nodes],
    "Closeness Centrality": [closeness_centrality[d] for d in G_sampled.nodes]
}).sort_values("Degree Centrality", ascending=False)

centrality_df.head(10)


## Task 2: Code Repair

In [None]:
import matplotlib.pyplot as plt

# Keep only the rows whose role is present and not just whitespace, then count
# the ten most frequent roles.
role_column = df_details['role']
has_role = role_column.notnull() & (role_column.str.strip() != "")
df_clean_roles = df_details[has_role]
top_roles = df_clean_roles['role'].value_counts().head(10)

# Bar chart of the role counts, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(10, 5))
top_roles.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title('Top 10 Director Roles')
ax.set_ylabel('Number of Directors')
ax.set_xlabel('Role')
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid(axis='y')
fig.tight_layout()
plt.show()


## Task 3: Explore Something New — Gender Distribution

In [None]:
import matplotlib.pyplot as plt

# Exploring the gender distribution among directors.
# value_counts() leaves out missing values by default, so directors with no
# recorded gender are not represented in the chart.
gender_counts = df_details['gender'].value_counts()

# Give every category its own wedge colour. A fixed two-colour list would be
# reused from the start as soon as a third category appears, making wedges
# indistinguishable; beyond the palette length, matplotlib's default colour
# cycle is used instead.
palette = ['#66b3ff', '#ff9999', '#99ff99', '#ffcc99', '#c2c2f0', '#ffb3e6']
pie_kwargs = {'autopct': '%1.1f%%', 'startangle': 90}
if len(gender_counts) <= len(palette):
    pie_kwargs['colors'] = palette[:len(gender_counts)]

plt.figure(figsize=(6,6))
gender_counts.plot(kind='pie', **pie_kwargs)
plt.title('Gender Distribution of Directors')
plt.ylabel('')  # drop the column-name label that pandas puts on the y-axis
plt.tight_layout()
plt.show()


## Task 4: Complementary Dataset

**Proposed Dataset:** ASIC Financial Reports  
**Source:** https://asic.gov.au/regulatory-resources/find-a-document/financial-reports

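By combining financial data with our director network, we can assess whether well-connected directors sit on the boards of financially successful companies. The cell below uses a small hard-coded sample table in place of real ASIC figures to illustrate the join.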


In [None]:
# Hard-coded sample financials: (company_name, revenue, profit) per company.
financial_rows = [
    ('Alpha Corp', 5000000, 400000),
    ('Beta Pty Ltd', 800000, -200000),
    ('Gamma Ltd', 15000000, 1200000),
    ('Delta Group', 3000000, 100000),
    ('Epsilon Inc', 12000000, 950000),
]
financials_df = pd.DataFrame(financial_rows, columns=['company_name', 'revenue', 'profit'])

# Inner join: only seats whose company appears in the financial table survive.
merged_df = df_directorships.merge(financials_df, on='company_name', how='inner')

# Average revenue and profit across each director's matched companies,
# ranked by average revenue (highest first).
mean_financials = merged_df.groupby('director_name')[['revenue', 'profit']].mean()
director_scores = mean_financials.sort_values(by='revenue', ascending=False)
director_scores.head(5)


## Task 5b: Implementation of Financial Dataset Idea

In [None]:
# Reuses director_scores (per-director average revenue and profit) from Task 4
# to find the directors linked to the most profitable companies.

# Re-rank by average profit rather than revenue and keep the first five rows.
profit_ranked = director_scores.sort_values(by='profit', ascending=False)
top_profit_directors = profit_ranked.iloc[:5]
top_profit_directors


## Task 5c: Ethics

In [None]:
"""
1. Privacy of Individuals:
   Director data is publicly available, but aggregating it into a network and attaching inferred measures of power or performance to named individuals creates privacy risks.

2. Bias in Data:
   The dataset may reflect systemic inequalities (e.g., gender imbalance). Using it without corrections can reinforce bias.

3. Misinterpretation Risk:
   High centrality does not imply skill or integrity. Over-relying on these metrics can lead to unfair decisions about individuals.

4. Accountability and Transparency:
   If used for VC decisions, the methodology should be disclosed to affected individuals.

5. Purpose:
   Network analysis should support ethical goals like improving diversity and oversight, not just profit maximization.
"""
