In [1]:
import pandas as pd
import networkx as nx
from networkx.algorithms import bipartite
import matplotlib.pyplot as plt
import seaborn as sns
from community import community_louvain
from scipy.cluster.hierarchy import linkage, dendrogram
from collections import Counter

In [2]:
#Plots (with Feedback)
sns.set_style("whitegrid") #style

#fonts
plt.rcParams['font.size'] = 12
plt.rcParams['axes.titlesize'] = 18
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 10
plt.rcParams['ytick.labelsize'] = 10
#feedback
HISTOGRAM_BINS = 25


In [6]:
# import data
df = pd.read_csv('https://raw.githubusercontent.com/Justin-Pascua/MATH-168-Group-Project/refs/heads/main/full_opensecrets_data.csv')
party_map = df.set_index('Senator')['Party'].to_dict()

In [8]:
#Bipartite Graph
print("\n Building Bipartite Graph ")
B = nx.Graph()
senator_nodes = df['Senator'].unique()
organization_nodes = df['Organization'].unique()
B.add_nodes_from(senator_nodes, bipartite='senator')
B.add_nodes_from(organization_nodes, bipartite='organization')
edges = [tuple(x) for x in df[['Senator', 'Organization']].to_numpy()]
B.add_edges_from(edges)
print("Bipartite graph created.")


 Building Bipartite Graph 
Bipartite graph created.


# **Bipartite Graph Centrality Analysis**

In [9]:
#Bipartite Graph Centrality analysis
print("\nCalculating Bipartite Graph Centrality Measures")

# Bipartite Degree Centrality
bipartite_degree = bipartite.degree_centrality(B, senator_nodes)
bipartite_degree.update(bipartite.degree_centrality(B, organization_nodes))

# Bipartite Betweenness Centrality
bipartite_betweenness = bipartite.betweenness_centrality(B, senator_nodes)
bipartite_betweenness.update(bipartite.betweenness_centrality(B, organization_nodes))

bipartite_df = pd.DataFrame({
    'Node': list(bipartite_degree.keys()),
    'Degree': list(bipartite_degree.values()),
    'Betweenness': list(bipartite_betweenness.values()),
})
bipartite_df['Type'] = bipartite_df['Node'].apply(lambda x: 'Senator' if x in senator_nodes else 'Organization')



Calculating Bipartite Graph Centrality Measures


In [10]:
print("\nTop 5 Nodes by Bipartite Centrality")
print("\n[Bipartite Degree Centrality]")
print(bipartite_df.sort_values(by='Degree', ascending=False).head())
print("\n[Bipartite Betweenness Centrality]")
print(bipartite_df.sort_values(by='Betweenness', ascending=False).head())


Top 5 Nodes by Bipartite Centrality

[Bipartite Degree Centrality]
                           Node  Degree  Betweenness          Type
1416               Comcast Corp    0.90     0.000000  Organization
1727                   AT&T Inc    0.80     0.000000  Organization
505                Deloitte LLP    0.73     0.000000  Organization
1486    Honeywell International    0.73     0.000033  Organization
434   National Assn of Realtors    0.69     0.000000  Organization

[Bipartite Betweenness Centrality]
                     Node    Degree  Betweenness          Type
134  Action Safety Supply  0.010000     0.062251  Organization
117    Deeley, King et al  0.010000     0.047792  Organization
46             Casey, Bob  0.031746     0.042996       Senator
92        Graham, Lindsey  0.031746     0.041976       Senator
67             Kaine, Tim  0.031746     0.040766       Senator


In [11]:
# Plot 1: Bipartite Degree
plt.figure(figsize=(10, 6))
sns.histplot(data=bipartite_df[bipartite_df['Type']=='Senator'], x='Degree', bins=HISTOGRAM_BINS)
plt.title('Bipartite Degree Distribution (Senators)')
plt.xlabel('Degree Centrality in Bipartite Graph')
plt.ylabel('Number of Senators')
plt.savefig('bipartite_senator_degree.png', dpi=300, bbox_inches='tight')
plt.close()
print("Saved 'bipartite_senator_degree.png'")

plt.figure(figsize=(10, 6))
sns.histplot(data=bipartite_df[bipartite_df['Type']=='Organization'], x='Degree', bins=HISTOGRAM_BINS)
plt.title('Bipartite Degree Distribution (Organizations)')
plt.xlabel('Degree Centrality in Bipartite Graph')
plt.ylabel('Number of Organizations')
plt.savefig('bipartite_organization_degree.png', dpi=300, bbox_inches='tight')
plt.close()
print("Saved 'bipartite_organization_degree.png'")


Saved 'bipartite_senator_degree.png'
Saved 'bipartite_organization_degree.png'


In [12]:
# Plot 2: Bipartite Betweenness
plt.figure(figsize=(10, 6))
sns.histplot(data=bipartite_df[bipartite_df['Type']=='Senator'], x='Betweenness', bins=HISTOGRAM_BINS)
plt.title('Bipartite Betweenness Distribution (Senators)')
plt.xlabel('Betweenness Centrality in Bipartite Graph')
plt.ylabel('Number of Senators')
plt.savefig('bipartite_senator_betweenness.png', dpi=300, bbox_inches='tight')
plt.close()
print("Saved 'bipartite_senator_betweenness.png'")

plt.figure(figsize=(10, 6))
sns.histplot(data=bipartite_df[bipartite_df['Type']=='Organization'], x='Betweenness', bins=HISTOGRAM_BINS)
plt.title('Bipartite Betweenness Distribution (Organizations)')
plt.xlabel('Betweenness Centrality in Bipartite Graph')
plt.ylabel('Number of Organizations')
plt.savefig('bipartite_organization_betweenness.png', dpi=300, bbox_inches='tight')
plt.close()
print("Saved 'bipartite_organization_betweenness.png'")


Saved 'bipartite_senator_betweenness.png'
Saved 'bipartite_organization_betweenness.png'


# **Senator Projection Centrality Analysis**

In [13]:
#Senator Projection create and centrality Analysis
print("\n Analyzing the Senator-Senator Projection ")
senator_graph = nx.bipartite.weighted_projected_graph(B, senator_nodes)
print("Senator projection created.")

print("\n Calculating Senator Projection Centrality Measures ")
degree_centrality = nx.degree_centrality(senator_graph)
betweenness_centrality = nx.betweenness_centrality(senator_graph)
eigenvector_centrality = nx.eigenvector_centrality(senator_graph, max_iter=1000)

centrality_df = pd.DataFrame({
    'Senator': list(degree_centrality.keys()),
    'Degree': list(degree_centrality.values()),
    'Betweenness': list(betweenness_centrality.values()),
    'Eigenvector': list(eigenvector_centrality.values())
})



 Analyzing the Senator-Senator Projection 
Senator projection created.

 Calculating Senator Projection Centrality Measures 


In [14]:
print("\n Top 5 Senators by Projection Centrality ")
print("\n[Degree Centrality (Most connected via shared donors)]")
print(centrality_df.sort_values(by='Degree', ascending=False).head())
print("\n[Betweenness Centrality (Key 'bridges' in the network)]")
print(centrality_df.sort_values(by='Betweenness', ascending=False).head())
print("\n[Eigenvector Centrality (Connected to other influential senators)]")
print(centrality_df.sort_values(by='Eigenvector', ascending=False).head())



 Top 5 Senators by Projection Centrality 

[Degree Centrality (Most connected via shared donors)]
              Senator  Degree  Betweenness  Eigenvector
1    Barrasso, John A     1.0     0.000028     0.100222
2     Bennet, Michael     1.0     0.000028     0.100222
3   Blackburn, Marsha     1.0     0.000028     0.100222
5          Blunt, Roy     1.0     0.000028     0.100222
28  Feinstein, Dianne     1.0     0.000028     0.100222

[Betweenness Centrality (Key 'bridges' in the network)]
              Senator  Degree  Betweenness  Eigenvector
1    Barrasso, John A     1.0     0.000028     0.100222
2     Bennet, Michael     1.0     0.000028     0.100222
3   Blackburn, Marsha     1.0     0.000028     0.100222
5          Blunt, Roy     1.0     0.000028     0.100222
28  Feinstein, Dianne     1.0     0.000028     0.100222

[Eigenvector Centrality (Connected to other influential senators)]
              Senator  Degree  Betweenness  Eigenvector
1    Barrasso, John A     1.0     0.000028     0

In [15]:
# for saving files
print("\n Generating and Saving Senator Projection Centrality Plots ")

plt.figure(figsize=(10, 6))
sns.histplot(data=centrality_df, x='Degree', bins=HISTOGRAM_BINS)
plt.title('Distribution of Senator Degree Centrality (Projection)')
plt.xlabel('Degree Centrality in Senator Projection')
plt.ylabel('Number of Senators')
plt.savefig('projection_senator_degree.png', dpi=300, bbox_inches='tight')
plt.close()
print("Saved 'projection_senator_degree.png'")

plt.figure(figsize=(10, 6))
sns.histplot(data=centrality_df, x='Betweenness', bins=HISTOGRAM_BINS)
plt.title('Distribution of Senator Betweenness Centrality (Projection)')
plt.xlabel('Betweenness Centrality in Senator Projection')
plt.ylabel('Number of Senators')
plt.savefig('projection_senator_betweenness.png', dpi=300, bbox_inches='tight')
plt.close()
print("Saved 'projection_senator_betweenness.png'")

plt.figure(figsize=(10, 6))
sns.histplot(data=centrality_df, x='Eigenvector', bins=HISTOGRAM_BINS)
plt.title('Distribution of Senator Eigenvector Centrality (Projection)')
plt.xlabel('Eigenvector Centrality in Senator Projection')
plt.ylabel('Number of Senators')
plt.savefig('projection_senator_eigenvector.png', dpi=300, bbox_inches='tight')
plt.close()
print("Saved 'projection_senator_eigenvector.png'")



 Generating and Saving Senator Projection Centrality Plots 
Saved 'projection_senator_degree.png'
Saved 'projection_senator_betweenness.png'
Saved 'projection_senator_eigenvector.png'


# **Donor Projection Centrality Analysis**

In [16]:
print("\n Analyzing the Donor-Donor Projection ")
donor_graph = nx.bipartite.weighted_projected_graph(B, organization_nodes)
print("Donor projection created.")

print("\n Calculating Donor Projection Centrality Measures ")
donor_degree_centrality = nx.degree_centrality(donor_graph)
donor_betweenness_centrality = nx.betweenness_centrality(donor_graph)

donor_centrality_df = pd.DataFrame({
    'Organization': list(donor_degree_centrality.keys()),
    'Degree': list(donor_degree_centrality.values()),
    'Betweenness': list(donor_betweenness_centrality.values()),
})



 Analyzing the Donor-Donor Projection 
Donor projection created.

 Calculating Donor Projection Centrality Measures 


In [17]:
print("\n Top 5 Donors by Projection Centrality ")
print("\n[Degree Centrality (Donors funding the most interconnected senators)]")
print(donor_centrality_df.sort_values(by='Degree', ascending=False).head())
print("\n[Betweenness Centrality (Donors bridging different senator groups)]")
print(donor_centrality_df.sort_values(by='Betweenness', ascending=False).head())



 Top 5 Donors by Projection Centrality 

[Degree Centrality (Donors funding the most interconnected senators)]
                  Organization    Degree  Betweenness
34                Comcast Corp  0.888536     0.046637
17                    AT&T Inc  0.827247     0.037356
232               Deloitte LLP  0.754208     0.028742
92   National Assn of Realtors  0.716418     0.026848
106         Berkshire Hathaway  0.709114     0.027426

[Betweenness Centrality (Donors bridging different senator groups)]
                  Organization    Degree  Betweenness
34                Comcast Corp  0.888536     0.046637
17                    AT&T Inc  0.827247     0.037356
232               Deloitte LLP  0.754208     0.028742
106         Berkshire Hathaway  0.709114     0.027426
92   National Assn of Realtors  0.716418     0.026848


In [18]:
print("\n Generating and Saving Donor Projection Centrality Plots ")
plt.figure(figsize=(10, 6))
sns.histplot(data=donor_centrality_df, x='Degree', bins=HISTOGRAM_BINS)
plt.title("Distribution of Donor Degree Centrality (Projection)")
plt.xlabel('Degree Centrality in Donor Projection')
plt.ylabel('Number of Donors')
plt.savefig('projection_donor_degree.png', dpi=300, bbox_inches='tight')
plt.close()
print("Saved 'projection_donor_degree.png'")

plt.figure(figsize=(10, 6))
sns.histplot(data=donor_centrality_df, x='Betweenness', bins=HISTOGRAM_BINS)
plt.title("Distribution of Donor Betweenness Centrality (Projection)")
plt.xlabel('Betweenness Centrality in Donor Projection')
plt.ylabel('Number of Donors')
plt.savefig('projection_donor_betweenness.png', dpi=300, bbox_inches='tight')
plt.close()
print("Saved 'projection_donor_betweenness.png'")


print("\n Analysis Complete ")



 Generating and Saving Donor Projection Centrality Plots 
Saved 'projection_donor_degree.png'
Saved 'projection_donor_betweenness.png'

 Analysis Complete 
