In [None]:
### Construct the June 2015 network

import pandas as pd 
import graph_tool.all as gt
import matplotlib.pyplot as plt
import numpy as np

# Seed NumPy's global random state; the same seed value is reused for each year's network construction
np.random.seed(seed=1234)

# Read the June 2015 subset from the cleaned-files folder into a DataFrame
sorted_062015 = pd.read_csv(filepath_or_buffer='../Data/Cleaned files/june15_subset.csv')

In [None]:
## Visualizations

# Individual management company names: the distinct values of the 'Address1' column
unique_companies_062015 = sorted_062015.loc[:, 'Address1'].unique()
# Empty graph that will be populated with the June 2015 data
total_nv_network_062015 = gt.Graph()

# Empty dictionaries for the respective node types (HOAs and companies)
hoa_vertices = dict()
company_vertices = dict()

In [None]:
## Statistics (for professionally managed HOAs)

# Degree centrality: total degree per vertex (edges represent company-HOA connections),
# ranked from highest to lowest, with each vertex's share of all edges in the graph
degrees = total_nv_network_062015.degree_property_map('total')
total_degree_centrality_sum = total_nv_network_062015.num_edges()
ranked_vertices = sorted(total_nv_network_062015.vertices(), key=lambda v: degrees[v], reverse=True)
top_10 = ranked_vertices[:10]
print("Top 10 Vertices by Degree Centrality - June 2015:")
# NOTE(review): company_name_prop is not defined in this cell — it must already exist in the kernel
for i, vertex in enumerate(top_10):
    company_name, degree_centrality = company_name_prop[vertex], degrees[vertex]
    # Share of all edges held by this vertex, expressed as a percentage
    proportion_of_total = 100 * (degree_centrality / total_degree_centrality_sum)
    print(f"{i+1}. {company_name}, Number of HOAs: {degree_centrality}, Proportion of Total: {proportion_of_total:.1f}")

# Average community size by firm (in units): per company, total the units and count the HOA rows,
# divide one by the other, round, and keep the ten largest averages
per_company_aggregations = {'# of Units': 'sum', 'Name': 'count'}
company_stats = sorted_062015.groupby('Address1').agg(per_company_aggregations)
units_per_hoa = company_stats['# of Units'] / company_stats['Name']
company_stats['Average Community Size'] = units_per_hoa.round()
companies_by_average_size = company_stats.sort_values(by='Average Community Size', ascending=False)
top_10_average_community_size = companies_by_average_size.head(10)
print("Top 10 Companies by Average Community Size - June 2015:")
print(top_10_average_community_size)

In [None]:
### Construct the June 2025 network

# Necessary packages are already loaded in

# Re-seed NumPy's global random state with the same value used for the other network constructions
np.random.seed(seed=1234)

# Read the June 2025 subset from the cleaned-files folder into a DataFrame
sorted_062025 = pd.read_csv(filepath_or_buffer='../Data/Cleaned files/june25_subset.csv')

In [None]:
## Visualizations

# Individual management company names: the distinct values of the 'Address1' column
unique_companies_062025 = sorted_062025.loc[:, 'Address1'].unique()
# Empty graph that will be populated with the June 2025 data
total_nv_network_062025 = gt.Graph()

# Fresh, empty dictionaries for the respective node types (HOAs and companies)
hoa_vertices = dict()
company_vertices = dict()

In [None]:
## Statistics (for professionally managed HOAs)

# Degree centrality: total degree per vertex, ranked from highest to lowest,
# with each vertex's share of all edges in the graph
degrees = total_nv_network_062025.degree_property_map('total')
total_degree_centrality_sum = total_nv_network_062025.num_edges()
ranked_vertices = sorted(total_nv_network_062025.vertices(), key=lambda v: degrees[v], reverse=True)
top_10 = ranked_vertices[:10]
print("Top 10 Vertices by Degree Centrality - June 2025:")
# NOTE(review): company_name_prop is not defined in this cell — confirm it belongs to the 2025 graph
for i, vertex in enumerate(top_10):
    company_name, degree_centrality = company_name_prop[vertex], degrees[vertex]
    # Share of all edges held by this vertex, expressed as a percentage
    proportion_of_total = 100 * (degree_centrality / total_degree_centrality_sum)
    print(f"{i+1}. {company_name}, Number of HOAs: {degree_centrality}, Proportion of Total: {proportion_of_total:.1f}")

# Average community size by firm: per company, total the units and count the HOA rows,
# divide one by the other, round, and keep the ten largest averages
per_company_aggregations = {'# of Units': 'sum', 'Name': 'count'}
company_stats = sorted_062025.groupby('Address1').agg(per_company_aggregations)
units_per_hoa = company_stats['# of Units'] / company_stats['Name']
company_stats['Average Community Size'] = units_per_hoa.round()
companies_by_average_size = company_stats.sort_values(by='Average Community Size', ascending=False)
top_10_average_community_size = companies_by_average_size.head(10)
print("Top 10 Companies by Average Community Size - June 2025:")
print(top_10_average_community_size)

In [None]:
## Rewiring (going to need to adjust to perform in parallel etc.)

# Step 1: Take the natural log of every vertex's total degree in the actual (observed) June 2025 network
actual_total_degrees = total_nv_network_062025.degree_property_map('total').a
logged_degrees_actual = np.log(actual_total_degrees)

# Step 2: Build the random rewiring function (only changing edges from HOAs to other possible companies, not each other), iterate and check progress, and fill in list of results
def rewire_edges(graph):
    """Randomly reattach degree-1 sources to other high-degree vertices, in place.

    The edges present on entry are visited once each in shuffled order. An edge
    is rewired only when its source currently has total degree 1 and its target
    has total degree greater than 1. The replacement target is drawn uniformly
    from the vertices that currently have total degree greater than 1, are not
    the source, and are not already connected from the source.

    Randomness comes from NumPy's global random state (``np.random.shuffle`` and
    ``np.random.choice``), so seed it beforehand for reproducible results.

    Parameters
    ----------
    graph
        Graph object exposing ``edges()``, ``degree_property_map('total')``,
        ``edge(s, t)``, ``remove_edge(e)`` and ``add_edge(s, t)``. It is
        modified in place; pass a copy to keep the original intact.

    Returns
    -------
    The same ``graph`` object, after rewiring.
    """
    edges = list(graph.edges())
    np.random.shuffle(edges)

    # Read the degrees once and keep this private copy in sync after each rewiring,
    # instead of rebuilding the whole degree map for every vertex of every edge.
    degrees = np.array(graph.degree_property_map('total').a, dtype=np.int64)

    for edge in edges:
        source, target = edge
        source_idx, target_idx = int(source), int(target)

        # Only edges from a degree-1 vertex to a vertex with several edges are rewired.
        if degrees[source_idx] != 1 or degrees[target_idx] <= 1:
            continue

        # Eligible replacements, in vertex-index order. The current target is
        # excluded automatically because the edge source -> target still exists.
        possible_targets = [
            int(idx)
            for idx in np.flatnonzero(degrees > 1)
            if idx != source_idx and graph.edge(source_idx, int(idx)) is None
        ]
        if not possible_targets:
            # Nothing to move this edge to (np.random.choice would raise on an
            # empty list), so leave the edge where it is.
            continue

        new_target = int(np.random.choice(possible_targets))
        graph.remove_edge(edge)
        graph.add_edge(source_idx, new_target)

        # Mirror the change in the cached degrees; the source keeps degree 1.
        degrees[target_idx] -= 1
        degrees[new_target] += 1
    return graph

# Build the sample of rewired networks: each pass rewires a fresh copy of the June 2025 graph
iterations = 100
rewired_networks = []
for i in range(iterations):
    print(f"Iteration {i+1}/{iterations}")
    network_copy = total_nv_network_062025.copy()  # rewire_edges receives a copy, not the original graph object
    rewired_network = rewire_edges(network_copy)
    rewired_networks += [rewired_network]
    
# Step 3: Log transform the rewirings' degree values and calculate the mean and SD of the sample population we have created
logged_degrees_rewired = []
for rewired_network in rewired_networks:
    degrees_rewired = rewired_network.degree_property_map('total').a
    logged_degrees_rewired.append(np.log(degrees_rewired))

# axis=0 aggregates across the rewired networks, giving one value per vertex
mean_logged_degrees_rewired = np.mean(logged_degrees_rewired, axis=0)
# The spread must come from np.std, not np.mean, otherwise the z-scores are divided by the mean.
# np.std defaults to the population SD (ddof=0); pass ddof=1 if the sample SD is wanted instead.
std_logged_degrees_rewired = np.std(logged_degrees_rewired, axis=0)

# Step 4: Calculate the z-scores for the rewirings using above information
# One array per rewired network, standardised vertex-by-vertex against the rewired mean and SD.
# A vertex whose SD value is 0 produces nan/inf here (NumPy emits a RuntimeWarning).
# The comprehension keeps its loop variable local, so the notebook-level `degrees` is not overwritten.
z_scores = [
    (logged_rewired - mean_logged_degrees_rewired) / std_logged_degrees_rewired
    for logged_rewired in logged_degrees_rewired
]

# Step 5: Calculate the actual structure's z-scores using above information
# Subtract the rewired mean, exactly as in Step 4 (a doubled minus sign here would add it instead).
actual_z_score = (logged_degrees_actual - mean_logged_degrees_rewired) / std_logged_degrees_rewired