In [None]:
## Set up packages, so that my notebook runs inside an environment where graph-tool is installed and operating

import pandas as pd 
import graph_tool.all as gt
import matplotlib.pyplot as plt
import numpy as np

In [None]:
### Construct the June 2015 network ("professionally managed" only); using the same format as the looped files

# This call seeds NumPy's global random number generator.
# NOTE(review): graph-tool routines (e.g. the layout used in the next cell) presumably draw from
# graph-tool's own generator, which this call would not seed -- confirm.
np.random.seed(1234) # Set a seed to be used consistently between the different years' network constructions

# NOTE(review): the file name says "06 2025" although this cell builds the June 2015 network --
# confirm that this is the intended input file.
sorted_062015 = pd.read_csv('../Data/Cleaned files/NRED HOA - 06 2025_subset.csv') 

# Bare expression: shows the loaded DataFrame as the cell's output.
sorted_062015

In [None]:
## Visualizations
# Builds the June 2015 network from `sorted_062015` (one vertex per management company, one vertex
# per HOA name, one edge per data row pointing from the HOA to its company), lays it out, colors
# the vertices by type/location, and saves the drawing as a PNG.

unique_companies_062015 = sorted_062015['Firm_Final'].unique()  # Pull out the individual management company names, changed to final
total_nv_network_062015 = gt.Graph()  # Create an empty graph to populate with the data

# Any decisions regarding which managed ones to keep in (e.g. individually registered) and how to label by size/type?

hoa_vertices = {}  # HOA name -> its vertex
company_vertices = {}  # Management company name -> its vertex

# Per-vertex string properties; they are only filled in for company vertices
company_name_prop = total_nv_network_062015.new_vertex_property("string")
company_city_prop = total_nv_network_062015.new_vertex_property("string")
company_state_prop = total_nv_network_062015.new_vertex_property("string")

for company_062015 in unique_companies_062015:  # Sort through the management companies, store their information, and add as vertices to the graph
    company_data_062015 = sorted_062015[sorted_062015['Firm_Final'] == company_062015]
    company_vertex = total_nv_network_062015.add_vertex()
    company_vertices[company_062015] = company_vertex
    company_name_prop[company_vertex] = company_062015

    # The company's location is read from the first of its rows
    first_row = company_data_062015.iloc[0]
    company_city_prop[company_vertex] = first_row['City']
    company_state_prop[company_vertex] = first_row['State']

    # Each row is one HOA: reuse the HOA's vertex if it was already seen, otherwise create it,
    # then draw an edge from the HOA to the company.
    # NOTE(review): a (HOA, company) pair that occurs in several rows gets one edge per row -- confirm this is intended.
    for hoa_name in company_data_062015['Name']:
        if hoa_name not in hoa_vertices:
            hoa_vertices[hoa_name] = total_nv_network_062015.add_vertex()
        total_nv_network_062015.add_edge(hoa_vertices[hoa_name], company_vertex)

# np.random.seed() does not reach graph-tool's own random number generator, so seed that one too;
# otherwise the layout's random starting positions differ from run to run.
gt.seed_rng(1234)
pos = gt.sfdp_layout(total_nv_network_062015, K=10)  # Space out the nodes using the force-directed algorithm

# Color and sizing
vertex_color = total_nv_network_062015.new_vertex_property("vector<double>")  # Assign color scheme based on HOA or management (and latter's location)

# Cities counted as Las Vegas MSA for the coloring below
vegas_msa_cities = {
    "Boulder City", "Henderson", "Las Vegas", "Mesquite", "North Las Vegas", "Blue Diamond",
    "Bunkerville", "Cal-Nev-Ari", "Enterprise", "Goodsprings", "Indian Springs", "Laughlin",
    "Moapa Town", "Moapa Valley", "Mount Charleston", "Nelson", "Paradise", "Sandy Valley",
    "Searchlight", "Spring Valley", "Summerlin South", "Sunrise Manor", "Whitney", "Winchester",
}

# Vertex indices of the companies: a set lookup replaces a scan of every company for every vertex
company_indices = {int(company_vertex) for company_vertex in company_vertices.values()}

for v in total_nv_network_062015.vertices():
    if int(v) not in company_indices:
        vertex_color[v] = (0, 0, 1, 0.75)  # Blue color for all individual HOAs
    elif company_state_prop[v] != "NV":
        vertex_color[v] = (0.5, 0, 0.5, 0.75)  # Purple color for companies with address outside NV
    elif company_city_prop[v] in vegas_msa_cities:
        vertex_color[v] = (0, 1, 0, 0.75)  # Green color for Vegas MSA-headquartered companies
    else:
        vertex_color[v] = (1, 0, 0, 0.75)  # Red color for other cities in NV

# Graphic creation and save
gt.graph_draw(total_nv_network_062015, pos, vertex_fill_color=vertex_color, output_size=(1000, 1000), output="../Output/062015_managed.png")  # Draw and save the graphic to the project folder
# NOTE(review): graph_draw writes straight to the PNG above; plt.show() presumably has no matplotlib figure to display -- confirm before relying on inline output.
plt.show()

In [None]:
## Statistics (for professionally managed HOAs)

# Degree centrality: every edge is one company-HOA connection, so rank the vertices by their
# total degree and report each one's share of all edges
degrees = total_nv_network_062015.degree_property_map('total')
total_degree_centrality_sum = total_nv_network_062015.num_edges()

vertices_by_degree = list(total_nv_network_062015.vertices())
vertices_by_degree.sort(key=lambda v: degrees[v], reverse=True)
top_10 = vertices_by_degree[:10]

print("Top 10 Vertices by Degree Centrality - June 2015:")
for rank, vertex in enumerate(top_10, start=1):
    company_name = company_name_prop[vertex]
    degree_centrality = degrees[vertex]
    share_of_edges = degree_centrality / total_degree_centrality_sum
    proportion_of_total = share_of_edges * 100
    print(f"{rank}. {company_name}, Number of HOAs: {degree_centrality}, Proportion of Total: {proportion_of_total:.1f}")

# Average community size by firm (in units): per company, total the units and count the HOA rows,
# divide one by the other, round, and keep the ten largest averages
rows_by_firm = sorted_062015.groupby('Firm_Final')
company_stats = rows_by_firm.agg({'# of Units': 'sum', 'Name': 'count'})
company_stats['Average Community Size'] = (company_stats['# of Units'] / company_stats['Name']).round()

ranked_by_average_size = company_stats.sort_values(by='Average Community Size', ascending=False)
top_10_average_community_size = ranked_by_average_size.head(10)
print("Top 10 Companies by Average Community Size - June 2015:")
print(top_10_average_community_size)

In [None]:
### Construct the June 2015 network (all)
### Should appear the same but with freestanding yellow nodes for non-managed (under my definition)

In [None]:
### Construct the June 2025 network ("professionally managed" only)

# Necessary packages are already loaded in

# Re-seed NumPy's global random number generator with the same value used for the June 2015 construction.
# NOTE(review): graph-tool routines presumably use graph-tool's own generator, which this call
# would not seed -- confirm.
np.random.seed(1234)

# Load the June 2025 subset; later cells read the 'Firm_Final', 'City', 'State', 'Name' and '# of Units' columns from it.
sorted_062025 = pd.read_csv('../Data/Cleaned files/june25_subset.csv')  

In [None]:
## Visualizations
# Builds the June 2025 network from `sorted_062025` (one vertex per management company, one vertex
# per HOA name, one edge per data row pointing from the HOA to its company), lays it out, colors
# the vertices by type/location, and saves the drawing as a PNG.

unique_companies_062025 = sorted_062025['Firm_Final'].unique()  # Individual management company names
total_nv_network_062025 = gt.Graph()  # Empty graph to populate with the data

# Any decisions regarding which managed ones to keep in (e.g. individually registered) and how to label by size/type?

hoa_vertices = {}  # HOA name -> its vertex
company_vertices = {}  # Management company name -> its vertex

# Per-vertex string properties; they are only filled in for company vertices
company_name_prop = total_nv_network_062025.new_vertex_property("string")
company_city_prop = total_nv_network_062025.new_vertex_property("string")
company_state_prop = total_nv_network_062025.new_vertex_property("string")

for company_062025 in unique_companies_062025:  # One vertex per management company, with its name and location stored on it
    company_data_062025 = sorted_062025[sorted_062025['Firm_Final'] == company_062025]
    company_vertex = total_nv_network_062025.add_vertex()
    company_vertices[company_062025] = company_vertex
    company_name_prop[company_vertex] = company_062025

    # The company's location is read from the first of its rows
    first_row = company_data_062025.iloc[0]
    company_city_prop[company_vertex] = first_row['City']
    company_state_prop[company_vertex] = first_row['State']

    # Each row is one HOA: reuse the HOA's vertex if it was already seen, otherwise create it,
    # then draw an edge from the HOA to the company.
    # NOTE(review): a (HOA, company) pair that occurs in several rows gets one edge per row -- confirm this is intended.
    for hoa_name in company_data_062025['Name']:
        if hoa_name not in hoa_vertices:
            hoa_vertices[hoa_name] = total_nv_network_062025.add_vertex()
        total_nv_network_062025.add_edge(hoa_vertices[hoa_name], company_vertex)

# np.random.seed() does not reach graph-tool's own random number generator, so seed that one too;
# otherwise the layout's random starting positions differ from run to run.
gt.seed_rng(1234)
pos = gt.sfdp_layout(total_nv_network_062025, K=10)  # Space out the nodes using the force-directed algorithm

# Color and sizing
vertex_color = total_nv_network_062025.new_vertex_property("vector<double>")  # RGBA fill color per vertex

# Cities counted as Las Vegas MSA for the coloring below
vegas_msa_cities = {
    "Boulder City", "Henderson", "Las Vegas", "Mesquite", "North Las Vegas", "Blue Diamond",
    "Bunkerville", "Cal-Nev-Ari", "Enterprise", "Goodsprings", "Indian Springs", "Laughlin",
    "Moapa Town", "Moapa Valley", "Mount Charleston", "Nelson", "Paradise", "Sandy Valley",
    "Searchlight", "Spring Valley", "Summerlin South", "Sunrise Manor", "Whitney", "Winchester",
}

# Vertex indices of the companies: a set lookup replaces a scan of every company for every vertex
company_indices = {int(company_vertex) for company_vertex in company_vertices.values()}

# The original if/elif/else chain here was mis-indented and did not parse; this is the same
# four-way coloring as the June 2015 cell.
for v in total_nv_network_062025.vertices():
    if int(v) not in company_indices:
        vertex_color[v] = (0, 0, 1, 0.75)  # Blue color for all individual HOAs
    elif company_state_prop[v] != "NV":
        vertex_color[v] = (0.5, 0, 0.5, 0.75)  # Purple color for companies with address outside NV
    elif company_city_prop[v] in vegas_msa_cities:
        vertex_color[v] = (0, 1, 0, 0.75)  # Green color for Vegas MSA-headquartered companies
    else:
        vertex_color[v] = (1, 0, 0, 0.75)  # Red color for other cities in NV

# Graphic creation and save
gt.graph_draw(total_nv_network_062025, pos, vertex_fill_color=vertex_color, output_size=(1000, 1000), output="../Output/062025_managed.png")  # Draw and save the graphic to the project folder
# NOTE(review): graph_draw writes straight to the PNG above; plt.show() presumably has no matplotlib figure to display -- confirm before relying on inline output.
plt.show()

In [None]:
## Statistics (for professionally managed HOAs)

# Degree centrality: rank the vertices by total degree and report each one's share of all edges
degrees = total_nv_network_062025.degree_property_map('total')
total_degree_centrality_sum = total_nv_network_062025.num_edges()

vertices_by_degree = list(total_nv_network_062025.vertices())
vertices_by_degree.sort(key=lambda v: degrees[v], reverse=True)
top_10 = vertices_by_degree[:10]

print("Top 10 Vertices by Degree Centrality - June 2025:")
for rank, vertex in enumerate(top_10, start=1):
    company_name = company_name_prop[vertex]
    degree_centrality = degrees[vertex]
    share_of_edges = degree_centrality / total_degree_centrality_sum
    proportion_of_total = share_of_edges * 100
    print(f"{rank}. {company_name}, Number of HOAs: {degree_centrality}, Proportion of Total: {proportion_of_total:.1f}")

# Average community size by firm: per company, total the units and count the HOA rows,
# divide one by the other, round, and keep the ten largest averages
rows_by_firm = sorted_062025.groupby('Firm_Final')
company_stats = rows_by_firm.agg({'# of Units': 'sum', 'Name': 'count'})
company_stats['Average Community Size'] = (company_stats['# of Units'] / company_stats['Name']).round()

ranked_by_average_size = company_stats.sort_values(by='Average Community Size', ascending=False)
top_10_average_community_size = ranked_by_average_size.head(10)
print("Top 10 Companies by Average Community Size - June 2025:")
print(top_10_average_community_size)

In [None]:
### Construct the June 2025 network (all)

In [None]:
## Rewiring (going to need to adjust to perform in parallel, as well as use an adjacency matrix)

# Step 1: Log transform the actual structure's degree values
# `.a` exposes the total-degree map of the June 2025 network as an array; np.log is applied element-wise.
# NOTE(review): a vertex with degree 0 would give -inf here -- presumably none exist, since every vertex is created together with an edge; confirm.
logged_degrees_actual = np.log(total_nv_network_062025.degree_property_map('total').a)

# Step 2: Build the random rewiring function (only changing edges from HOAs to other possible companies, not each other), iterate and check progress, and fill in list of results
def rewire_edges(graph):
    """Randomly re-point edges of degree-1 sources to other high-degree vertices.

    The edges are visited in a random order. An edge is rewired only when its
    source currently has total degree 1 and its target currently has total
    degree greater than 1. Such an edge is removed and replaced by an edge from
    the same source to a vertex drawn uniformly (via ``np.random.choice``) from
    all vertices whose current total degree is greater than 1, excluding the
    current target. When no such vertex exists the edge is left untouched.

    Vertices are classified by their degree at the moment the edge is visited,
    so a vertex whose degree has dropped to 1 through earlier rewirings is no
    longer a possible target.

    Parameters
    ----------
    graph : graph-tool graph
        Modified in place; pass a copy to keep the original intact.

    Returns
    -------
    The same ``graph`` object, after rewiring.
    """
    edges = list(graph.edges())
    np.random.shuffle(edges)

    # Read the degrees once and keep them current by hand, instead of rebuilding the
    # whole degree map for every edge and for every candidate vertex.
    degrees = np.array(graph.degree_property_map('total').a, dtype=np.int64)

    for edge in edges:
        source, target = edge
        source_idx, target_idx = int(source), int(target)
        if degrees[source_idx] != 1 or degrees[target_idx] <= 1:
            continue

        # The source has exactly one edge (this one), so the only vertex it is already
        # connected to is the current target; the source itself has degree 1 and is
        # therefore never among the candidates.
        candidates = np.flatnonzero(degrees > 1)
        candidates = candidates[candidates != target_idx]
        if candidates.size == 0:
            continue  # No alternative target available: keep the edge as it is

        new_target_idx = int(np.random.choice(candidates))
        graph.remove_edge(edge)
        graph.add_edge(source, new_target_idx)
        degrees[target_idx] -= 1
        degrees[new_target_idx] += 1
    return graph

iterations = 100 # Can play around with different amounts
rewired_networks = []
for i in range(iterations):
    print(f"Iteration {i+1}/{iterations}")
    # Rewire a copy so the actual June 2025 network stays untouched
    rewired_network = rewire_edges(total_nv_network_062025.copy())
    rewired_networks.append(rewired_network)

# Step 3: Log transform the rewirings' degree values and calculate the mean and SD of the sample population we have created
logged_degrees_rewired = []
for rewired_network in rewired_networks:
    degrees_rewired = rewired_network.degree_property_map('total').a
    logged_degrees_rewired.append(np.log(degrees_rewired))

# axis=0 aggregates across the rewired networks, giving one mean and one SD per vertex
mean_logged_degrees_rewired = np.mean(logged_degrees_rewired, axis=0)
std_logged_degrees_rewired = np.std(logged_degrees_rewired, axis=0)  # Was np.mean, which made the "SD" a copy of the mean

# Step 4: Calculate the z-scores for the rewirings using above information
# A vertex whose degree is the same in every rewiring has SD 0, so its z-score comes out as nan/inf.
z_scores = []
for logged_degrees in logged_degrees_rewired:  # Not named `degrees`, to avoid overwriting the degree map from the statistics cell
    z_score = (logged_degrees - mean_logged_degrees_rewired) / std_logged_degrees_rewired
    z_scores.append(z_score)

# Step 5: Calculate the actual structure's z-scores using above information
# Was `- -`, which added the mean instead of subtracting it
actual_z_score = (logged_degrees_actual - mean_logged_degrees_rewired) / std_logged_degrees_rewired

# Step 6: Plot the results

In [None]:
# Looping and tracking clusters across entire time period (refer to the g-t package dictionary for clues)