GoogleCloudPlatform · maunope · Oct 25, 2022 · Oct 10, 2022 · Oct 12, 2022 · Oct 12, 2022
diff --git a/.gitignore b/.gitignore
@@ -36,3 +36,4 @@ examples/cloud-operations/adfs/ansible/vars/vars.yaml
 examples/cloud-operations/adfs/ansible/gssh.sh
 examples/cloud-operations/multi-cluster-mesh-gke-fleet-api/ansible/vars.yaml
 examples/cloud-operations/multi-cluster-mesh-gke-fleet-api/ansible/gssh.sh
+blueprints/cloud-operations/network-dashboard/cloud-function.zip
diff --git a/blueprints/cloud-operations/network-dashboard/README.md b/blueprints/cloud-operations/network-dashboard/README.md
@@ -46,22 +46,32 @@ The Cloud Function currently tracks usage, limit and utilization of:
 - internal forwarding rules for internal L7 load balancers per VPC
 - internal forwarding rules for internal L4 load balancers per VPC peering group
 - internal forwarding rules for internal L7 load balancers per VPC peering group
-- Dynamic routes per VPC
-- Dynamic routes per VPC peering group
+- Dynamic routes per VPC 
+- Dynamic routes per VPC peering group 
+- Static routes per project (VPC drill down is available for usage)
+- Static routes per VPC peering group 
 - IP utilization per subnet (% of IP addresses used in a subnet)
 - VPC firewall rules per project (VPC drill down is available for usage)
 - Tuples per Firewall Policy
 
 It writes this values to custom metrics in Cloud Monitoring and creates a dashboard to visualize the current utilization of these metrics in Cloud Monitoring.
 
-Note that metrics are created in the cloud-function/metrics.yaml file.
+Note that metrics are defined in the `cloud-function/metrics.yaml` file. You can also edit the default limits for a specific network in that file; see the `vpc_peering_per_network` entry for an example.
+
+## Assumptions and limitations
+- The Cloud Function (CF) assumes that all VPCs in a peering group belong to the same organization, except for private services access (PSA) peerings
+- For PSA peerings, only subnet data is recorded
+- The CF assumes global routing is ON; this affects the dynamic routes usage calculation
+- The CF assumes custom routes importing/exporting is ON; this affects the static and dynamic routes usage calculations
+- The CF assumes all networks in a peering group have the same global routing and custom routes sharing configuration
 
-You can also edit default limits for a specific network in that file. See the example for `vpc_peering_per_network`.
 
 ## Next steps and ideas
 In a future release, we could support:
-- Static routes per VPC / per VPC peering group
 - Google managed VPCs that are peered with PSA (such as Cloud SQL or Memorystore)
+- Dynamic routes calculation for VPCs/VPC peering groups with "global routing" set to OFF
+- Static routes calculation for projects/VPC peering groups with "custom routes importing/exporting" set to OFF
+- Calculations for cross-organization peering groups
 
 If you are interested in this and/or would like to contribute, please contact legranda@google.com.
 <!-- BEGIN TFDOC -->

diff --git a/blueprints/cloud-operations/network-dashboard/cloud-function/main.py b/blueprints/cloud-operations/network-dashboard/cloud-function/main.py
@@ -163,6 +163,9 @@ def main(event, context=None):
   l4_forwarding_rules_dict = ilb_fwrules.get_forwarding_rules_dict(config, "L4")
   l7_forwarding_rules_dict = ilb_fwrules.get_forwarding_rules_dict(config, "L7")
   subnet_range_dict = networks.get_subnet_ranges_dict(config)
+  static_routes_dict = routes.get_static_routes_dict(config)
+  dynamic_routes_dict = routes.get_dynamic_routes(
+      config, metrics_dict, limits_dict['dynamic_routes_per_network_limit'])
 
   try:
 
@@ -181,10 +184,12 @@ def main(event, context=None):
     ilb_fwrules.get_forwarding_rules_data(
         config, metrics_dict, l7_forwarding_rules_dict,
         limits_dict['internal_forwarding_rules_l7_limit'], "L7")
+
+    routes.get_static_routes_data(config, metrics_dict, static_routes_dict,
+                                  project_quotas_dict)
+
     peerings.get_vpc_peering_data(config, metrics_dict,
                                   limits_dict['number_of_vpc_peerings_limit'])
-    dynamic_routes_dict = routes.get_dynamic_routes(
-        config, metrics_dict, limits_dict['dynamic_routes_per_network_limit'])
 
     # Per VPC peering group metrics
     metrics.get_pgg_data(
@@ -207,7 +212,13 @@ def main(event, context=None):
         ["subnet_ranges_per_peering_group"], subnet_range_dict,
         config["limit_names"]["SUBNET_RANGES"],
         limits_dict['number_of_subnet_IP_ranges_ppg_limit'])
-    routes.get_dynamic_routes_ppg(
+    #static
+    routes.get_routes_ppg(
+        config, metrics_dict["metrics_per_peering_group"]
+        ["static_routes_per_peering_group"], static_routes_dict,
+        limits_dict['static_routes_per_peering_group_limit'])
+    #dynamic
+    routes.get_routes_ppg(
         config, metrics_dict["metrics_per_peering_group"]
         ["dynamic_routes_per_peering_group"], dynamic_routes_dict,
         limits_dict['dynamic_routes_per_peering_group_limit'])

diff --git a/blueprints/cloud-operations/network-dashboard/cloud-function/metrics.yaml b/blueprints/cloud-operations/network-dashboard/cloud-function/metrics.yaml
@@ -99,6 +99,19 @@ metrics_per_network:
     utilization:
       name: dynamic_routes_per_network_utilization
       description: Number of Dynamic routes per network - utilization.
+  #static routes limit is per project, but usage is per network
+  static_routes_per_project:
+    usage:
+      name: static_routes_per_project_vpc_usage
+      description: Number of Static routes per project and network - usage.
+    limit:
+      name: static_routes_per_project_limit
+      description: Number of Static routes per project - limit.
+      values:
+        default_value: 250
+    utilization:
+      name: static_routes_per_project_utilization
+      description: Number of Static routes per project - utilization.
 metrics_per_peering_group:
   l4_forwarding_rules_per_peering_group:
     usage:
@@ -160,6 +173,18 @@ metrics_per_peering_group:
     utilization:
       name: dynamic_routes_per_peering_group_utilization
       description: Number of Dynamic routes per peering group - utilization.
+  static_routes_per_peering_group:
+    usage:
+      name: static_routes_per_peering_group_usage
+      description: Number of Static routes per peering group - usage.
+    limit:
+      name: static_routes_per_peering_group_limit
+      description: Number of Static routes per peering group - limit.
+      values:
+        default_value: 300
+    utilization:
+      name: static_routes_per_peering_group_utilization
+      description: Number of Static routes per peering group - utilization.
 metrics_per_project:
   firewalls:
     usage:

diff --git a/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/firewall_policies.py b/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/firewall_policies.py
@@ -26,8 +26,8 @@
 
 def get_firewall_policies_dict(config: dict):
   '''
-    Calls the Asset Inventory API to get all Firewall Policies under the GCP organization
-
+    Calls the Asset Inventory API to get all Firewall Policies under the GCP organization, including children
+    Ignores monitored projects list: returns all policies regardless of their parent resource
       Parameters:
         config (dict): The dict containing config like clients and limits
       Returns:
@@ -55,8 +55,8 @@ def get_firewall_policies_dict(config: dict):
 
 def get_firewal_policies_data(config, metrics_dict, firewall_policies_dict):
   '''
-    Gets the data for VPC Firewall lorem ipsum
-
+    Gets the data for VPC Firewall Policies in an organization, including children. All folders are considered,
+    but only projects in the monitored projects list are considered.
       Parameters:
         config (dict): The dict containing config like clients and limits
         metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions.
@@ -91,6 +91,9 @@ def get_firewal_policies_data(config, metrics_dict, firewall_policies_dict):
     parent_type = re.search("(^\w+)", firewall_policy["parent"]).group(
         1) if "parent" in firewall_policy else "projects"
 
+    if parent_type == "projects" and parent not in config["monitored_projects"]:
+      continue
+
     metric_labels = {'parent': parent, 'parent_type': parent_type}
 
     metric_labels["name"] = firewall_policy[

diff --git a/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/limits.py b/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/limits.py
@@ -42,7 +42,7 @@ def get_quotas_dict(quotas_list):
 
 def get_quota_project_limit(config, regions=["global"]):
   '''
-    Retrieves limit for a specific project quota 
+    Retrieves quotas for all monitored projects in the selected regions (default: 'global')
       Parameters:
         project_link (string): Project link.
       Returns:
@@ -158,7 +158,7 @@ def get_quota_current_limit(config, project_link, metric_name):
 
 def count_effective_limit(config, project_id, network_dict, usage_metric_name,
                           limit_metric_name, utilization_metric_name,
-                          limit_dict):
+                          limit_dict, timestamp=None):
   '''
     Calculates the effective limits (using algorithm in the link below) for peering groups and writes data (usage, limit, utilization) to the custom metrics.
     Source: https://cloud.google.com/vpc/docs/quota#vpc-peering-effective-limit
@@ -171,11 +171,13 @@ def count_effective_limit(config, project_id, network_dict, usage_metric_name,
         limit_metric_name (string): Name of the custom metric to be populated for limit per VPC peering group.
         utilization_metric_name (string): Name of the custom metric to be populated for utilization per VPC peering group.
         limit_dict (dictionary of string:int): Dictionary containing the limit per peering group (either VPC specific or default limit).
+        timestamp (time): timestamp to be recorded for all points
       Returns:
         None
   '''
 
-  timestamp = time.time()
+  if timestamp == None:
+    timestamp = time.time()
 
   if network_dict['peerings'] == []:
     return

diff --git a/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/metrics.py b/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/metrics.py
@@ -91,7 +91,8 @@ def create_metric(metric_name, description, monitoring_project, config):
 def append_data_to_series_buffer(config, metric_name, metric_value,
                                  metric_labels, timestamp=None):
   '''
-    Writes data to Cloud Monitoring custom metrics.
+    Appends data to Cloud Monitoring custom metrics, using a buffer. The buffer is flushed every BUFFER_LEN elements;
+    any unflushed series is discarded upon function closure.
       Parameters:
         config (dict): The dict containing config like clients and limits
         metric_name (string): Name of the metric
@@ -139,7 +140,7 @@ def append_data_to_series_buffer(config, metric_name, metric_value,
 
 def flush_series_buffer(config):
   '''
-    writes buffered metrics to Google Cloud Monitoring, empties buffer upon failure
+    Writes buffered metrics to Google Cloud Monitoring; empties the buffer on both success and failure
     config (dict): The dict containing config like clients and limits
   '''
   try:
@@ -188,6 +189,7 @@ def get_pgg_data(config, metric_dict, usage_dict, limit_metric, limit_dict):
 
     current_quota_limit_view = customize_quota_view(current_quota_limit)
 
+    timestamp = time.time()
     # For each network in this GCP project
     for network_dict in network_dict_list:
       if network_dict['network_id'] == 0:
@@ -238,7 +240,7 @@ def get_pgg_data(config, metric_dict, usage_dict, limit_metric, limit_dict):
                                    metric_dict["usage"]["name"],
                                    metric_dict["limit"]["name"],
                                    metric_dict["utilization"]["name"],
-                                   limit_dict)
+                                   limit_dict, timestamp)
       print(
           f"Buffered {metric_dict['usage']['name']} for peering group {network_dict['network_name']} in {project_id}"
       )

diff --git a/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/routes.py b/blueprints/cloud-operations/network-dashboard/cloud-function/metrics/routes.py
@@ -17,6 +17,7 @@
 import time
 
 from collections import defaultdict
+from google.protobuf import field_mask_pb2
 from . import metrics, networks, limits, peerings, routers
 
 
@@ -78,8 +79,8 @@ def get_routes_for_network(config, network_link, project_id, routers_dict):
 
 def get_dynamic_routes(config, metrics_dict, limits_dict):
   '''
-    Writes all dynamic routes per VPC to custom metrics.
-
+    This function gets the usage, limit and utilization for the dynamic routes per VPC
+    note: assumes global routing is ON for all VPCs
       Parameters:
         config (dict): The dict containing config like clients and limits
         metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions.
@@ -128,10 +129,10 @@ def get_dynamic_routes(config, metrics_dict, limits_dict):
     return dynamic_routes_dict
 
 
-def get_dynamic_routes_ppg(config, metric_dict, usage_dict, limit_dict):
+def get_routes_ppg(config, metric_dict, usage_dict, limit_dict):
   '''
-    This function gets the usage, limit and utilization for the dynamic routes per VPC peering group.
-
+    This function gets the usage, limit and utilization for the static or dynamic routes per VPC peering group.
+    note: for dynamic routes, assumes global routing is ON for all VPCs; assumes custom routes sharing is ON for all peered networks
       Parameters:
         config (dict): The dict containing config like clients and limits
         metric_dict (dictionary of string: string): Dictionary with the metric names and description, that will be used to populate the metrics
@@ -140,11 +141,12 @@ def get_dynamic_routes_ppg(config, metric_dict, usage_dict, limit_dict):
       Returns:
         None
   '''
-  for project in config["monitored_projects"]:
-    network_dict_list = peerings.gather_peering_data(config, project)
+  timestamp = time.time()
+  for project_id in config["monitored_projects"]:
+    network_dict_list = peerings.gather_peering_data(config, project_id)
 
     for network_dict in network_dict_list:
-      network_link = f"https://www.googleapis.com/compute/v1/projects/{project}/global/networks/{network_dict['network_name']}"
+      network_link = f"https://www.googleapis.com/compute/v1/projects/{project_id}/global/networks/{network_dict['network_name']}"
 
       limit = limits.get_ppg(network_link, limit_dict)
 
@@ -169,11 +171,119 @@ def get_dynamic_routes_ppg(config, metric_dict, usage_dict, limit_dict):
         peered_network_dict["usage"] = peered_usage
         peered_network_dict["limit"] = peered_limit
 
-      limits.count_effective_limit(config, project, network_dict,
+      limits.count_effective_limit(config, project_id, network_dict,
                                    metric_dict["usage"]["name"],
                                    metric_dict["limit"]["name"],
                                    metric_dict["utilization"]["name"],
-                                   limit_dict)
+                                   limit_dict, timestamp)
+      print(
+          f"Buffered {metric_dict['usage']['name']} for peering group {network_dict['network_name']} in {project_id}"
+      )
+
+
+def get_static_routes_dict(config):
+  '''
+    Calls the Asset Inventory API to get all static custom routes under the GCP organization.
+    Parameters:
+      config (dict): The dict containing config like clients and limits
+    Returns:
+      routes_per_vpc_dict (dictionary of string: int): Keys are the network links and values are the number of custom static routes per network.
+  '''
+  routes_per_vpc_dict = defaultdict()
+  usage_dict = defaultdict()
+
+  read_mask = field_mask_pb2.FieldMask()
+  read_mask.FromJsonString('name,versionedResources')
+
+  response = config["clients"]["asset_client"].search_all_resources(
+      request={
+          "scope": f"organizations/{config['organization']}",
+          "asset_types": ["compute.googleapis.com/Route"],
+          "read_mask": read_mask
+      })
+
+  for resource in response:
+    for versioned in resource.versioned_resources:
+      static_route = dict()
+      for field_name, field_value in versioned.resource.items():
+        static_route[field_name] = field_value
+      static_route["project_id"] = static_route["network"].split('/')[6]
+      static_route["network_name"] = static_route["network"].split('/')[-1]
+      network_link = f"https://www.googleapis.com/compute/v1/projects/{static_route['project_id']}/global/networks/{static_route['network_name']}"
+      #exclude default vpc and peering routes, dynamic routes are not in Cloud Asset Inventory
+      if "nextHopPeering" not in static_route and "nextHopNetwork" not in static_route:
+        if network_link not in routes_per_vpc_dict:
+          routes_per_vpc_dict[network_link] = dict()
+          routes_per_vpc_dict[network_link]["project_id"] = static_route[
+              "project_id"]
+          routes_per_vpc_dict[network_link]["network_name"] = static_route[
+              "network_name"]
+        if static_route["destRange"] not in routes_per_vpc_dict[network_link]:
+          routes_per_vpc_dict[network_link][static_route["destRange"]] = {}
+        if "usage" not in routes_per_vpc_dict[network_link]:
+          routes_per_vpc_dict[network_link]["usage"] = 0
+        routes_per_vpc_dict[network_link][
+            "usage"] = routes_per_vpc_dict[network_link]["usage"] + 1
+
+  #output a dict with network links and usage only
+  return {
+      network_link_out: routes_per_vpc_dict[network_link_out]["usage"]
+      for network_link_out in routes_per_vpc_dict
+  }
+
+
+def get_static_routes_data(config, metrics_dict, static_routes_dict,
+                           project_quotas_dict):
+  '''
+    Determines and writes the number of static routes for each VPC in monitored projects, the per project limit and the per project utilization
+    note: assumes custom routes sharing is ON for all VPCs
+      Parameters:
+        config (dict): The dict containing config like clients and limits
+        metrics_dict (dictionary of dictionary of string: string): metrics names and descriptions.
+        static_routes_dict (dictionary of string: int): Keys are the network links and values are the number of custom static routes per network.
+        project_quotas_dict (dictionary): Quotas per project; the static routes limit is read from project_quotas_dict[project_id]['global']['routes']['limit'].
+      Returns:
+        None
+  '''
+  timestamp = time.time()
+  project_usage = {project: 0 for project in config["monitored_projects"]}
+
+  #usage is drilled down by network
+  for network_link in static_routes_dict:
+
+    project_id = network_link.split('/')[6]
+    if (project_id not in config["monitored_projects"]):
+      continue
+    network_name = network_link.split('/')[-1]
+
+    project_usage[project_id] = project_usage[project_id] + static_routes_dict[
+        network_link]
+
+    metric_labels = {"project": project_id, "network_name": network_name}
+    metrics.append_data_to_series_buffer(
+        config, metrics_dict["metrics_per_network"]["static_routes_per_project"]
+        ["usage"]["name"], static_routes_dict[network_link], metric_labels,
+        timestamp=timestamp)
+
+  #limit and utilization are calculated by project
+  for project_id in project_usage:
+    current_quota_limit = project_quotas_dict[project_id]['global']["routes"][
+        "limit"]
+    if current_quota_limit is None:
       print(
-          f"Wrote {metric_dict['usage']['name']} for peering group {network_dict['network_name']} in {project}"
+          f"Could not determine static routes  metric for projects/{project_id} due to missing quotas"
       )
+      continue
+    # limit and utilization are calculated by project
+    metric_labels = {"project": project_id}
+    metrics.append_data_to_series_buffer(
+        config, metrics_dict["metrics_per_network"]["static_routes_per_project"]
+        ["limit"]["name"], current_quota_limit, metric_labels,
+        timestamp=timestamp)
+    metrics.append_data_to_series_buffer(
+        config, metrics_dict["metrics_per_network"]["static_routes_per_project"]
+        ["utilization"]["name"],
+        project_usage[project_id] / current_quota_limit, metric_labels,
+        timestamp=timestamp)
+
+  return