WIP

ThreeSixtyGiving · Oct 27, 2021 · f3f22d4 · f3f22d4
1 parent cdf7fec
commit f3f22d4
Showing 1 changed file with 33 additions and 18 deletions.
diff --git a/datastore/data_quality/quality_data.py b/datastore/data_quality/quality_data.py
@@ -8,7 +8,7 @@
 from lib360dataquality.cove.schema import Schema360
 from lib360dataquality import check_field_present
 
-from django.db.models import Q, Sum
+from django.db.models import Sum
 from django.db.models.expressions import RawSQL
 from django.db import connection
 
@@ -57,8 +57,13 @@ def create(grants):
     for available_test in TEST_CLASSES[USEFULNESS_TEST_CLASS]:
         quality_results[available_test.__name__] = {"count": 0, "fail": False}
 
-    # This is a test that we actually copy from another test see below
-    quality_results["RecipientOrgPrefixExternal"] = {"count": 0, "fail": False}
+    # Initialise two new tests
+    # These will be derived from RecipientOrg360GPrefix
+    quality_results["RecipientOrgPrefixExternal"] = {
+        "count": cove_results["grants_aggregates"]["count"],
+        "fail": False,
+    }
+    quality_results["RecipientOrgPrefix50pcExternal"] = {"count": 0, "fail": False}
 
     # Update with a heading and count template.
     for test in cove_results["usefulness_checks"]:
@@ -68,19 +73,22 @@ def create(grants):
             # If all the grants fail a test then we mark as fail true
             "fail": test[0]["count"] == cove_results["grants_aggregates"]["count"],
         }
-
-        # Our fail/pass conditions for this test are based at least 50% of recipients
-        # having an external (non 360G) org id.
         if "RecipientOrg360GPrefix" in test[0]["type"]:
-            quality_results["RecipientOrgPrefix50pcExternal"] = test[0]
+            # Create a version of this test for 50% ext org ids
+            # This test fails when at least 50% of grants have a 360G-prefixed recipient
+            # org id, i.e. it passes only when more than half have an external org id.
             quality_results["RecipientOrgPrefix50pcExternal"]["fail"] = (
                 test[0]["count"] >= cove_results["grants_aggregates"]["count"] / 2
             )
 
             # Create an inverted version of this test for simplicity
+            # Total grants minus the number of grants with a 360G prefix gives the
+            # number of grants *with* an external org id.
+            count = cove_results["grants_aggregates"]["count"] - test[0]["count"]
+
             quality_results["RecipientOrgPrefixExternal"] = {
-                "count": cove_results["grants_aggregates"]["count"] - test[0]["count"],
-                "fail": test[0]["count"] == 0,
+                "count": count,
+                "fail": count == 0,
                 "heading": "Recipient Orgs with external org identifier",
             }
 
@@ -106,17 +114,22 @@ def create(grants):
     aggregates["recipient_org_types"] = {}
 
     def extract_org_id_type(org_id):
+        # Ignore internal org ids
+        if "360G-" in org_id:
+            return None
+
         try:
             return org_id.split("-")[1]
         except IndexError:
-            return "Unknown"
+            return None
 
     for grant in grants["grants"]:
         org_id_type = extract_org_id_type(grant["recipientOrganization"][0]["id"])
-        try:
-            aggregates["recipient_org_types"][org_id_type] += 1
-        except KeyError:
-            aggregates["recipient_org_types"][org_id_type] = 1
+        if org_id_type:
+            try:
+                aggregates["recipient_org_types"][org_id_type] += 1
+            except KeyError:
+                aggregates["recipient_org_types"][org_id_type] = 1
 
     return quality_results, aggregates
 
@@ -298,10 +311,10 @@ def get_pc_publishers_with_recipient_ext_org(self):
             ),
         )
 
-        for i in ranges:
-            ret["{}% - {}%".format(*i)] = (
-                query.distinct("data__publisher__prefix")
-                .filter(Q(pc__gte=i[0]) & Q(pc__lt=i[1]))
+        for range in ranges:
+            ret["{}% - {}%".format(*range)] = (
+                query.filter(pc__range=(range[0], range[1]))
+                .distinct("data__publisher__prefix")
                 .count()
                 / total_publishers
                 * 100
@@ -369,6 +382,8 @@ def get_grant_org_id_types_used(self):
            aggregate->'recipient_org_types' is not null AND
            db_sourcefile_latest.latest_id={latest_id}
          GROUP BY keyval.key
+         ORDER BY sum DESC
+         LIMIT 10
         """
 
         cursor = connection.cursor()