Skip to content

Commit

Permalink
Merge d3da92a into ffbcb09
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelwood committed May 26, 2021
2 parents ffbcb09 + d3da92a commit 2525021
Show file tree
Hide file tree
Showing 34 changed files with 488 additions and 70 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ jobs:
- run: pip install -r requirements_dev.txt

- run: flake8
# To avoid datagetter errors, we must specifically exclude src/datagetter
- run: black --check --exclude "/(\.eggs|\.git|_cache|src\/datagetter)/" ./
# To avoid errors from local srcs, we must specifically exclude src
- run: black --check --exclude "/(\.eggs|\.git|_cache|src)/" ./
4 changes: 2 additions & 2 deletions datastore/additional_data/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


class AdditionalDataGenerator(object):
""" Adds additional data to grant data """
"""Adds additional data to grant data"""

def __init__(self):
self.local_files_source = LocalFilesSource()
Expand All @@ -17,7 +17,7 @@ def __init__(self):
# Initialise other sources here

def create(self, grant):
""" Takes a grant's data and returns a dict of additional data """
"""Takes a grant's data and returns a dict of additional data"""

additional_data = {}

Expand Down
4 changes: 2 additions & 2 deletions datastore/additional_data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ class GeoLookup(models.Model):


class TSGOrgType(models.Model):
""" ThreeSixtyGiving Org Type mappings """
"""ThreeSixtyGiving Org Type mappings"""

def validate_regex(value):
""" Check that the input regex is valid """
"""Check that the input regex is valid"""
try:
re.compile(value)
except re.error as e:
Expand Down
4 changes: 2 additions & 2 deletions datastore/additional_data/sources/find_that_charity.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def update_additional_data(self, grant, additional_data):
self._cache[org_id] = None

def process_csv(self, file_data, org_type):
""" Returns total added. file_data array from csv """
"""Returns total added. file_data array from csv"""
added = 0
bulk_list = []

Expand Down Expand Up @@ -89,7 +89,7 @@ def process_csv(self, file_data, org_type):
return added

def import_from_path(self, path, org_type=None):
""" Path can be http or file path, org_type if omitted we guess from the filename """
"""Path can be http or file path, org_type if omitted we guess from the filename"""
added = 0

# Have a guess at the org type from the path
Expand Down
6 changes: 3 additions & 3 deletions datastore/additional_data/sources/local_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ def update_additional_data(self, grant, additional_data):
self.update_additional_with_region(grant, additional_data)

def _setup_charity_mappings(self):
""" Setup info for charity names """
"""Setup info for charity names"""

with open(os.path.join(self.data_files_dir, "charity_names.json")) as fd:

charity_names = json.load(fd)
self.id_name_org_mappings["recipientOrganization"].update(charity_names)

def _setup_org_name_mappings(self):
""" Setup overrides for org name """
"""Setup overrides for org name"""

with open(
os.path.join(self.data_files_dir, "primary_funding_org_name.json")
Expand All @@ -68,7 +68,7 @@ def _setup_org_name_mappings(self):
self.id_name_org_mappings["fundingOrganization"].update(funding_org_name)

def _setup_area_mappings(self):
""" Setup the area/district mappings """
"""Setup the area/district mappings"""

with open(
os.path.join(self.data_files_dir, "codelist.csv")
Expand Down
2 changes: 1 addition & 1 deletion datastore/additional_data/sources/tsg_org_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class TSGOrgTypesSource(object):
""" This adds a custom ThreeSixtyGiving organisation type of the funding organisation to the additional data"""
"""This adds a custom ThreeSixtyGiving organisation type of the funding organisation to the additional data"""

ADDITIONAL_DATA_KEY = "TSGFundingOrgType"

Expand Down
Empty file.
32 changes: 32 additions & 0 deletions datastore/api/dashboard/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import django_filters.rest_framework
from rest_framework import filters, generics
import db.models as db
from api.dashboard import serializers
from api.dashboard.permissions import ReadOnly


class Publishers(generics.ListAPIView):
    """List endpoint for the publishers attached to the most recent getter run.

    Search, field filtering and ordering are provided by the configured
    filter backends.
    """

    serializer_class = serializers.PublishersSerializer
    permission_classes = [ReadOnly]

    filter_backends = (
        filters.SearchFilter,
        django_filters.rest_framework.DjangoFilterBackend,
        filters.OrderingFilter,
    )

    # Consumed by SearchFilter.
    search_fields = ("^data__name",)
    # Consumed by DjangoFilterBackend.
    filterset_fields = ["name", "prefix"]
    # Consumed by OrderingFilter.
    ordering_fields = ["data__name"]

    def get_queryset(self):
        """Return the Publisher rows whose getter_run is the last GetterRun."""
        latest_run = db.GetterRun.objects.last()
        return db.Publisher.objects.filter(getter_run=latest_run)


class Sources(generics.ListAPIView):
    """List endpoint for the source files attached to the most recent getter run."""

    serializer_class = serializers.SourcesSerializer
    # pagination_class = CurrentLatestGrantsPaginator

    def get_queryset(self):
        """Return the SourceFile rows whose getter_run is the last GetterRun."""
        latest_run = db.GetterRun.objects.last()
        return db.SourceFile.objects.filter(getter_run=latest_run)
14 changes: 14 additions & 0 deletions datastore/api/dashboard/permissions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from rest_framework import permissions


class ReadOnly(permissions.BasePermission):
    """
    Always read-only even if authenticated

    Both the view-level and the object-level checks are implemented so the
    restriction also applies to views that never fetch a single object
    (for example list views), where only ``has_permission`` is consulted.
    """

    def has_permission(self, request, view):
        """Allow the request only when it uses a safe (read-only) method."""
        # we'll always allow GET, HEAD or OPTIONS requests.
        return request.method in permissions.SAFE_METHODS

    def has_object_permission(self, request, view, obj):
        """Allow access to ``obj`` only when the request uses a safe method."""
        return request.method in permissions.SAFE_METHODS
36 changes: 36 additions & 0 deletions datastore/api/dashboard/serializers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from rest_framework import serializers

import db.models as db


class SourcesSerializer(serializers.ModelSerializer):
    """Serialises a SourceFile for the dashboard API.

    The explicit fields below are declared in the same order as they are
    listed in ``Meta.fields``.
    """

    # Read from the "identifier" entry of the model's ``data`` attribute.
    id = serializers.JSONField(source="data.identifier")
    datagetter_data = serializers.JSONField()
    grants = serializers.IntegerField()
    # Sourced from ``get_distribution`` on the model.
    distribution = serializers.JSONField(source="get_distribution")
    # Read from the "modified" entry of the model's ``data`` attribute.
    modified = serializers.JSONField(source="data.modified")
    quality = serializers.JSONField()

    class Meta:
        model = db.SourceFile
        fields = (
            "id",
            "datagetter_data",
            "grants",
            "distribution",
            "modified",
            "quality",
        )


class PublishersSerializer(serializers.ModelSerializer):
    """Serialises a Publisher, together with its source files, for the dashboard API."""

    # Simple values looked up inside the model's ``data`` attribute.
    name = serializers.JSONField(source="data.name")
    logo = serializers.JSONField(source="data.logo")
    prefix = serializers.JSONField(source="data.prefix")
    website = serializers.JSONField(source="data.website")

    # Nested list of source files, sourced from ``get_sourcefiles`` on the model.
    files = SourcesSerializer(source="get_sourcefiles", many=True)

    class Meta:
        model = db.Publisher
        fields = ("name", "logo", "prefix", "website", "files")
2 changes: 1 addition & 1 deletion datastore/api/grantnav/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


class GrantNavPollForNewData(View):
""" API endpoint for GrantNav to poll to know that new data is available """
"""API endpoint for GrantNav to poll to know that new data is available"""

def get(self, *args, **kwargs):
statuses = db.Status.objects.all()
Expand Down
1 change: 1 addition & 0 deletions datastore/api/templates/api.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ <h2>API</h2>
<li><a href="{% url "api:current-latest-grants" %}">{% url "api:current-latest-grants" %}</a></li>
<li><a href="{% url "api:status" %}">{% url "api:status" %}</a></li>
<li><a href="{% url "api:grantnav-updates" %}">{% url "api:grantnav-updates" %}</a></li>
<li><a href="{% url "api:publishers" %}">{% url "api:publishers" %}</a></li>
</ul>
</div>
{% endblock %}
6 changes: 6 additions & 0 deletions datastore/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,17 @@
import api.control.api
import api.experimental.api
import api.grantnav.api
import api.dashboard.api

app_name = "api"

urlpatterns = [
path("", TemplateView.as_view(template_name="api.html"), name="index"),
path(
"dashboard/publishers",
api.dashboard.api.Publishers.as_view(),
name="publishers",
),
path(
"grantnav/updates",
api.grantnav.api.GrantNavPollForNewData.as_view(),
Expand Down
17 changes: 17 additions & 0 deletions datastore/data_quality/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from lib360dataquality.cove.threesixtygiving import (
USEFULNESS_TEST_CLASS,
common_checks_360,
)
from lib360dataquality.cove.schema import Schema360

schema = Schema360()


def create(grants):
    """Run the usefulness quality checks and return the raw check context.

    grants: grants json

    Returns the dict that ``common_checks_360`` populates.
    """
    # Imported locally so the module-level imports stay untouched.
    from tempfile import TemporaryDirectory

    result = {}

    # Hand the checker a throwaway directory rather than the filesystem
    # root, matching data_quality.quality_data.create.
    # NOTE(review): presumably the directory is used as scratch space by the
    # checks - confirm against lib360dataquality.
    with TemporaryDirectory() as tempdir:
        common_checks_360(
            result, tempdir, grants, schema, test_classes=[USEFULNESS_TEST_CLASS]
        )

    return result
5 changes: 5 additions & 0 deletions datastore/data_quality/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django.apps import AppConfig


class AdditionalDataConfig(AppConfig):
    """Django application configuration for the ``data_quality`` app."""

    # NOTE(review): the class name does not match the app it configures and
    # looks carried over from another app - confirm nothing references it by
    # name before renaming.
    name = "data_quality"
34 changes: 34 additions & 0 deletions datastore/data_quality/management/commands/rewrite_quality_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from django.core.management.base import BaseCommand

from data_quality import quality_data
import db.models as db


class Command(BaseCommand):
    """Recompute and store the quality data of source files.

    The source files are selected either from the current "latest" series or
    from a specific datagetter run id.
    """

    help = (
        "Reloads the quality data on source files specified by datagetter id or latest"
    )

    def add_arguments(self, parser):
        """Register the single positional argument: a getter run id or 'latest'."""
        parser.add_argument(
            type=str,
            action="store",
            dest="getter_run",
            help="The datagetter run id or latest",
        )

    def handle(self, *args, **options):
        """Regenerate ``quality`` for every selected source file and save it."""
        if "latest" in options["getter_run"]:
            source_files = db.Latest.objects.get(
                series=db.Latest.CURRENT
            ).sourcefile_set.all()
        else:
            # The default manager is ``objects``; ``object`` does not exist on
            # the model class and raised AttributeError.
            source_files = db.SourceFile.objects.filter(
                getter_run=options["getter_run"]
            )

        for source_file in source_files:
            # The quality checks expect the grants wrapped in a "grants" key.
            grants_list = {
                "grants": list(source_file.grant_set.values_list("data", flat=True))
            }
            source_file.quality = quality_data.create(grants_list)
            source_file.save()
44 changes: 44 additions & 0 deletions datastore/data_quality/quality_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from lib360dataquality.cove.threesixtygiving import (
USEFULNESS_TEST_CLASS,
TEST_CLASSES,
common_checks_360,
)
from lib360dataquality.cove.schema import Schema360
from tempfile import TemporaryDirectory

schema = Schema360()


def create(grants):
    """Summarise the usefulness quality checks for a set of grants.

    grants: grants json

    Returns a dict keyed by test class name:
    {
        "TestClassName": { "heading": "text", "count": n },
        "TestTwoClassName": { "count": 0 },
    }
    """
    context = {"file_type": "json"}

    with TemporaryDirectory() as scratch_dir:
        common_checks_360(
            context,
            scratch_dir,
            grants,
            schema,
            test_classes=[USEFULNESS_TEST_CLASS],
        )

    # Results only come back for tests that found an issue, so start from a
    # zero-count entry for every available test.
    summary = {
        test_class.__name__: {"count": 0}
        for test_class in TEST_CLASSES[USEFULNESS_TEST_CLASS]
    }

    # Overwrite the zero-count entries with what the checks reported.
    for check in context["usefulness_checks"]:
        details = check[0]
        summary[details["type"]] = {
            "heading": details["heading"],
            "count": details["count"],
        }

    return summary
51 changes: 51 additions & 0 deletions datastore/db/management/commands/archive_getter_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from django.core.management.base import BaseCommand, CommandError

from db.models import GetterRun


class Command(BaseCommand):
    """Archive one or more getter runs, optionally asking for confirmation."""

    help = "Archive the getter run by the specified getter run ids. Archiving deletes grant data but not the metadata (SourceFiles, Publishers)"

    def add_arguments(self, parser):
        """Register the run ids positional plus the --no-prompt/--oldest flags."""
        parser.add_argument(
            type=int,
            nargs="*",
            action="store",
            dest="getter_run_ids",
            help="The datagetter run ids",
        )

        parser.add_argument(
            "--no-prompt",
            action="store_true",
            help="Don't prompt for archiving",
        )

        parser.add_argument(
            "--oldest",
            action="store_true",
            help="Archives the oldest getter run",
        )

    def handle(self, *args, **options):
        """Archive each requested getter run.

        Raises CommandError when no run is specified, when --oldest is used
        but no getter run exists, or when a given run id does not exist.
        """
        if options.get("oldest"):
            oldest_run = GetterRun.objects.order_by("datetime").first()
            # first() returns None on an empty table; fail with a clear
            # message instead of an AttributeError on ``.pk``.
            if oldest_run is None:
                raise CommandError("No getter runs available to archive")
            # --oldest replaces any ids given on the command line.
            options["getter_run_ids"] = [oldest_run.pk]

        if not options["getter_run_ids"]:
            raise CommandError("No datagetter data specified")

        for run in options["getter_run_ids"]:
            # Only the lookup is guarded, so a DoesNotExist raised while
            # archiving is not misreported as a missing run id.
            try:
                getter_run = GetterRun.objects.get(pk=run)
            except GetterRun.DoesNotExist as err:
                raise CommandError("Run id '%s' doesn't exist " % run) from err

            if not options["no_prompt"]:
                answer = input("Confirm delete grant data '%s' y/n: " % run)
                # Require the answer to start with "y": a substring test
                # would treat replies such as "nay" as confirmation.
                if not answer.strip().lower().startswith("y"):
                    continue

            getter_run.archive_run()
            self.stdout.write("Archived %s" % run)
2 changes: 1 addition & 1 deletion datastore/db/management/commands/create_data_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def handle(self, *args, **options):
data_all_file = "%s/data_all.json" % options["dir"]

def flatten_grant(in_grant):
""" Add the additional_data inside grant object """
"""Add the additional_data inside grant object"""
out_grant = {}
out_grant.update(in_grant["data"])
try:
Expand Down
Loading

0 comments on commit 2525021

Please sign in to comment.