diff --git a/README.md b/README.md index 379ba127..4764f8da 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,8 @@ $ manage.py load_datagetter_data ../path/to/data/dir/from/datagetter/ A number of the sources for additional_data have their own local caches these can be loaded via: ``` -$ manage.py load_code_names +$ manage.py load_geocode_names +$ manage.py load_codelist_codes $ manage.py load_geolookups $ manage.py load_nspl $ manage.py load_org_data @@ -83,13 +84,13 @@ Whilst leaving the up command running, you should use `docker-compose run` with eg; instead of running: ``` -$ manage.py load_code_names +$ manage.py load_geocode_names ``` Run: ``` -$ docker-compose -f docker-compose.dev.yml run datastore-web python datastore/manage.py load_code_names +$ docker-compose -f docker-compose.dev.yml run datastore-web python datastore/manage.py load_geocode_names ``` ## Getting database CLI diff --git a/datastore/additional_data/generator.py b/datastore/additional_data/generator.py index dcdc2ac0..3e86315c 100644 --- a/datastore/additional_data/generator.py +++ b/datastore/additional_data/generator.py @@ -6,6 +6,7 @@ from additional_data.sources.additional_data_recipient_location import ( AdditionalDataRecipientLocation, ) +from additional_data.sources.codelist_code import CodeListSource class AdditionalDataGenerator(object): @@ -18,6 +19,7 @@ def __init__(self): self.geo_lookup = GeoLookupSource() self.tsg_org_types = TSGOrgTypesSource() self.additional_data_recipient_location = AdditionalDataRecipientLocation() + self.code_lists = CodeListSource() # Initialise Other Sources here def create(self, grant): @@ -35,5 +37,6 @@ def create(self, grant): self.additional_data_recipient_location.update_additional_data( grant, additional_data ) + self.code_lists.update_additional_data(grant, additional_data) return additional_data diff --git a/datastore/additional_data/management/commands/load_codelist_codes.py b/datastore/additional_data/management/commands/load_codelist_codes.py new file mode 100644 index
00000000..0143fe94 --- /dev/null +++ b/datastore/additional_data/management/commands/load_codelist_codes.py @@ -0,0 +1,11 @@ +from django.core.management.base import BaseCommand + +from additional_data.sources.codelist_code import CodeListSource + + +class Command(BaseCommand): + help = "Imports 360Giving standard codelist data" + + def handle(self, *args, **options): + source = CodeListSource() + source.import_codelists() diff --git a/datastore/additional_data/management/commands/load_code_names.py b/datastore/additional_data/management/commands/load_geocode_names.py similarity index 65% rename from datastore/additional_data/management/commands/load_code_names.py rename to datastore/additional_data/management/commands/load_geocode_names.py index 34ebbf84..74837985 100644 --- a/datastore/additional_data/management/commands/load_code_names.py +++ b/datastore/additional_data/management/commands/load_geocode_names.py @@ -1,11 +1,11 @@ from django.core.management.base import BaseCommand -from additional_data.sources.code_names import CodeNamesSource +from additional_data.sources.geocode_names import GeoCodeNamesSource class Command(BaseCommand): help = "Imports location code names" def handle(self, *args, **options): - source = CodeNamesSource() + source = GeoCodeNamesSource() source.import_code_names() diff --git a/datastore/additional_data/migrations/0008_auto_20230123_1727.py b/datastore/additional_data/migrations/0008_auto_20230123_1727.py new file mode 100644 index 00000000..9c964e5d --- /dev/null +++ b/datastore/additional_data/migrations/0008_auto_20230123_1727.py @@ -0,0 +1,47 @@ +# Generated by Django 3.2.16 on 2023-01-23 17:27 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("additional_data", "0007_auto_20200918_0954"), + ] + + operations = [ + migrations.CreateModel( + name="GeoCodeName", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, +
verbose_name="ID", + ), + ), + ("code", models.CharField(db_index=True, max_length=9)), + ("data", models.JSONField()), + ], + ), + migrations.DeleteModel( + name="CodeName", + ), + migrations.AlterField( + model_name="geolookup", + name="data", + field=models.JSONField(), + ), + migrations.AlterField( + model_name="nspl", + name="data", + field=models.JSONField(), + ), + migrations.AlterField( + model_name="orginfocache", + name="data", + field=models.JSONField(), + ), + ] diff --git a/datastore/additional_data/migrations/0009_codelistcode.py b/datastore/additional_data/migrations/0009_codelistcode.py new file mode 100644 index 00000000..f42de5dd --- /dev/null +++ b/datastore/additional_data/migrations/0009_codelistcode.py @@ -0,0 +1,46 @@ +# Generated by Django 3.2.16 on 2023-01-24 13:08 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("additional_data", "0008_auto_20230123_1727"), + ] + + operations = [ + migrations.CreateModel( + name="CodelistCode", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "list_name", + models.CharField( + help_text="The name of the codelist the code belongs to", + max_length=200, + ), + ), + ("code", models.CharField(help_text="The code", max_length=200)), + ( + "title", + models.CharField(help_text="The title of the code", max_length=200), + ), + ( + "description", + models.TextField(help_text="The long description of the code"), + ), + ], + options={ + "unique_together": {("list_name", "code")}, + }, + ), + ] diff --git a/datastore/additional_data/models.py b/datastore/additional_data/models.py index c3b2f5c1..c4decf89 100644 --- a/datastore/additional_data/models.py +++ b/datastore/additional_data/models.py @@ -100,7 +100,7 @@ class NSPL(models.Model): data = JSONField() -class CodeName(models.Model): +class GeoCodeName(models.Model): code = models.CharField(max_length=9, 
db_index=True) data = JSONField() @@ -156,3 +156,17 @@ class Meta: def __str__(self): return self.tsg_org_type + + +class CodelistCode(models.Model): + """360Giving standard code lists codes and titles""" + + list_name = models.CharField( + max_length=200, help_text="The name of the codelist the code belongs to" + ) + code = models.CharField(max_length=200, help_text="The code") + title = models.CharField(max_length=200, help_text="The title of the code") + description = models.TextField(help_text="The long description of the code") + + class Meta: + unique_together = ("list_name", "code") diff --git a/datastore/additional_data/sources/codelist_code.py b/datastore/additional_data/sources/codelist_code.py new file mode 100644 index 00000000..3aa7c0ae --- /dev/null +++ b/datastore/additional_data/sources/codelist_code.py @@ -0,0 +1,90 @@ +import csv +import requests + +from additional_data.models import CodelistCode + +code_lists_urls = [ + "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/codelists/grantToIndividualsPurpose.csv", + "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/codelists/grantToIndividualsReason.csv", + "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/codelists/regrantType.csv", + # These lists aren't yet ready for use in the datastore + # https://github.com/ThreeSixtyGiving/standard/issues/348 + # https://github.com/ThreeSixtyGiving/standard/issues/349 + # "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/codelists/countryCode.csv", + # "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/codelists/currency.csv", + # "https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/codelists/geoCodeType.csv", +] + + +class CodeListSource(object): + """Looks up codes from 360Giving codelists and gets the title value of the code + responsible for field: codeListLookup + """ + + def import_codelists(self): + CodelistCode.objects.all().delete() + + for 
code_list_url in code_lists_urls: + # list name = file name at the end of the URL, minus the 4-character ".csv" extension + list_name = code_list_url.split("/")[-1][:-4] + with requests.get(code_list_url, stream=True) as r: + r.raise_for_status() + file_data = csv.DictReader( + r.iter_lines(decode_unicode=True), delimiter="," + ) + for value in file_data: + CodelistCode.objects.create( + code=value["Code"], + title=value["Title"], + description=value["Description"], + list_name=list_name, + ) + + def update_additional_data(self, grant, additional_data): + # check All the fields in the grant data that use codelists and make additional data field versions of them + + primaryGrantReason = "" + secondaryGrantReason = "" + grantPurpose = "" + regrantType = "" + + try: + code = grant["toIndividualDetails"]["primaryGrantReason"] + primaryGrantReason = CodelistCode.objects.get( + code=code, list_name="grantToIndividualsReason" + ).title + except (KeyError, CodelistCode.DoesNotExist): + pass + + try: + code = grant["toIndividualDetails"]["secondaryGrantReason"] + secondaryGrantReason = CodelistCode.objects.get( + code=code, list_name="grantToIndividualsReason" + ).title + except (KeyError, CodelistCode.DoesNotExist): + pass + + try: + code = grant["toIndividualDetails"]["grantPurpose"] + grantPurpose = CodelistCode.objects.get( + code=code, list_name="grantToIndividualsPurpose" + ).title + except (KeyError, CodelistCode.DoesNotExist): + pass + + try: + code = grant["regrantType"] + regrantType = CodelistCode.objects.get( + code=code, list_name="regrantType" + ).title + except (KeyError, CodelistCode.DoesNotExist): + pass + + additional_data["codeListLookup"] = { + "toIndividualDetails": { + "primaryGrantReason": primaryGrantReason, + "secondaryGrantReason": secondaryGrantReason, + "grantPurpose": grantPurpose, + }, + "regrantType": regrantType, + } diff --git a/datastore/additional_data/sources/code_names.py b/datastore/additional_data/sources/geocode_names.py similarity index 94% rename from 
datastore/additional_data/sources/code_names.py rename to datastore/additional_data/sources/geocode_names.py index 977d28ab..386bc704 100644 --- a/datastore/additional_data/sources/code_names.py +++ b/datastore/additional_data/sources/geocode_names.py @@ -5,14 +5,14 @@ import requests -from additional_data.models import CodeName +from additional_data.models import GeoCodeName # based on 'import_chd' function in # https://github.com/drkane/find-that-postcode/blob/master/findthatpostcode/commands/codes.py -class CodeNamesSource(object): - """Uses CHD (Change history data) at https://geoportal.statistics.gov.uk/ to obtain information about code names.""" +class GeoCodeNamesSource(object): + """Uses CHD (Change history data) at https://geoportal.statistics.gov.uk/ to obtain information about geo code names.""" CHD_URL = "https://www.arcgis.com/sharing/rest/content/items/56b8f6d2d26646cb9d21fadca2f09452/data" @@ -128,9 +128,9 @@ def save_data(self, areas): bulk_save = [] for code, data in areas.items(): - bulk_save.append(CodeName(code=code, data=data)) + bulk_save.append(GeoCodeName(code=code, data=data)) - CodeName.objects.bulk_create(bulk_save) + GeoCodeName.objects.bulk_create(bulk_save) def import_code_names(self, url=CHD_URL): """ @@ -147,7 +147,7 @@ def import_code_names(self, url=CHD_URL): zip_file = self.get_zipfile(url) areas = self.get_areas(zip_file) - if CodeName.objects.exists(): - CodeName.objects.all().delete() + if GeoCodeName.objects.exists(): + GeoCodeName.objects.all().delete() self.save_data(areas) diff --git a/datastore/additional_data/sources/nspl.py b/datastore/additional_data/sources/nspl.py index 612b461b..1cb16bd2 100644 --- a/datastore/additional_data/sources/nspl.py +++ b/datastore/additional_data/sources/nspl.py @@ -6,7 +6,7 @@ import requests -from additional_data.models import NSPL, CodeName +from additional_data.models import NSPL, GeoCodeName # based on 
https://github.com/drkane/find-that-postcode/blob/master/findthatpostcode/commands/postcodes.py @@ -161,9 +161,9 @@ def update_location_data_code_names(self, location_data): continue except KeyError: try: - code_name_obj = CodeName.objects.get(code=field_value) + code_name_obj = GeoCodeName.objects.get(code=field_value) self._code_name_cache[field_value] = code_name_obj - except CodeName.DoesNotExist: + except GeoCodeName.DoesNotExist: self._code_name_cache[field_value] = None continue diff --git a/datastore/tests/test_additional_data_code_names.py b/datastore/tests/test_additional_data_code_names.py index 0cad8203..73b9cf3d 100644 --- a/datastore/tests/test_additional_data_code_names.py +++ b/datastore/tests/test_additional_data_code_names.py @@ -2,15 +2,15 @@ from pathlib import PurePath from django.test import TestCase -from additional_data.models import CodeName -from additional_data.sources.code_names import CodeNamesSource +from additional_data.models import GeoCodeName +from additional_data.sources.geocode_names import GeoCodeNamesSource test_files_dir = PurePath(__file__).parent.joinpath("files") class TestAdditionalDataCodeNames(TestCase): def test_import_code_names_with_data(self): - code_names = CodeNamesSource() + code_names = GeoCodeNamesSource() with requests_mock.Mocker() as m: with open( @@ -20,9 +20,9 @@ def test_import_code_names_with_data(self): m.get("{}".format(code_names.CHD_URL), body=infile) code_names.import_code_names() - self.assertEqual(len(CodeName.objects.all()), 3) + self.assertEqual(len(GeoCodeName.objects.all()), 3) # check one example - code_names_object = CodeName.objects.filter(code="S12000015") + code_names_object = GeoCodeName.objects.filter(code="S12000015") - self.assertTrue(len(code_names_object), 1) + self.assertEqual(len(code_names_object), 1) self.assertEqual( code_names_object[0].data, @@ -50,7 +50,7 @@ def test_import_code_names_with_data(self): ) def test_import_code_names_without_data(self): - code_names = CodeNamesSource() + code_names = GeoCodeNamesSource() with 
requests_mock.Mocker() as m: with open( @@ -60,11 +60,11 @@ def test_import_code_names_without_data(self): m.get("{}".format(code_names.CHD_URL), body=infile) code_names.import_code_names() - self.assertEqual(len(CodeName.objects.all()), 0) + self.assertEqual(len(GeoCodeName.objects.all()), 0) def test_import_code_names_deletes_previous_records(self): # When import_code_names is run, should delete CodeName model data. - code_names = CodeNamesSource() + code_names = GeoCodeNamesSource() with requests_mock.Mocker() as m: with open( @@ -74,7 +74,7 @@ def test_import_code_names_deletes_previous_records(self): m.get("{}".format(code_names.CHD_URL), body=infile) code_names.import_code_names() - self.assertTrue(CodeName.objects.filter(code="S12000015").first()) + self.assertTrue(GeoCodeName.objects.filter(code="S12000015").first()) with open( test_files_dir.joinpath("code_names_with_data_bis.zip"), "rb" @@ -83,6 +83,10 @@ def test_import_code_names_deletes_previous_records(self): m.get("{}".format(code_names.CHD_URL), body=infile) code_names.import_code_names() - self.assertEqual(len(CodeName.objects.all()), 6) - self.assertFalse(CodeName.objects.filter(code="S12000015").first()) - self.assertTrue(CodeName.objects.filter(code="E32000003").first()) + self.assertEqual(len(GeoCodeName.objects.all()), 6) + self.assertFalse( + GeoCodeName.objects.filter(code="S12000015").first() + ) + self.assertTrue( + GeoCodeName.objects.filter(code="E32000003").first() + ) diff --git a/datastore/tests/test_additional_data_codelist_code.py b/datastore/tests/test_additional_data_codelist_code.py new file mode 100644 index 00000000..ee375158 --- /dev/null +++ b/datastore/tests/test_additional_data_codelist_code.py @@ -0,0 +1,37 @@ +from django.test import TestCase +from additional_data.sources.codelist_code import CodeListSource + + +class TestCodeLists(TestCase): + def test_code_list(self): + source = CodeListSource() + source.import_codelists() + + grant = { + "toIndividualDetails": { + 
"primaryGrantReason": "GTIR040", + "grantPurpose": "GTIP170", + }, + "regrantType": "FRG010", + } + + additional_data_in = {} + + additional_data_out = { + "codeListLookup": { + "toIndividualDetails": { + "primaryGrantReason": "Mental Health", + "secondaryGrantReason": "", + "grantPurpose": "Exceptional costs", + }, + "regrantType": "Common Regrant", + } + } + + source.update_additional_data(grant, additional_data_in) + + self.assertEqual( + additional_data_in, + additional_data_out, + "The expected additional data isn't correct", + ) diff --git a/datastore/tests/test_additional_data_nspl.py b/datastore/tests/test_additional_data_nspl.py index 6ce22a0d..fa239726 100644 --- a/datastore/tests/test_additional_data_nspl.py +++ b/datastore/tests/test_additional_data_nspl.py @@ -4,7 +4,7 @@ from django.test import TestCase from additional_data.models import NSPL -from additional_data.sources.code_names import CodeNamesSource +from additional_data.sources.geocode_names import GeoCodeNamesSource from additional_data.sources.nspl import NSPLSource from db.models import Grant @@ -148,7 +148,7 @@ def test_nspl_update_additional_data_with_existing_postcode_no_code_names(self): def test_nspl_update_additional_data_with_existing_postcode_with_code_names(self): # Import Code Names data. - code_names = CodeNamesSource() + code_names = GeoCodeNamesSource() with requests_mock.Mocker() as m: with open( "./datastore/tests/files/code_names_with_data.zip", "rb"