Skip to content

Commit

Permalink
feat: import old applications from spreadsheet file (hl-1310) (#3056)
Browse files Browse the repository at this point in the history
* feat(backend): import old applications from excel

* feat(handler): search functionality for archival applications

* refactor: pick only the fields that are necessary for import

* feat: add start_date as handled_at because data is missing in spreadsheet
  • Loading branch information
sirtawast committed Jun 13, 2024
1 parent e511717 commit 0302394
Show file tree
Hide file tree
Showing 22 changed files with 880 additions and 89 deletions.
18 changes: 16 additions & 2 deletions backend/benefit/applications/api/v1/application_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

from applications.api.v1.serializers.application import (
ApplicantApplicationSerializer,
ArchivalApplicationListSerializer,
HandlerApplicationSerializer,
)
from applications.api.v1.serializers.application_alteration import (
Expand All @@ -41,7 +42,12 @@
ApplicationOrigin,
ApplicationStatus,
)
from applications.models import Application, ApplicationAlteration, ApplicationBatch
from applications.models import (
Application,
ApplicationAlteration,
ApplicationBatch,
ArchivalApplication,
)
from applications.services.ahjo_integration import (
ExportFileInfo,
generate_zip,
Expand Down Expand Up @@ -584,7 +590,15 @@ def simplified_application_list(self, request):
application_origin=ApplicationOrigin.APPLICANT,
)
serializer = self.serializer_class(qs, many=True, context=context)
return Response(serializer.data, status=status.HTTP_200_OK)

archived = ArchivalApplicationListSerializer(
ArchivalApplication.objects.all(), many=True
)

return Response(
serializer.data + archived.data,
status=status.HTTP_200_OK,
)

@action(methods=["GET"], detail=False)
def export_csv(self, request) -> StreamingHttpResponse:
Expand Down
223 changes: 166 additions & 57 deletions backend/benefit/applications/api/v1/search_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
from rest_framework.response import Response
from rest_framework.views import APIView

from applications.api.v1.serializers.application import HandlerApplicationListSerializer
from applications.models import Application
from applications.api.v1.serializers.application import (
ArchivalApplicationListSerializer,
HandlerApplicationListSerializer,
)
from applications.models import Application, ArchivalApplication
from common.permissions import BFIsHandler


Expand All @@ -27,6 +30,7 @@ class SearchPattern(models.TextChoices):
IN_MEMORY = "in-memory", _("In-memory")
NUMBERS = "numbers", _("Numbers")
SSN = "ssn", _("SSN")
ARCHIVAL = "archived_application", _("Archival application")


class SearchView(APIView):
Expand All @@ -37,67 +41,124 @@ class SearchView(APIView):
]

def get(self, request):
applications = []
search_string = (
request.query_params.get("q", "").strip() if "q" in request.GET else None
)
archived = request.query_params.get("archived") == "1" or False
search_from_archival = request.query_params.get("archival") == "1" or False

if search_string is None:
return Response(
{"error": _("Search query 'q' is required")},
status=status.HTTP_400_BAD_REQUEST,
)

filters = _detect_filters(search_string)
in_memory_filter_str = filters["in_memory_filter_str"]
search_string = filters["search_string"]
detected_pattern = filters["detected_pattern"]

if re.search("^HEL\\s\\d+", search_string, re.I):
# Archival application number starts with R or Y followed by numbers
if re.search("^[RY]\\d+", search_string, re.I) and search_from_archival:
detected_pattern = SearchPattern.ARCHIVAL
# Ahjo case id starts with HEL and is followed by numbers
elif re.search("^HEL\\s\\d+", search_string, re.I):
detected_pattern = SearchPattern.AHJO
# Social security number is 6 digits followed by a separator and 3 digits
elif re.search("^\\d{6}([-+a-z])\\d{3}\\w", search_string, re.I):
detected_pattern = SearchPattern.SSN
# Other numbers assumed as application number or company business id
elif re.search("^\\d+", search_string):
detected_pattern = SearchPattern.NUMBERS

if detected_pattern in [SearchPattern.AHJO, SearchPattern.NUMBERS]:
return _query_and_respond_to_numbers(search_string, detected_pattern)
elif detected_pattern == SearchPattern.SSN:
return _query_and_respond_to_ssn(
search_string,
detected_pattern,
)
# Perform trigram query for company name
elif detected_pattern in [SearchPattern.COMPANY, SearchPattern.IN_MEMORY]:
applications = _query_for_company(archived, search_string)

data = HandlerApplicationListSerializer(applications, many=True).data
in_memory_results = None

# Use filter string to perform in-memory search
if (
detected_pattern in [SearchPattern.COMPANY, SearchPattern.IN_MEMORY]
and in_memory_filter_str != ""
):
in_memory_results = _perform_in_memory_search(
data,
return search_applications(
search_string,
in_memory_filter_str,
detected_pattern,
archived,
search_from_archival,
)


def search_applications(
search_string,
in_memory_filter_str,
detected_pattern,
archived,
search_from_archival=False,
) -> Response:
# Return early in case of number-like pattern
if detected_pattern in [SearchPattern.AHJO, SearchPattern.NUMBERS]:
return _query_and_respond_to_numbers(
search_string, detected_pattern, search_from_archival
)
elif detected_pattern == SearchPattern.SSN:
return _query_and_respond_to_ssn(
search_string,
detected_pattern,
)
elif detected_pattern == SearchPattern.ARCHIVAL:
return _query_and_respond_to_archival_application(
search_string, detected_pattern
)

# Perform trigram query for company name
if detected_pattern in [SearchPattern.COMPANY, SearchPattern.IN_MEMORY]:
results_for_related_company = _query_for_company(
archived, search_string, search_from_archival
)
applications = results_for_related_company["applications"]
archival_applications = results_for_related_company["archival"]

applications = HandlerApplicationListSerializer(applications, many=True).data

filtered_data = []
in_memory_results = None

# Use filter string to perform in-memory search
if (
detected_pattern in [SearchPattern.COMPANY, SearchPattern.IN_MEMORY]
and in_memory_filter_str != ""
):
in_memory_results = _perform_in_memory_search(
applications,
detected_pattern,
archived,
search_string,
in_memory_filter_str,
)
filtered_data = in_memory_results["data"]

if search_from_archival:
archival_data = ArchivalApplicationListSerializer(
archival_applications, many=True
).data
in_memory_results_archival = _perform_in_memory_search(
archival_data,
detected_pattern,
archived,
search_string,
in_memory_filter_str,
True,
)
data = in_memory_results["data"]
detected_pattern = in_memory_results["detected_pattern"]
in_memory_results_archival["data"]
filtered_data += in_memory_results_archival["data"]

return _create_search_response(
None,
data,
detected_pattern,
search_string,
in_memory_results,
in_memory_filter_str,
)
detected_pattern = in_memory_results["detected_pattern"]
else:
filtered_data = applications
if search_from_archival:
filtered_data += ArchivalApplicationListSerializer(
archival_applications, many=True
).data

return _create_search_response(
None,
filtered_data,
detected_pattern,
search_string,
in_memory_results,
in_memory_filter_str,
)


def _fuzzy_matching(applications, query, threshold):
Expand Down Expand Up @@ -167,7 +228,20 @@ def _query_and_respond_to_ssn(search_query_str, detected_pattern):
)


def _query_and_respond_to_numbers(search_query_str, detected_pattern):
def _query_and_respond_to_archival_application(search_query_str, detected_pattern):
applications = ArchivalApplication.objects.filter(
Q(application_number__icontains=search_query_str)
)
return _create_search_response(
applications=applications,
serialized_data=None,
detected_pattern=detected_pattern,
search_query_str=search_query_str,
serializer=ArchivalApplicationListSerializer,
)


def _query_and_respond_to_numbers(search_query_str, detected_pattern, archival=False):
"""
Perform simple LIKE query for application number, AHJO case ID and company business ID
"""
Expand All @@ -176,11 +250,19 @@ def _query_and_respond_to_numbers(search_query_str, detected_pattern):
| Q(ahjo_case_id__icontains=search_query_str)
| Q(application_number__icontains=search_query_str)
)
data = HandlerApplicationListSerializer(applications, many=True).data

if archival:
archival_applications = ArchivalApplication.objects.filter(
Q(company__business_id__icontains=search_query_str)
)
data += ArchivalApplicationListSerializer(archival_applications, many=True).data

return _create_search_response(
applications,
None,
detected_pattern,
search_query_str,
applications=None,
serialized_data=data,
detected_pattern=detected_pattern,
search_query_str=search_query_str,
)


Expand All @@ -191,9 +273,10 @@ def _create_search_response(
search_query_str: str,
in_memory_results: Union[dict, None] = None,
in_memory_filter_str="",
serializer=HandlerApplicationListSerializer,
):
if serialized_data is None:
serialized_data = HandlerApplicationListSerializer(applications, many=True).data
serialized_data = serializer(applications, many=True).data

return Response(
{
Expand All @@ -209,16 +292,28 @@ def _create_search_response(


def _perform_in_memory_search(
data, detected_pattern, archived, search_string, in_memory_filter_str
data,
detected_pattern,
archived,
search_string,
in_memory_filter_str,
archival_applications=False,
):
"""Perform more expensive in-memory search from within applications"""
if detected_pattern == SearchPattern.COMPANY:
in_memory_filter_str = search_string

# No previous search results, use all applications as haystack
if len(data) == 0 or search_string == "":
applications = Application.objects.filter(archived=archived)
data = HandlerApplicationListSerializer(applications, many=True).data
applications = (
ArchivalApplication.objects.all()
if archival_applications
else Application.objects.filter(archived=archived)
)
data = (
ArchivalApplicationListSerializer(applications, many=True).data
if archival_applications
else HandlerApplicationListSerializer(applications, many=True).data
)
detected_pattern = f"{SearchPattern.ALL} {SearchPattern.IN_MEMORY}"
else:
detected_pattern = f"{SearchPattern.COMPANY} {SearchPattern.IN_MEMORY}"
Expand All @@ -237,20 +332,34 @@ def _perform_in_memory_search(
return {**in_memory_results, **{"detected_pattern": detected_pattern}}


def _query_for_company(archived, search_string):
def _query_for_company(archived, search_string, search_from_archival):
search_vectors = SearchVector("company__name")
query = SearchQuery(search_string, search_type="websearch")

return (
Application.objects.filter(archived=archived)
.annotate(
search=search_vectors,
similarity=TrigramSimilarity("company__name", search_string),
rank=SearchRank(search_vectors, query),
)
.filter(similarity__gt=0.3)
.order_by("-similarity", "-handled_at")
)
return {
"applications": (
Application.objects.filter(archived=archived)
.annotate(
search=search_vectors,
similarity=TrigramSimilarity("company__name", search_string),
rank=SearchRank(search_vectors, query),
)
.filter(similarity__gt=0.3)
.order_by("-similarity", "-handled_at")
),
"archival": (
ArchivalApplication.objects.all()
.annotate(
search=search_vectors,
similarity=TrigramSimilarity("company__name", search_string),
rank=SearchRank(search_vectors, query),
)
.filter(similarity__gt=0.3)
.order_by("-similarity", "-handled_at")
if search_from_archival
else []
),
}


def _detect_filters(search_string):
Expand Down
Loading

0 comments on commit 0302394

Please sign in to comment.