Skip to content
Closed
1 change: 1 addition & 0 deletions api/webview/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
url(r'^documents/status', views.status, name='status'),
url(r'^documents/(?P<source>\w+)/$', views.DocumentsFromSource.as_view(), name='source'),
url(r'^documents/(?P<source>[a-z]+)/(?P<docID>(.*))/$', views.document_detail, name='document_detail'),
url(r'^documents/from=(?P<from>\d{4}-\d{2}-\d{2})&until=(?P<until>\d{4}-\d{2}-\d{2})/$', views.DocumentsByProviderUpdatedDateTime.as_view(), name='providerupdate'),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of adding a new URL route, can you add this optional query parameter to the documents/ route?

url(r'^institutions', views.institutions, name='institutions'),
url(r'^robots\.txt$', include('robots.urls')),
]
25 changes: 22 additions & 3 deletions api/webview/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from rest_framework.response import Response
from rest_framework.decorators import api_view
from django.views.decorators.clickjacking import xframe_options_exempt

from dateutil.parser import parse
from elasticsearch import Elasticsearch

from scrapi import settings
Expand All @@ -28,7 +28,8 @@ def perform_create(self, serializer):
def get_queryset(self):
""" Return the queryset of documents listed by this view
"""
# NOTE(review): this hunk shows both sides of the change -- presumably the
# return directly below is the removed line and the two lines after it are
# its replacement; confirm against the merged file.
return Document.objects.all()
# Documents whose ``normalized`` field is None are left out of the listing.
queryset = Document.objects.all().exclude(normalized=None)
return queryset


class DocumentsFromSource(generics.ListAPIView):
Expand All @@ -44,7 +45,25 @@ def perform_create(self, serializer):
def get_queryset(self):
""" Return the documents whose source matches the ``source`` URL kwarg
"""
# NOTE(review): this hunk shows both sides of the change -- presumably the
# first return is the removed line and the second is its replacement;
# confirm against the merged file.
return Document.objects.filter(source=self.kwargs['source'])
# Same source filter, additionally leaving out documents whose
# ``normalized`` field is None.
return Document.objects.filter(source=self.kwargs['source']).exclude(normalized=None)


class DocumentsByProviderUpdatedDateTime(generics.ListAPIView):
"""
List all documents whose providerUpdatedDateTime falls within the
``from``/``until`` range taken from the URL kwargs
"""
serializer_class = DocumentSerializer
permission_classes = (permissions.IsAuthenticatedOrReadOnly,)

def perform_create(self, serializer):
""" Save the serializer with the requesting user as its source
"""
# NOTE(review): a ListAPIView normally exposes no create action, so this
# hook is presumably never invoked -- confirm before relying on it.
serializer.save(source=self.request.user)

def get_queryset(self):
""" Return the documents whose providerUpdatedDateTime lies between the
``from`` and ``until`` URL kwargs, both bounds included
"""
queryset = Document.objects.all()
# Both kwargs are converted with dateutil's ``parse`` before the comparison.
# NOTE(review): a date-only ``until`` presumably parses to midnight, which
# would leave out documents updated later on that day -- confirm intent.
queryset = queryset.filter(providerUpdatedDateTime__gte=parse(self.kwargs['from'])).filter(providerUpdatedDateTime__lte=parse(self.kwargs['until']))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is awesome! Can you modify it so that instead of a new view, it checks for the argument in the normal document list view? It should add a filter only if the query parameter is there, and do nothing if it isn't. Check out the source one that Gloria added in her PR.

Also, you can go ahead and add a filter on the initial queryset, instead of first querying for everything and then adding a filter (it will be faster!)

so, something like:

filters = {}
if self.kwargs.get('from', None):
    filters['from'] = self.kwargs['from']
   # other optional filters here

return queryset.filter(**filters)

return queryset


@api_view(['GET'])
Expand Down
47 changes: 43 additions & 4 deletions tests/test_api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "api.api.settings")

import django
from django.test import TestCase
from rest_framework.test import APIRequestFactory

from api.webview.views import DocumentList, status, institutions
from django.test import TestCase
from rest_framework.test import APIRequestFactory, APITestCase, APIClient, force_authenticate
from dateutil.parser import parse
from api.webview.views import DocumentList, status, institutions, DocumentsByProviderUpdatedDateTime
from api.webview.models import Document

django.setup()

Expand Down Expand Up @@ -60,4 +61,42 @@ def test_institutions(self):
)
response = view(request)
self.assertEqual(response.status_code, 200)
def test_exclude_non_normalized_documents(self):
    """Documents with no normalized value must not appear in the document list.

    One document is stored with ``normalized=None`` and one with a
    normalized value; only the latter's source may show up in the response.
    """
    create_document(source="bad", normalized=None)
    create_document(source="good", normalized="This is Normalized")

    request = self.factory.get('/documents/')
    response = DocumentList.as_view()(request)

    # No debug print here: the assertions already report the response
    # content when they fail, so printing only adds noise to test output.
    self.assertNotContains(response, "bad", status_code=200)
    self.assertContains(response, "good", status_code=200)


class APIViewTests2(APITestCase):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once you incorporate your code into the DocumentList view, you should be able to add these tests back into the first class - no need for a second one.


def test_query_search_by_providerupdatedtime(self):
    """Only documents updated inside the requested from/until window are listed.

    Three documents are stored: one before the window, one inside it and one
    after it. Only the source of the one inside may appear in the response.
    """
    create_new_document(source="tooearly", providerUpdatedDateTime=parse("2012-01-01"))
    create_new_document(source="rightontime", providerUpdatedDateTime=parse("2013-01-05"))
    create_new_document(source="toolate", providerUpdatedDateTime=parse("2015-01-01"))

    # The test client resolves the view through the URLconf and the date
    # range is carried by the path itself, so neither a view callable nor
    # extra keyword arguments are needed. The view permits read-only access
    # without credentials, and force_authenticate is meant for requests built
    # with APIRequestFactory -- calling it on a response after the fact has
    # no effect -- so no authentication step is performed.
    response = self.client.get('/documents/from=2013-01-01&until=2014-12-30/')

    self.assertNotContains(response, "tooearly", status_code=200)
    self.assertContains(response, "rightontime", status_code=200)
    self.assertNotContains(response, "toolate", status_code=200)


def create_new_document(source, providerUpdatedDateTime):
    """Persist and return a Document with the given source and provider update time."""
    attributes = {
        "source": source,
        "providerUpdatedDateTime": providerUpdatedDateTime,
    }
    return Document.objects.create(**attributes)
def create_document(source, normalized):
    """Persist and return a Document with the given source and normalized value."""
    document = Document.objects.create(source=source, normalized=normalized)
    return document


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can remove these newlines at the bottom and just leave one!