diff --git a/api/webview/urls.py b/api/webview/urls.py index 38f8c658..8715adb9 100644 --- a/api/webview/urls.py +++ b/api/webview/urls.py @@ -7,6 +7,7 @@ url(r'^documents/status', views.status, name='status'), url(r'^documents/(?P<source>\w+)/$', views.DocumentsFromSource.as_view(), name='source'), url(r'^documents/(?P[a-z]+)/(?P(.*))/$', views.document_detail, name='document_detail'), + url(r'^documents/from=(?P<from>\d{4}-\d{2}-\d{2})&until=(?P<until>\d{4}-\d{2}-\d{2})/$', views.DocumentsByProviderUpdatedDateTime.as_view(), name='providerupdate'), url(r'^institutions', views.institutions, name='institutions'), url(r'^robots\.txt$', include('robots.urls')), ] diff --git a/api/webview/views.py b/api/webview/views.py index bc077f54..ca1aadc9 100644 --- a/api/webview/views.py +++ b/api/webview/views.py @@ -5,7 +5,7 @@ from rest_framework.response import Response from rest_framework.decorators import api_view from django.views.decorators.clickjacking import xframe_options_exempt - +from dateutil.parser import parse from elasticsearch import Elasticsearch from scrapi import settings @@ -28,7 +28,8 @@ def perform_create(self, serializer): def get_queryset(self): """ Return all documents """ - return Document.objects.all() + queryset = Document.objects.all().exclude(normalized=None) + return queryset class DocumentsFromSource(generics.ListAPIView): @@ -44,7 +45,25 @@ def perform_create(self, serializer): def get_queryset(self): """ Return queryset based on source """ - return Document.objects.filter(source=self.kwargs['source']) + return Document.objects.filter(source=self.kwargs['source']).exclude(normalized=None) + + +class DocumentsByProviderUpdatedDateTime(generics.ListAPIView): + """ + List all documents updated within specified time frame + """ + serializer_class = DocumentSerializer + permission_classes = (permissions.IsAuthenticatedOrReadOnly,) + + def perform_create(self, serializer): + serializer.save(source=self.request.user) + + def get_queryset(self): + """ Return queryset 
based on provider update time + """ + queryset = Document.objects.all() + queryset = queryset.filter(providerUpdatedDateTime__gte=parse(self.kwargs['from'])).filter(providerUpdatedDateTime__lte=parse(self.kwargs['until'])) + return queryset @api_view(['GET']) diff --git a/tests/test_api_views.py b/tests/test_api_views.py index b903140c..0f26dd74 100644 --- a/tests/test_api_views.py +++ b/tests/test_api_views.py @@ -2,10 +2,11 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "api.api.settings") import django -from django.test import TestCase -from rest_framework.test import APIRequestFactory - -from api.webview.views import DocumentList, status, institutions +from django.test import TestCase +from rest_framework.test import APIRequestFactory, APITestCase, APIClient, force_authenticate +from dateutil.parser import parse +from api.webview.views import DocumentList, status, institutions, DocumentsByProviderUpdatedDateTime +from api.webview.models import Document django.setup() @@ -60,4 +61,42 @@ def test_institutions(self): ) response = view(request) self.assertEqual(response.status_code, 200) + def test_exclude_non_normalized_documents(self): + view = DocumentList.as_view() + create_document(source="bad",normalized=None) + create_document(source="good",normalized="This is Normalized") + request = self.factory.get( + '/documents/' + ) + response = view(request) + self.assertNotContains(response, "bad", + status_code=200) + self.assertContains(response, "good", status_code=200) + print(response) + + +class APIViewTests2(APITestCase): + + def test_query_search_by_providerupdatedtime(self): + view = DocumentsByProviderUpdatedDateTime.as_view() + create_new_document(source = "tooearly",providerUpdatedDateTime= parse("2012-01-01")) + create_new_document(source = "rightontime", providerUpdatedDateTime= parse("2013-01-05")) + create_new_document(source= "toolate", providerUpdatedDateTime = parse("2015-01-01")) + response = 
self.client.get('/documents/from=2013-01-01&until=2014-12-30/', kwargs= {'from':"2013-01-01",'until':'2014-12-30'}) + force_authenticate(response) + self.assertNotContains(response, "tooearly", status_code=200) + self.assertContains(response, "rightontime", status_code=200) + self.assertNotContains(response, "toolate", status_code =200) + + +def create_new_document(source,providerUpdatedDateTime): + return Document.objects.create(source = source, providerUpdatedDateTime = providerUpdatedDateTime) +def create_document(source,normalized): + return Document.objects.create(source= source,normalized= normalized) + + + + + +