Skip to content

Commit

Permalink
Merge d68b96e into 84c9699
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed May 3, 2023
2 parents 84c9699 + d68b96e commit b1b4864
Show file tree
Hide file tree
Showing 89 changed files with 4,281 additions and 2,286 deletions.
8 changes: 7 additions & 1 deletion .docker-compose.env
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
CELERY_BROKER_URL=amqp://guest:guest@rabbitmq:5672
DATABASE_HOST=postgres
ELASTICSEARCH_URL=http://elasticsearch:9200/
ELASTICSEARCH8_URL=https://elastic8:9200/
# ELASTICSEARCH5_URL=http://elasticsearch:9200/
EMBER_SHARE_URL=http://frontend:4200
LOGIN_REDIRECT_URL=http://localhost:8003/
OSF_API_URL=http://localhost:8000
RABBITMQ_HOST=rabbitmq
RABBITMQ_PORT=5672
SHARE_API_URL=http://web:8000/
LOG_LEVEL=INFO

# allow localhost and the localhost-loopback ip used by local osf
# (see https://github.com/CenterForOpenScience/osf.io/blob/develop/README-docker-compose.md)
ALLOWED_HOSTS=localhost 192.168.168.167

#PYTHONUNBUFFERED=0 # When set to 0, this allows print statements to be visible in the Docker logs
61 changes: 33 additions & 28 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,19 @@ on:
workflow_dispatch:

permissions:
checks: write
checks: write # for coveralls

jobs:

run_tests:
strategy:
fail-fast: false
matrix:
python-version: [3.6]
python-version: ['3.11'] # TODO: 3.12
postgres-version: ['10', '11', '12', '13', '14', '15']
runs-on: ubuntu-latest


services:

postgres:
image: postgres:10
image: postgres:${{ matrix.postgres-version }}
env:
POSTGRES_PASSWORD: postgres
# Set health checks to wait until postgres has started
Expand All @@ -31,34 +29,41 @@ jobs:
--health-retries 5
ports:
- 5432:5432

elasticsearch:
elasticsearch8:
image: elasticsearch:8.7.0
env:
xpack.security.enabled: false
node.name: singlenode
cluster.initial_master_nodes: singlenode
ports:
- 9208:9200
elasticsearch5:
image: elasticsearch:5.4
env:
ES_JAVA_OPTS: "-Xms512m -Xmx512m"
ports:
- 9200:9200

- 9205:9200
rabbitmq:
image: rabbitmq:management
ports:
- 5672:5672
- 15672:15672
steps:
- uses: actions/checkout@v2

- name: set up python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- uses: actions/checkout@v3

- name: install non-py dependencies
run: sudo apt-get install libxml2-dev libxslt1-dev libpq-dev
run: sudo apt-get update && sudo apt-get install -y libxml2-dev libxslt1-dev libpq-dev git gcc

- name: cache py dependencies
id: cache-requirements
uses: actions/cache@v2
- name: set up python${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
path: ${{ env.pythonLocation }}
key: ${{ env.pythonLocation }}-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
python-version: ${{ matrix.python-version }}
cache: pip
cache-dependency-path: |
requirements.txt
dev-requirements.txt
- name: install py dependencies
if: steps.cache-requirements.outputs.cache-hit != 'true'
run: pip install -r dev-requirements.txt

- name: install share
Expand All @@ -73,8 +78,8 @@ jobs:
coverage run --append -m behave
env:
DATABASE_PASSWORD: postgres
ELASTICSEARCH8_URL: http://localhost:9208/
ELASTICSEARCH5_URL: http://localhost:9205/

- name: send coverage report
run: coveralls --service=github
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: coveralls
uses: coverallsapp/github-action@v2
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# Change Log

# [23.0.0] - 2023-05-03
- upgrade to python 3.11
- upgrade to elasticsearch 8
- add `share.search.index_strategy` to act as a slippery abstraction layer between search-engine backend and planned friendly search api
- configure two index strategies (and make it easy to add more in the future):
- `sharev2_elastic5`: the existing/legacy SHAREv2 search index as it exists on elasticsearch5, exposed via `/api/v2/search/creativeworks/_search`
- `sharev2_elastic8`: a mirror/replacement for `sharev2_elastic5` with all the same `_source` docs (but with possible incompatibilities for the existing pass-thru api)
- add a happy-path index-backfill workflow to the admin interface at `/admin/search-indexes`
- when changing index-strategy settings/mappings/whatever, the "happy path" is to create, backfill, and verify a new copy of the index; then switch which index is used for searching, verify again, and finally delete the old index.
- not intended to have the power of a full elasticsearch management interface -- just enough visibility to see whether things are going ok and where to start looking if something goes wrong
- for testing, support `indexStrategy` query param to `/api/v2/search/creativeworks/_search`, `/api/feeds/rss`, `/api/feeds/atom`
- may request a configured strategy (e.g. `indexStrategy=sharev2_elastic8`) or a specific version of an index within a strategy (e.g. `indexStrategy=sharev2_elastic8__bcaa90e8fa8a772580040a8edbedb5f727202d1fca20866948bc0eb0e935e51f`)
- add `FeatureFlag` model, use it to switch default search strategy (`name="elastic_eight_default"`)

# [22.0.1] - 2022-08-29
- add `suid` value to `sharev2_elastic` index

Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.6-slim-buster as app
FROM python:3.11-slim as app

RUN apt-get update \
&& apt-get install -y \
Expand Down Expand Up @@ -26,7 +26,7 @@ RUN mkdir -p /code
WORKDIR /code

RUN pip install -U pip
RUN pip install uwsgi==2.0.16
RUN pip install uwsgi==2.0.21

COPY ./requirements.txt /code/requirements.txt
COPY ./constraints.txt /code/constraints.txt
Expand Down
2 changes: 1 addition & 1 deletion WHAT-IS-THIS-EVEN.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ and [OSF Registries](https://osf.io/registries/discover) show only registrations
and preprints, respectively, which are hosted on OSF infrastructure.

To learn about using the API (instead of a user interface), see
[USING-THE-API.md](./USING-THE-API.md)
[how-to/use-the-api.md](./how-to/use-the-api.md)
33 changes: 5 additions & 28 deletions api/search/urls.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,14 @@
from django.urls import re_path as url
from django.urls import re_path
from django.views.decorators.csrf import csrf_exempt

from api.search import views


urlpatterns = [
# only match _count and _search requests
url(
r'^(?P<url_bits>(?:\w+/)?_(?:search|count)/?)$',
csrf_exempt(views.ElasticSearchView.as_view()),
re_path(
# sharev2 back-compat
r'^creativeworks/_search/?$',
csrf_exempt(views.Sharev2ElasticSearchView.as_view()),
name='search'
),
# match _suggest requests
url(
r'^(?P<url_bits>(?:\w+/)?_(?:suggest)/?)$',
csrf_exempt(views.ElasticSearchPostOnlyView.as_view()),
name='search_post'
),
# match _mappings requests
url(
r'^(?P<url_bits>_mappings(/.+|$|/))',
csrf_exempt(views.ElasticSearchGetOnlyView.as_view()),
name='search_get'
),
# match specific document requests
url(
r'^(?P<url_bits>[^_][\w_-]+/[^_][\w_-]+/?$)',
csrf_exempt(views.ElasticSearchGetOnlyView.as_view()),
name='search_get'
),
url(
r'^(?P<url_bits>.*)',
csrf_exempt(views.ElasticSearch403View.as_view()),
name='search_403'
),
]
138 changes: 26 additions & 112 deletions api/search/views.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
import requests

from furl import furl

from django import http
from django.conf import settings

from rest_framework import views
from rest_framework.parsers import JSONParser
Expand All @@ -12,123 +7,42 @@
from rest_framework.response import Response

from api import authentication
from share.search import exceptions
from share.search.index_strategy import IndexStrategy


class ElasticSearch403View(views.APIView):
"""
Elasticsearch endpoint for unsupported queries.
"""
authentication_classes = (authentication.NonCSRFSessionAuthentication, )
parser_classes = (JSONParser,)
permission_classes = (AllowAny, )
renderer_classes = (JSONRenderer, )

def get(self, request, *args, **kwargs):
return http.HttpResponseForbidden()

def post(self, request, *args, **kwargs):
return http.HttpResponseForbidden()


class ElasticSearchGetOnlyView(views.APIView):
"""
Elasticsearch get only endpoint for SHARE Data.
- _mappings
"""
authentication_classes = (authentication.NonCSRFSessionAuthentication, )
parser_classes = (JSONParser,)
permission_classes = (AllowAny, )
renderer_classes = (JSONRenderer, )

def get(self, request, *args, url_bits='', **kwargs):
params = request.query_params.copy()

v = params.pop('v', None)
index = settings.ELASTICSEARCH['PRIMARY_INDEX']
if v:
v = 'v{}'.format(v[0])
if v not in settings.ELASTICSEARCH['INDEX_VERSIONS']:
return http.HttpResponseBadRequest('Invalid search index version')
index = '{}_{}'.format(index, v)
es_url = furl(settings.ELASTICSEARCH['URL']).add(path=index, query_params=params).add(path=url_bits.split('/'))

if request.method == 'GET':
resp = requests.get(es_url)
else:
raise NotImplementedError()
return Response(status=resp.status_code, data=resp.json(), headers={'Content-Type': 'application/vnd.api+json'})


class ElasticSearchPostOnlyView(views.APIView):
"""
Elasticsearch post only endpoint for SHARE Data.
- _suggest
"""
authentication_classes = (authentication.NonCSRFSessionAuthentication, )
parser_classes = (JSONParser,)
permission_classes = (AllowAny, )
renderer_classes = (JSONRenderer, )

def post(self, request, *args, url_bits='', **kwargs):
params = request.query_params.copy()

v = params.pop('v', None)
index = settings.ELASTICSEARCH['PRIMARY_INDEX']
if v:
v = 'v{}'.format(v[0])
if v not in settings.ELASTICSEARCH['INDEX_VERSIONS']:
return http.HttpResponseBadRequest('Invalid search index version')
index = '{}_{}'.format(index, v)
es_url = furl(settings.ELASTICSEARCH['URL']).add(path=index, query_params=params).add(path=url_bits.split('/'))

if request.method == 'POST':
resp = requests.post(es_url, json=request.data)
else:
raise NotImplementedError()
return Response(status=resp.status_code, data=resp.json(), headers={'Content-Type': 'application/vnd.api+json'})


class ElasticSearchView(views.APIView):
class Sharev2ElasticSearchView(views.APIView):
"""
Elasticsearch endpoint for SHARE Data.
- [Creative Works](/api/v2/search/creativeworks/_search) - Search individual documents harvested
- [Agents](/api/v2/search/agents/_search) - Search agents from harvested documents
- [Tags](/api/v2/search/tags/_search) - Tags placed on documents
- [Sources](/api/v2/search/sources/_search) - Data sources
"""
authentication_classes = (authentication.NonCSRFSessionAuthentication, )
parser_classes = (JSONParser,)
permission_classes = (AllowAny, )
renderer_classes = (JSONRenderer, )

def get(self, request, *args, url_bits='', **kwargs):
return self._handle_request(request, url_bits)

def post(self, request, *args, url_bits='', **kwargs):
return self._handle_request(request, url_bits)
def get(self, request):
return self._handle_request(request)

def _handle_request(self, request, url_bits):
params = request.query_params.copy()
def post(self, request):
return self._handle_request(request)

if 'scroll' in params:
def _handle_request(self, request):
queryparams = request.query_params.dict()
requested_index_strategy = queryparams.pop('indexStrategy', None)
if 'scroll' in queryparams:
return http.HttpResponseForbidden(reason='Scroll is not supported.')

v = params.pop('v', None)
index = settings.ELASTICSEARCH['PRIMARY_INDEX']
if v:
v = 'v{}'.format(v[0])
if v not in settings.ELASTICSEARCH['INDEX_VERSIONS']:
return http.HttpResponseBadRequest('Invalid search index version')
index = '{}_{}'.format(index, v)
es_url = furl(settings.ELASTICSEARCH['URL']).add(path=index, query_params=params).add(path=url_bits.split('/'))

if request.method == 'GET':
resp = requests.get(es_url)
elif request.method == 'POST':
resp = requests.post(es_url, json=request.data)
else:
raise NotImplementedError()
return Response(status=resp.status_code, data=resp.json(), headers={'Content-Type': 'application/vnd.api+json'})
try:
specific_index = IndexStrategy.get_for_searching(
requested_index_strategy,
with_default_fallback=True,
)
except exceptions.IndexStrategyError as error:
raise http.Http404(str(error))
try:
response_json = specific_index.pls_handle_query__sharev2_backcompat(
request_body=request.data,
request_queryparams=queryparams,
)
return Response(data=response_json, headers={'Content-Type': 'application/json'})
except (exceptions.IndexStrategyError, NotImplementedError) as error:
return Response(status=418, data=str(error)) # TODO
3 changes: 2 additions & 1 deletion api/sources/serializers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re

import requests

Expand Down Expand Up @@ -96,7 +97,7 @@ def create(self, validated_data):

icon_file = self._fetch_icon_file(icon_url)

username = long_title.replace(' ', '_').lower()
username = re.sub(r'[^\w.@+-]', '_', long_title).lower()
name = validated_data.pop('name', username)

with transaction.atomic():
Expand Down
Loading

0 comments on commit b1b4864

Please sign in to comment.