diff --git a/data/organisations/data.json b/data/organisations/data.json
index 8baa3fa2..e5d5d48a 100644
--- a/data/organisations/data.json
+++ b/data/organisations/data.json
@@ -51,6 +51,7 @@
],
"isShared": true,
"isDedicated": true,
+ "serverName": "fredi.hepvs.ch",
"platformName": "FREDI",
"files": [
{
@@ -64,6 +65,7 @@
"name": "Haute École Pédagogique BEJUNE",
"isShared": true,
"isDedicated": true,
+ "serverName": "roar.hep-bejune.ch",
"platformName": "# ROAR\n#### Répertoire ouvert et archives BEJUNE",
"files": [
{
@@ -113,6 +115,7 @@
],
"isShared": true,
"isDedicated": true,
+ "serverName": "folia.unifr.ch",
"platformName": "# FOLIA\n#### Fribourg Open Library and Archive",
"files": [
{
diff --git a/pyproject.toml b/pyproject.toml
index fcbfd4c1..c0d91708 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -86,6 +86,7 @@ resources = "sonar.resources.cli:resources"
imports = "sonar.modules.cli.imports:imports"
fixtures = "sonar.modules.cli.fixtures:fixtures"
utils = "sonar.modules.cli.utils:utils"
+sitemap = "sonar.modules.sitemap.cli.sitemap:sitemap"
[tool.poetry.plugins."invenio_base.apps"]
sonar = "sonar.ext:Sonar"
@@ -108,6 +109,7 @@ shibboleth_authenticator = "sonar.modules.shibboleth_authenticator.views.client:
theme = "sonar.theme.views:blueprint"
validation = "sonar.modules.validation.views:blueprint"
users = "sonar.modules.users.views:blueprint"
+sitemap = "sonar.modules.sitemap.views:blueprint"
[tool.poetry.plugins."invenio_base.api_blueprints"]
deposits = "sonar.modules.deposits.rest:api_blueprint"
diff --git a/sonar/config.py b/sonar/config.py
index ac522bf5..42d5edd1 100644
--- a/sonar/config.py
+++ b/sonar/config.py
@@ -183,6 +183,11 @@ def _(x):
'documents-stats': {
'task': ('sonar.modules.stats.tasks.collect_stats'),
'schedule': crontab(minute=0, hour=1), # Every day at 01:00 UTC,
+ },
+ # Sitemap
+ 'sitemap': {
+ 'task': 'sonar.modules.sitemap.tasks.sitemap_generate_task',
+ 'schedule': crontab(minute=0, hour=2), # Every day at 02:00 UTC,
}
}
CELERY_BROKER_HEARTBEAT = 0
diff --git a/sonar/config_sonar.py b/sonar/config_sonar.py
index 98cc2a06..11a78997 100644
--- a/sonar/config_sonar.py
+++ b/sonar/config_sonar.py
@@ -17,11 +17,17 @@
"""Specific configuration SONAR."""
+# Server name used to build the sitemap urls when the requested server name
+# matches no organisation.
+SONAR_APP_SERVER_NAME = 'sonar.rero.ch'
+
SONAR_APP_API_URL = 'https://localhost:5000/api/'
SONAR_APP_ANGULAR_URL = 'https://localhost:5000/manage/'
"""Link to angular integrated app root."""
+# When false, the robot.txt view disallows every robot.
+SONAR_APP_PRODUCTION_STATE = False
+
+# Number of urls per sitemap file; above it the sitemap is split in several
+# files referenced by an index.
+SONAR_APP_SITEMAP_ENTRY_SIZE = 10000
+
SONAR_APP_LANGUAGES_MAP = {
'aar': 'aa',
'abk': 'ab',
diff --git a/sonar/modules/organisations/api.py b/sonar/modules/organisations/api.py
index 961d4056..4a8b8489 100644
--- a/sonar/modules/organisations/api.py
+++ b/sonar/modules/organisations/api.py
@@ -84,6 +84,19 @@ def get_shared_or_dedicated_list(self):
['pid', 'name', 'isShared',
'isDedicated']).execute().hits
+    def get_organisation_pid_by_server_name(self, server_name):
+        """Get the pid of the organisation matching a server name.
+
+        :param server_name: value matched against the ``serverName`` field.
+        :returns: pid of the first matching organisation or ``None``.
+        """
+        query = self.filter('term', serverName=server_name).source(['pid'])
+        matches = query.execute().hits
+        if not matches:
+            return None
+        return matches[0].pid
+
+    def get_dedicated_list(self):
+        """Get the list of dedicated organisations.
+
+        The search is scanned rather than executed once, so that every
+        dedicated organisation is returned and not only the first page of
+        results (10 hits by default).
+
+        :returns: Iterator of dedicated organisations.
+        """
+        return self.filter('term', isDedicated=True).scan()
+
class OrganisationRecord(SonarRecord):
"""Organisation record class."""
diff --git a/sonar/modules/organisations/jsonschemas/organisations/organisation-v1.0.0_src.json b/sonar/modules/organisations/jsonschemas/organisations/organisation-v1.0.0_src.json
index 3c40e5a6..27224558 100644
--- a/sonar/modules/organisations/jsonschemas/organisations/organisation-v1.0.0_src.json
+++ b/sonar/modules/organisations/jsonschemas/organisations/organisation-v1.0.0_src.json
@@ -146,6 +146,29 @@
"hideExpression": "!field.model.isShared"
}
},
+ "serverName": {
+ "title": "Server name (without http)",
+ "description": "Organisation server name for dedicated.",
+ "type": "string",
+ "form": {
+ "hideExpression": "!field.model.isDedicated",
+ "expressionProperties": {
+ "templateOptions.required": "field.model.isDedicated"
+ },
+ "validation": {
+ "validators": {
+ "uniqueValueKeysInObject": {
+ "keys": [
+ "serverName"
+ ]
+ }
+ },
+ "messages": {
+ "uniqueValueKeysInObjectMessage": "This domain name must be unique."
+ }
+ }
+ }
+ },
"allowedIps": {
"title": "Allowed IP addresses",
"description": "List of IP addresses or ranges that allow access to private files (access: embargoed or restricted), which are accessible only within the organisation. Note: the bibliographic record (metadata) is always public. Enter one rule per line.",
@@ -441,6 +464,7 @@
"footer",
"isShared",
"isDedicated",
+ "serverName",
"allowedIps",
"platformName",
"documentsCustomField1",
diff --git a/sonar/modules/organisations/mappings/v7/organisations/organisation-v1.0.0.json b/sonar/modules/organisations/mappings/v7/organisations/organisation-v1.0.0.json
index 892932f6..30edaa79 100644
--- a/sonar/modules/organisations/mappings/v7/organisations/organisation-v1.0.0.json
+++ b/sonar/modules/organisations/mappings/v7/organisations/organisation-v1.0.0.json
@@ -54,6 +54,9 @@
"isDedicated": {
"type": "boolean"
},
+ "serverName": {
+ "type": "keyword"
+ },
"allowedIps": {
"type": "text"
},
diff --git a/sonar/modules/organisations/marshmallow/json.py b/sonar/modules/organisations/marshmallow/json.py
index 0624b608..e501d9e8 100644
--- a/sonar/modules/organisations/marshmallow/json.py
+++ b/sonar/modules/organisations/marshmallow/json.py
@@ -47,6 +47,7 @@ class OrganisationMetadataSchemaV1(StrictKeysMixin):
footer = fields.List(fields.Dict())
isShared = fields.Boolean()
isDedicated = fields.Boolean()
+ serverName = fields.Str(dump_only=True)
allowedIps = SanitizedUnicode()
platformName = SanitizedUnicode()
documentsCustomField1 = fields.Dict()
diff --git a/sonar/modules/sitemap/__init__.py b/sonar/modules/sitemap/__init__.py
new file mode 100644
index 00000000..bd8b87fa
--- /dev/null
+++ b/sonar/modules/sitemap/__init__.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2022 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Sitemap Modules."""
diff --git a/sonar/modules/sitemap/cli/sitemap.py b/sonar/modules/sitemap/cli/sitemap.py
new file mode 100644
index 00000000..4bc4f395
--- /dev/null
+++ b/sonar/modules/sitemap/cli/sitemap.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2022 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Sitemap cli."""
+
+
+import click
+from flask import current_app
+from flask.cli import with_appcontext
+
+from sonar.modules.sitemap.sitemap import sitemap_generate
+
+
+# Root command group: sub-commands are attached with ``@sitemap.command()``.
+@click.group()
+def sitemap():
+ """Sitemap."""
+
+@sitemap.command()
+@click.option(
+    '-d', '--server-name', 'server_name', required=True, default=None)
+@with_appcontext
+def generate(server_name):
+    """Generate a sitemap.
+
+    :param: server_name: organisation server name.
+    """
+    # Number of urls per sitemap file, read from the configuration.
+    entry_size = current_app.config.get('SONAR_APP_SITEMAP_ENTRY_SIZE', 10000)
+    sitemap_generate(server_name, entry_size)
+    click.secho(f'Generate sitemap for {server_name}', fg='green')
diff --git a/sonar/modules/sitemap/sitemap.py b/sonar/modules/sitemap/sitemap.py
new file mode 100644
index 00000000..6fb33695
--- /dev/null
+++ b/sonar/modules/sitemap/sitemap.py
@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2022 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Sitemap."""
+
+import glob
+import math
+import os
+from datetime import datetime
+
+from flask import current_app, url_for
+
+from sonar.modules.documents.api import DocumentSearch
+from sonar.modules.organisations.api import OrganisationSearch
+
+
+def sitemap_generate(server_name, size=10000):
+    """Generate the sitemap file(s) of an organisation.
+
+    The documents of the organisation whose server name is ``server_name``
+    are listed. If no organisation matches, all the documents are listed
+    with the default server name and the default organisation.
+
+    :param: server_name: organisation server name.
+    :param: size: size of the set of sitemap urls.
+    """
+    default_org = current_app.config.get('SONAR_APP_DEFAULT_ORGANISATION')
+    # Find Organisation by server name and set view and server name
+    search = DocumentSearch()
+    if org_pid := OrganisationSearch() \
+            .get_organisation_pid_by_server_name(server_name):
+        search = search.filter('term', organisation__pid=org_pid)
+    else:
+        server_name = current_app.config.get('SONAR_APP_SERVER_NAME')
+        org_pid = default_org
+
+    # The request context lets url_for build external urls on server_name.
+    with current_app.test_request_context(f'https://{server_name}'):
+        files_splitted = 0
+        file_name = 'sitemap.xml'
+        folder = ['sitemap']
+        # Only the non default organisations get their own sub folder.
+        if org_pid != default_org:
+            folder.append(org_pid)
+        sitemap_folder = os.path.join(current_app.static_folder, *folder)
+        sitemap_file = os.path.join(sitemap_folder, file_name)
+        # Create a destination directory
+        _create_folder_or_remove_files(sitemap_folder)
+
+        if count := search.count():
+            # Elasticsearch query for current organisation
+            hits = search \
+                .sort({'_updated': 'asc'}) \
+                .params(preserve_order=True) \
+                .source(['pid', '_updated']) \
+                .scan()
+
+            if count > size:
+                # In multiple files mode, generate the index
+                files_splitted = math.ceil(count / size)
+                _generate_index_sitemap(sitemap_file, org_pid, files_splitted)
+
+            _generate_sitemap(
+                sitemap_folder, sitemap_file, file_name, org_pid,
+                files_splitted, hits, size)
+
+
+def _create_folder_or_remove_files(sitemap_folder):
+    """Create the folder or remove the xml files it already contains.
+
+    :param: sitemap_folder: folder path.
+    """
+    # Recursive creation; an already existing folder is not an error, which
+    # avoids a race between an existence check and the creation.
+    os.makedirs(sitemap_folder, exist_ok=True)
+
+    # Remove the xml files of the folder. The folder is escaped so that
+    # special characters of its path are not read as glob wildcards.
+    pattern = os.path.join(glob.escape(sitemap_folder), '*.xml')
+    for file_path in glob.glob(pattern):
+        os.remove(file_path)
+
+
+def _get_url_sets(hits, max, org_pid, last=True):
+    """Yield one sitemap url entry per search hit.
+
+    :param: hits: iterable of search hits exposing ``pid`` and ``_updated``.
+    :param: max: number of entries after which the iteration stops, unless
+        ``last`` is true.
+    :param: org_pid: organisation pid used as view of the document url.
+    :param: last: if true, ``hits`` is consumed until its end.
+    :returns: generator of dicts with the ``loc`` and ``lastmod`` keys.
+    """
+    for position, hit in enumerate(hits, start=1):
+        location = url_for(
+            'invenio_records_ui.doc',
+            view=org_pid,
+            pid_value=hit.pid,
+            _external=True)
+        updated = datetime.fromisoformat(hit._updated)
+        yield {'loc': location, 'lastmod': updated.strftime('%Y-%m-%d')}
+        # Stop on a full set, leaving the remaining hits to the next call.
+        if position == max and not last:
+            break
+
+
+def _generate_index_sitemap(sitemap_file, org_pid, files_splitted):
+    """Write the sitemap index referencing every splitted sitemap file.
+
+    :param: sitemap_file: path of the index file to write.
+    :param: org_pid: organisation pid, used as view of the urls.
+    :param: files_splitted: number of sitemap files to reference.
+    """
+    index_template = current_app.jinja_env.get_template(
+        'sonar/sitemap_index.xml')
+    # One entry per splitted file, numbered from 1 and built lazily while
+    # the template is rendered.
+    entries = (
+        {
+            "loc": url_for(
+                'sitemap.sitemap_index',
+                view=org_pid,
+                index=number,
+                _external=True)
+        }
+        for number in range(1, files_splitted + 1)
+    )
+    index_template.stream(sitemaps=entries).dump(sitemap_file)
+
+
+def _generate_sitemap(
+        sitemap_folder, sitemap_file, file_name, org_pid, files_splitted,
+        hits, size):
+    """Write the url sets in one or several sitemap files.
+
+    :param: sitemap_folder: destination folder.
+    :param: sitemap_file: path of the file written in single file mode.
+    :param: file_name: file name, its first dot separated part prefixes
+        the numbered files in multiple files mode.
+    :param: org_pid: Organisation pid.
+    :param: files_splitted: number of sitemap files to generate.
+    :param: hits: search ES query scan, shared by all the files.
+    :param: size: number of urls per file.
+    """
+    url_template = current_app.jinja_env.get_template('sonar/sitemap.xml')
+
+    # List the files to write with a flag telling which one is the last.
+    if files_splitted > 1:
+        stem = file_name.split('.')[0]
+        targets = [
+            (os.path.join(sitemap_folder, f'{stem}_{number}.xml'),
+             number == files_splitted)
+            for number in range(1, files_splitted + 1)
+        ]
+    else:
+        targets = [(sitemap_file, True)]
+
+    for target_path, is_last in targets:
+        # Each file takes the next hits; the last one takes what is left.
+        stream = url_template.stream(
+            urlsets=_get_url_sets(hits, size, org_pid, is_last))
+        stream.enable_buffering(100)
+        stream.dump(target_path)
diff --git a/sonar/modules/sitemap/tasks.py b/sonar/modules/sitemap/tasks.py
new file mode 100644
index 00000000..5baa719d
--- /dev/null
+++ b/sonar/modules/sitemap/tasks.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2022 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Sitemap tasks."""
+
+from celery import shared_task
+from flask import current_app
+
+from sonar.modules.organisations.api import OrganisationSearch
+from sonar.modules.sitemap.sitemap import sitemap_generate
+
+
+@shared_task(ignore_result=True)
+def sitemap_generate_task():
+    """Generate sitemap.
+
+    Used as celery task. "ignore_result" flag means that we don't want to
+    get the status and/or the result of the task, execution is faster.
+    """
+    size = current_app.config.get('SONAR_APP_SITEMAP_ENTRY_SIZE', 10000)
+    # Generate dedicated organisations sitemaps
+    for org in OrganisationSearch().get_dedicated_list():
+        # A search hit raises AttributeError on a missing field: a dedicated
+        # organisation without server name is skipped instead of stopping
+        # the whole task.
+        if server_name := getattr(org, 'serverName', None):
+            sitemap_generate(server_name, size)
+
+    # Generate global sitemap
+    # NOTE(review): this passes an organisation pid as server name; it is
+    # presumably expected to match no organisation so that sitemap_generate
+    # falls back to the default one.
+    sitemap_generate(
+        current_app.config.get('SONAR_APP_DEFAULT_ORGANISATION'), size)
diff --git a/sonar/modules/sitemap/templates/sonar/sitemap.xml b/sonar/modules/sitemap/templates/sonar/sitemap.xml
new file mode 100644
index 00000000..e47c6656
--- /dev/null
+++ b/sonar/modules/sitemap/templates/sonar/sitemap.xml
@@ -0,0 +1,9 @@
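+<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  {%- for urlset in urlsets %}
+  <url>
+    <loc>{{ urlset.loc }}</loc>
+    <lastmod>{{ urlset.lastmod }}</lastmod>
+  </url>
+  {%- endfor %}
+</urlset>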
diff --git a/sonar/modules/sitemap/templates/sonar/sitemap_index.xml b/sonar/modules/sitemap/templates/sonar/sitemap_index.xml
new file mode 100644
index 00000000..4d1f022f
--- /dev/null
+++ b/sonar/modules/sitemap/templates/sonar/sitemap_index.xml
@@ -0,0 +1,8 @@
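+<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  {%- for sitemap in sitemaps %}
+  <sitemap>
+    <loc>{{ sitemap.loc }}</loc>
+  </sitemap>
+  {%- endfor %}
+</sitemapindex>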
diff --git a/sonar/modules/sitemap/views.py b/sonar/modules/sitemap/views.py
new file mode 100644
index 00000000..8a510dfa
--- /dev/null
+++ b/sonar/modules/sitemap/views.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2022 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Sitemap views."""
+
+import os
+
+from flask import Blueprint, Response, abort, current_app
+
+blueprint = Blueprint('sitemap',
+ __name__,
+ template_folder='templates',
+ url_prefix='/')
+
+
+@blueprint.route('/<view>/sitemap.xml')
+def sitemap(view):
+    """Get the sitemap file.
+
+    :param view: organisation view taken from the url.
+    :returns: the streamed sitemap file, 404 if it does not exist.
+    """
+    # The sitemap of the default organisation is generated directly in the
+    # sitemap folder, the other ones in a sub folder named after the
+    # organisation pid.
+    file_path = ['sitemap.xml']
+    if view != current_app.config.get('SONAR_APP_DEFAULT_ORGANISATION'):
+        file_path.insert(0, view)
+    return response_file(file_path)
+
+
+@blueprint.route('/<view>/sitemap_<int:index>.xml')
+def sitemap_index(view, index):
+    """Get one of the numbered sitemap files referenced by the index.
+
+    :param view: organisation view taken from the url.
+    :param index: number of the sitemap file, restricted to digits.
+    :returns: the streamed sitemap file, 404 if it does not exist.
+    """
+    # The files of the default organisation are generated directly in the
+    # sitemap folder, the other ones in a sub folder named after the
+    # organisation pid.
+    file_path = [f'sitemap_{index}.xml']
+    if view != current_app.config.get('SONAR_APP_DEFAULT_ORGANISATION'):
+        file_path.insert(0, view)
+    return response_file(file_path)
+
+
+def response_file(file_path):
+    """Stream a generated sitemap file.
+
+    :param file_path: path components of the file, relative to the
+        ``sitemap`` directory of the application static folder.
+    :returns: a streamed ``application/xml`` response; the request is
+        aborted with a 404 if the file does not exist or is located
+        outside of the sitemap folder.
+    """
+    def stream_file():
+        """Stream file."""
+        with open(sitemap_file, 'r', encoding='utf-8', buffering=100000) as f:
+            yield from f
+
+    sitemap_folder = os.path.abspath(
+        os.path.join(current_app.static_folder, 'sitemap'))
+    sitemap_file = os.path.abspath(os.path.join(sitemap_folder, *file_path))
+    # The path components come from the url: refuse anything that resolves
+    # outside of the sitemap folder (e.g. a '..' component).
+    inside = sitemap_file.startswith(sitemap_folder + os.sep)
+    if not inside or not os.path.isfile(sitemap_file):
+        abort(404)
+    return Response(stream_file(), mimetype='application/xml')
diff --git a/sonar/theme/templates/sonar/robot.txt b/sonar/theme/templates/sonar/robot.txt
new file mode 100644
index 00000000..89958e75
--- /dev/null
+++ b/sonar/theme/templates/sonar/robot.txt
@@ -0,0 +1,9 @@
+{%- if not state -%}
+User-agent: *
+Disallow: /
+{%- else -%}
+User-agent: *
+Allow: /
+
+Sitemap: {{ sitemap }}
+{% endif %}
diff --git a/sonar/theme/views.py b/sonar/theme/views.py
index e8a1a433..c234fbed 100644
--- a/sonar/theme/views.py
+++ b/sonar/theme/views.py
@@ -26,6 +26,7 @@
import re
from datetime import datetime
+from urllib.parse import urlparse
import dateutil.parser
import pytz
@@ -44,6 +45,7 @@
from sonar.modules.deposits.permissions import DepositPermission
from sonar.modules.documents.api import DocumentRecord
from sonar.modules.documents.permissions import DocumentPermission
+from sonar.modules.organisations.api import OrganisationSearch
from sonar.modules.organisations.permissions import OrganisationPermission
from sonar.modules.permissions import can_access_manage_view
from sonar.modules.subdivisions.permissions import \
@@ -65,6 +67,33 @@ def init_view():
current_menu.submenu('settings').submenu('admin').hide()
+@blueprint.route('/robot.txt')
+@blueprint.route('/robots.txt')
+def robot_txt():
+    """Generate dynamically robot.txt.
+
+    The file is served on ``/robots.txt``, the location requested by the
+    crawlers, and on the historical ``/robot.txt``.
+
+    :returns: a ``text/plain`` response disallowing every robot when the
+        application is not in production state, otherwise allowing them
+        and giving the sitemap url of the requested server.
+    """
+    production = current_app.config.get('SONAR_APP_PRODUCTION_STATE')
+    if not production:
+        # If we are not in production status, we disable all robots
+        return current_app.response_class(
+            response=render_template('sonar/robot.txt', state=False),
+            status=200,
+            mimetype='text/plain')
+    url_data = urlparse(request.url)
+    scheme = url_data.scheme
+    # Server name without the port
+    server_name = url_data.netloc.split(':')[0]
+    if org_pid := OrganisationSearch() \
+            .get_organisation_pid_by_server_name(server_name):
+        sitemap = f'{scheme}://{server_name}/{org_pid}/sitemap.xml'
+    else:
+        view = current_app.config.get('SONAR_APP_DEFAULT_ORGANISATION')
+        sitemap = f'{scheme}://{url_data.netloc}/{view}/sitemap.xml'
+    return current_app.response_class(
+        response=render_template(
+            'sonar/robot.txt', state=production, sitemap=sitemap),
+        status=200,
+        mimetype='text/plain')
+
+
@blueprint.route('/users/profile')
@blueprint.route('/users/profile/')
@login_required
diff --git a/tests/conftest.py b/tests/conftest.py
index 31952e0e..da7120a7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -142,6 +142,10 @@ def app_config(app_config):
app_config['CELERY_BROKER_URL'] = 'memory://'
app_config['CELERY_TASK_ALWAYS_EAGER'] = True
app_config['CELERY_TASK_EAGER_PROPAGATES'] = True
+
+ # Config
+ app_config['SONAR_APP_SERVER_NAME'] = 'sonar.rero.ch'
+ app_config['SONAR_APP_DEFAULT_ORGANISATION'] = 'global'
return app_config
@@ -472,9 +476,8 @@ def _make_document(organisation='org', with_file=False, pid=None):
if organisation:
make_organisation(organisation)
document_json['organisation'] = [{
- '$ref':
- 'https://sonar.ch/api/organisations/org'
- }]
+ '$ref': 'https://sonar.ch/api/organisations/org'}]
+
if pid:
document_json['pid'] = pid
diff --git a/tests/ui/sitemap/test_sitemap.py b/tests/ui/sitemap/test_sitemap.py
new file mode 100644
index 00000000..433d93a5
--- /dev/null
+++ b/tests/ui/sitemap/test_sitemap.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2022 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Test sitemap."""
+
+import os
+import shutil
+import xml.etree.ElementTree as ET
+from datetime import date
+
+from sonar.modules.documents.api import DocumentRecord
+from sonar.modules.sitemap.sitemap import sitemap_generate
+
+
+def test_sitemap(app, db, organisation, document):
+    """Test sitemap generator.
+
+    The files are generated next to this module and removed at the end of
+    the test, even when an assertion fails.
+    """
+    # Set current directory on static_folder config
+    path = os.path.dirname(os.path.realpath(__file__))
+    app.static_folder = path
+    sitemap_folder = os.path.join(path, 'sitemap')
+    # namespace of sitemap
+    namespace = '{http://www.sitemaps.org/schemas/sitemap/0.9}'
+
+    try:
+        sitemap_generate('org.domain.com', 10)
+
+        sitemap_file = os.path.join(path, 'sitemap', 'sitemap.xml')
+        assert os.path.isfile(sitemap_file)
+
+        # Control data into the xml file
+        tree = ET.parse(sitemap_file)
+        url = tree.findall(f'{namespace}url')[0]
+        assert 'https://sonar.rero.ch/global/documents/1' == \
+            url.find(f'{namespace}loc').text
+        assert date.today().strftime("%Y-%m-%d") == \
+            url.find(f'{namespace}lastmod').text
+
+        # ------- test for a dedicated organisation
+        organisation['isDedicated'] = True
+        organisation['serverName'] = 'org.domain.com'
+        organisation.commit()
+        organisation.reindex()
+        db.session.commit()
+
+        sitemap_generate('org.domain.com', 10)
+
+        sitemap_file = os.path.join(
+            path, 'sitemap', organisation['pid'], 'sitemap.xml')
+        assert os.path.isfile(sitemap_file)
+
+        # Control data into the xml file
+        tree = ET.parse(sitemap_file)
+        url = tree.findall(f'{namespace}url')[0]
+        assert 'https://org.domain.com/org/documents/1' == \
+            url.find(f'{namespace}loc').text
+        assert date.today().strftime("%Y-%m-%d") == \
+            url.find(f'{namespace}lastmod').text
+
+        # ------- Generate multiple files with index sitemap
+        document.pop('pid', None)
+        document.pop('_oai', None)
+        document['identifiedBy'] = [{
+            'value': 'R003415714',
+            'type': 'bf:Local',
+            'source': 'RERO'
+        }]
+        doc = DocumentRecord.create(document)
+        doc.reindex()
+        db.session.commit()
+
+        sitemap_generate('org.domain.com', 1)
+
+        sitemap_index = os.path.join(
+            path, 'sitemap', organisation['pid'], 'sitemap.xml')
+        assert os.path.isfile(sitemap_index)
+
+        tree = ET.parse(sitemap_index)
+        sitemaps = tree.findall(f'{namespace}sitemap')
+        # An empty index would make the loop below pass without any check.
+        assert sitemaps
+        for n, sitemap in enumerate(sitemaps, start=1):
+            assert f'https://org.domain.com/org/sitemap_{n}.xml' == \
+                sitemap.find(f'{namespace}loc').text
+
+        for i in range(1, 3):
+            sitemap_file = os.path.join(
+                path, 'sitemap', organisation['pid'], f'sitemap_{i}.xml')
+            assert os.path.isfile(sitemap_file)
+            tree = ET.parse(sitemap_file)
+            url = tree.findall(f'{namespace}url')[0]
+            assert f'https://org.domain.com/org/documents/{i}' == \
+                url.find(f'{namespace}loc').text
+            assert date.today().strftime("%Y-%m-%d") == \
+                url.find(f'{namespace}lastmod').text
+    finally:
+        # Remove folder after test
+        shutil.rmtree(sitemap_folder, ignore_errors=True)
diff --git a/tests/ui/sitemap/test_sitemap_views.py b/tests/ui/sitemap/test_sitemap_views.py
new file mode 100644
index 00000000..058a53f5
--- /dev/null
+++ b/tests/ui/sitemap/test_sitemap_views.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2022 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Test sitemap views."""
+
+import mock
+import pytest
+from flask import Response, url_for
+
+
+def response_urlset():
+    """Return a response holding a minimal sitemap url set."""
+    return Response("""
+<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <url>
+    <loc>http://www.domain.com/document/1</loc>
+    <lastmod>2022-01-01</lastmod>
+  </url>
+</urlset>
+    """)
+
+
+def response_sitemapindex():
+    """Return a response holding a minimal sitemap index."""
+    return Response("""
+<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <sitemap>
+    <loc>http://www.domain.com/sitemap_1.xml</loc>
+  </sitemap>
+</sitemapindex>
+    """)
+
+
+def test_sitemap_file_error(app, client):
+ """An exception is expected when the sitemap file doesn't exist."""
+ # No sitemap is generated before the request.
+ with pytest.raises(Exception):
+ url = url_for('sitemap.sitemap', view='global')
+ client.get(url)
+
+
+
+# response_file is replaced by a mock returning a canned response, so that
+# no generated file is needed on disk.
+@mock.patch('sonar.modules.sitemap.views.response_file',
+ mock.MagicMock(return_value=response_urlset()))
+def test_sitemap_file(app, client):
+ """Test entrypoint for sitemap urlset."""
+ url = url_for('sitemap.sitemap', view='global')
+ res = client.get(url)
+ assert res.status_code == 200
+
+
+# response_file is replaced by a mock returning a canned response, so that
+# no generated file is needed on disk.
+@mock.patch('sonar.modules.sitemap.views.response_file',
+ mock.MagicMock(return_value=response_sitemapindex()))
+def test_sitemap_index_file(app, client):
+ """Test entrypoint for sitemap index."""
+ url = url_for('sitemap.sitemap_index', view='global', index=1)
+ res = client.get(url)
+ assert res.status_code == 200
diff --git a/tests/ui/test_views.py b/tests/ui/test_views.py
index 71426f47..9f81b766 100644
--- a/tests/ui/test_views.py
+++ b/tests/ui/test_views.py
@@ -35,6 +35,22 @@ def test_error(client):
assert client.get(url_for('sonar.error'))
+def test_robot_txt(app):
+ """Test the robot file in non production then production state."""
+ with app.test_client() as client:
+ url = url_for('sonar.robot_txt')
+ # Non production state: every robot is disallowed.
+ app.config.update(SONAR_APP_PRODUCTION_STATE=False)
+ res = client.get(url)
+ assert res.status_code == 200
+ assert b'User-agent: *\nDisallow: /' in res.data
+
+ # Production state: robots are allowed and the sitemap url is given.
+ app.config.update(SONAR_APP_PRODUCTION_STATE=True)
+ res = client.get(url)
+ assert res.status_code == 200
+ assert b'User-agent: *\nAllow: /\n\n'\
+ b'Sitemap: http://localhost/global/sitemap.xml' in res.data
+
+
def test_admin_record_page(app, admin, user_without_role):
"""Test admin page redirection to defaults."""
with app.test_client() as client: