Skip to content

Commit

Permalink
Instrument the ORCiD SSO affiliation flow
Browse files Browse the repository at this point in the history
* Existing user with verified ORCiD ID
* Existing user confirmation of linking ORCiD ID
* New user confirmation of account creation with ORCiD ID

Add Oxford
  • Loading branch information
cslzchen authored and mfraezz committed Jun 29, 2022
1 parent 9b399b7 commit a46390e
Show file tree
Hide file tree
Showing 15 changed files with 809 additions and 41 deletions.
4 changes: 4 additions & 0 deletions framework/auth/cas.py
Expand Up @@ -10,6 +10,7 @@
from framework.auth import authenticate, external_first_login_authenticate
from framework.auth.core import get_user, generate_verification_key
from framework.auth.utils import print_cas_log, LogLevel
from framework.celery_tasks.handlers import enqueue_task
from framework.flask import redirect
from framework.exceptions import HTTPError
from website import settings
Expand Down Expand Up @@ -376,6 +377,9 @@ def get_user_from_cas_resp(cas_resp):
external_id=external_credential['id'])
# existing user found
if user:
# Enqueue an async celery task that affiliates the user with eligible institutions if the ORCiD ID is verified
from framework.auth.tasks import update_affiliation_for_orcid_sso_users
enqueue_task(update_affiliation_for_orcid_sso_users.s(user._id, external_credential['id']))
return user, external_credential, 'authenticate'
# user first time login through external identity provider
else:
Expand Down
137 changes: 134 additions & 3 deletions framework/auth/tasks.py
@@ -1,11 +1,23 @@
from datetime import datetime
import itertools
import logging

from lxml import etree
import pytz
import requests

from framework import sentry
from framework.celery_tasks import app as celery_app
from website.settings import (DATE_LAST_LOGIN_THROTTLE_DELTA, EXTERNAL_IDENTITY_PROFILE,
ORCID_PUBLIC_API_V3_URL, ORCID_PUBLIC_API_ACCESS_TOKEN,
ORCID_PUBLIC_API_REQUEST_TIMEOUT, ORCID_RECORD_ACCEPT_TYPE,
ORCID_RECORD_EDUCATION_PATH, ORCID_RECORD_EMPLOYMENT_PATH)


from framework.celery_tasks import app
from website.settings import DATE_LAST_LOGIN_THROTTLE_DELTA
logger = logging.getLogger(__name__)


@app.task
@celery_app.task()
def update_user_from_activity(user_id, login_time, cas_login=False, updates=None):
from osf.models import OSFUser
if not updates:
Expand All @@ -27,3 +39,122 @@ def update_user_from_activity(user_id, login_time, cas_login=False, updates=None
should_save = True
if should_save:
user.save()


@celery_app.task()
def update_affiliation_for_orcid_sso_users(user_id, orcid_id):
    """Celery task: affiliate an ORCiD SSO user with an eligible institution.

    Loads the user, confirms that ``orcid_id`` is a VERIFIED external identity of that user,
    then looks up an eligible institution from the public ORCiD record and adds the
    affiliation when the user does not already have it.

    :param user_id: the ``_id`` of the OSF user who logged in via ORCiD SSO
    :param orcid_id: the ORCiD ID used for the login
    :return: None; failures are logged and reported to sentry
    """
    from osf.models import OSFUser

    user = OSFUser.load(user_id)

    # Neither failure should occur when the task is enqueued from a confirmed ORCiD SSO login.
    failure = None
    if not user:
        failure = f'User [{user_id}] Not Found'
    elif not verify_user_orcid_id(user, orcid_id):
        failure = f'Invalid ORCiD ID [{orcid_id}] for [{user_id}]'
    if failure is not None:
        logger.error(failure)
        sentry.log_message(failure)
        return

    institution = check_institution_affiliation(orcid_id)
    if not institution:
        return
    logger.info(f'Eligible institution affiliation has been found for ORCiD SSO user: '
                f'institution=[{institution._id}], user=[{user_id}], orcid_id=[{orcid_id}]')
    if user.is_affiliated_with_institution(institution):
        return
    user.affiliated_institutions.add(institution)


def verify_user_orcid_id(user, orcid_id):
    """Tell whether ``orcid_id`` is stored as a VERIFIED ORCiD identity of ``user``.

    :param user: object exposing an ``external_identity`` mapping of provider -> {id: status}
    :param orcid_id: the ORCiD ID to look up under the ORCiD provider
    :return: True only when the stored status equals ``'VERIFIED'``
    """
    orcid_provider = EXTERNAL_IDENTITY_PROFILE.get('OrcidProfile')
    provider_identities = user.external_identity.get(orcid_provider, {})
    return provider_identities.get(orcid_id, None) == 'VERIFIED'


def check_institution_affiliation(orcid_id):
    """Return the first institution whose verified source appears on the public ORCiD record.

    Employment sources are checked before education sources; each source is compared with the
    ``orcid_record_verified_source`` of every non-deleted "affiliation-via-orcid" institution.

    Note: only one affiliation is supported for now, so the search stops at the first match.

    :param orcid_id: the ORCiD ID whose public record is inspected
    :return: the matching institution, or None when no source matches
    """
    from osf.models import Institution
    from osf.models.institution import IntegrationType

    employment_sources = get_orcid_employment_sources(orcid_id)
    education_sources = get_orcid_education_sources(orcid_id)
    candidates = Institution.objects.filter(
        is_deleted=False,
        delegation_protocol=IntegrationType.AFFILIATION_VIA_ORCID.value,
    )

    # Lazily walk every (source, institution) pair and stop at the first match.
    all_sources = itertools.chain(employment_sources, education_sources)
    first_match = next(
        (
            (candidate, source)
            for source in all_sources
            for candidate in candidates
            if source == candidate.orcid_record_verified_source
        ),
        None,
    )

    if first_match is None:
        logger.debug(f'No institution with matching source has been found: orcid_id=[{orcid_id}]')
        return None

    institution, source = first_match
    logger.debug(f'Institution has been found with matching source: '
                 f'institution=[{institution._id}], source=[{source}], orcid_id=[{orcid_id}]')
    return institution


def get_orcid_employment_sources(orcid_id):
    """Return the source names of the employment records on a public ORCiD record.

    Fetches the employment section through ``orcid_public_api_make_request`` and collects the
    ``source-name`` text of the ``employment-summary`` found in each ``affiliation-group``.
    Groups that lack a summary, a source, or a source name are skipped instead of raising
    ``AttributeError``.

    :param orcid_id: the ORCiD ID whose employment records are retrieved
    :return: a list of source-name strings; empty when the request failed or nothing was found
    """
    employment_data = orcid_public_api_make_request(ORCID_RECORD_EMPLOYMENT_PATH, orcid_id)
    source_list = []
    if employment_data is None:
        return source_list
    affiliation_groups = employment_data.findall('{http://www.orcid.org/ns/activities}affiliation-group')
    for affiliation_group in affiliation_groups:
        employment_summary = affiliation_group.find('{http://www.orcid.org/ns/employment}employment-summary')
        if employment_summary is None:
            continue
        source = employment_summary.find('{http://www.orcid.org/ns/common}source')
        if source is None:
            continue
        source_name = source.find('{http://www.orcid.org/ns/common}source-name')
        # An absent or empty name can never identify an institution, so it is not collected.
        if source_name is None or not source_name.text:
            continue
        source_list.append(source_name.text)
    return source_list


def get_orcid_education_sources(orcid_id):
    """Return the source names of the education records on a public ORCiD record.

    Fetches the education section through ``orcid_public_api_make_request`` and collects the
    ``source-name`` text of the ``education-summary`` found in each ``affiliation-group``.
    Groups that lack a summary, a source, or a source name are skipped instead of raising
    ``AttributeError``.

    :param orcid_id: the ORCiD ID whose education records are retrieved
    :return: a list of source-name strings; empty when the request failed or nothing was found
    """
    education_data = orcid_public_api_make_request(ORCID_RECORD_EDUCATION_PATH, orcid_id)
    source_list = []
    if education_data is None:
        return source_list
    affiliation_groups = education_data.findall('{http://www.orcid.org/ns/activities}affiliation-group')
    for affiliation_group in affiliation_groups:
        education_summary = affiliation_group.find('{http://www.orcid.org/ns/education}education-summary')
        if education_summary is None:
            continue
        source = education_summary.find('{http://www.orcid.org/ns/common}source')
        if source is None:
            continue
        source_name = source.find('{http://www.orcid.org/ns/common}source-name')
        # An absent or empty name can never identify an institution, so it is not collected.
        if source_name is None or not source_name.text:
            continue
        source_list.append(source_name.text)
    return source_list


def orcid_public_api_make_request(path, orcid_id):
    """Call the ORCiD public API and return the response parsed as XML.

    :param path: the record sub-path appended after the ORCiD ID in the request URL
    :param orcid_id: the ORCiD ID whose record is requested
    :return: the parsed XML root element, or None when the request raises, the status is
        not 200, or the body cannot be parsed (each failure is logged and sent to sentry)
    """
    url = ORCID_PUBLIC_API_V3_URL + orcid_id + path
    request_headers = {
        'Accept': ORCID_RECORD_ACCEPT_TYPE,
        'Authorization': f'Bearer {ORCID_PUBLIC_API_ACCESS_TOKEN}',
    }

    try:
        resp = requests.get(url, headers=request_headers, timeout=ORCID_PUBLIC_API_REQUEST_TIMEOUT)
    except Exception:
        failure = f'ORCiD public API request has encountered an exception: url=[{url}]'
        logger.error(failure)
        sentry.log_message(failure)
        sentry.log_exception()
        return None

    if resp.status_code == 200:
        try:
            return etree.XML(resp.content)
        except Exception:
            failure = 'Fail to read and parse ORCiD record response as XML'
            logger.error(failure)
            sentry.log_message(failure)
            sentry.log_exception()
            return None

    # Any non-200 status is treated as a failed request.
    failure = (
        f'ORCiD public API request has failed: url=[{url}], '
        f'status=[{resp.status_code}], response = [{resp.content}]'
    )
    logger.error(failure)
    sentry.log_message(failure)
    return None
5 changes: 5 additions & 0 deletions framework/auth/views.py
Expand Up @@ -20,6 +20,7 @@
from framework.auth.decorators import block_bing_preview, collect_auth, must_be_logged_in
from framework.auth.forms import ResendConfirmationForm, ForgotPasswordForm, ResetPasswordForm
from framework.auth.utils import ensure_external_identity_uniqueness, validate_recaptcha
from framework.celery_tasks.handlers import enqueue_task
from framework.exceptions import HTTPError
from framework.flask import redirect # VOL-aware redirect
from framework.sessions.utils import remove_sessions_for_user, remove_session
Expand Down Expand Up @@ -672,6 +673,10 @@ def external_login_confirm_email_get(auth, uid, token):
can_change_preferences=False,
)

# Enqueue an async celery task that affiliates the user with eligible institutions if the ORCiD ID is verified
from framework.auth.tasks import update_affiliation_for_orcid_sso_users
enqueue_task(update_affiliation_for_orcid_sso_users.s(user._id, provider_id))

# redirect to CAS and authenticate the user with the verification key
return redirect(cas.get_login_url(
service_url,
Expand Down
25 changes: 25 additions & 0 deletions osf/migrations/0245_auto_20220621_1950.py
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.29 on 2022-06-21 19:50
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Schema changes on ``institution`` for affiliation via ORCiD.

    Adds the ``orcid_record_verified_source`` field and alters the choices of
    ``delegation_protocol`` so that they include ``via-orcid``.
    """

    dependencies = [('osf', '0244_auto_20220517_1718')]

    operations = [
        migrations.AddField(
            model_name='institution',
            name='orcid_record_verified_source',
            field=models.CharField(max_length=255, blank=True, default=''),
        ),
        migrations.AlterField(
            model_name='institution',
            name='delegation_protocol',
            field=models.CharField(
                max_length=15,
                blank=True,
                default='',
                choices=[
                    ('saml-shib', 'SAML_SHIBBOLETH'),
                    ('cas-pac4j', 'CAS_PAC4J'),
                    ('oauth-pac4j', 'OAUTH_PAC4J'),
                    ('via-orcid', 'AFFILIATION_VIA_ORCID'),
                    ('', 'NONE'),
                ],
            ),
        ),
    ]
34 changes: 22 additions & 12 deletions osf/models/institution.py
@@ -1,5 +1,6 @@
import logging
from enum import Enum
from future.moves.urllib.parse import urljoin
import logging

from dirtyfields import DirtyFieldsMixin

Expand All @@ -22,6 +23,17 @@
logger = logging.getLogger(__name__)


class IntegrationType(Enum):
    """Defines the 5 integration types for OSF institutions: four SSO-based types and ``NONE``.

    Each member's value and name are used to build the ``choices`` of the
    ``Institution.delegation_protocol`` field, in declaration order.
    """

    SAML_SHIBBOLETH = 'saml-shib'  # SSO via SAML (Shibboleth impl) where CAS serves as the SP and institutions as IdP
    CAS_PAC4J = 'cas-pac4j'  # SSO via CAS (pac4j impl) where CAS serves as the client and institution as server
    OAUTH_PAC4J = 'oauth-pac4j'  # SSO via OAuth (pac4j impl) where CAS serves as the client and institution as server
    AFFILIATION_VIA_ORCID = 'via-orcid'  # Using ORCiD SSO for sign in; using ORCiD public API for affiliation
    NONE = ''  # Institution affiliation is done via email domain whitelist w/o SSO


class InstitutionManager(models.Manager):

def get_queryset(self):
Expand Down Expand Up @@ -51,18 +63,16 @@ class Institution(DirtyFieldsMixin, Loggable, base.ObjectIDMixin, base.BaseModel
banner_name = models.CharField(max_length=255, blank=True, null=True)
logo_name = models.CharField(max_length=255, blank=True, null=True)

# The protocol which is used to delegate authentication.
# Currently, we have `CAS`, `SAML`, `OAuth` available.
# For `SAML`, we use Shibboleth.
# For `CAS` and `OAuth`, we use pac4j.
# Only institutions with a valid delegation protocol show up on the institution login page.
DELEGATION_PROTOCOL_CHOICES = (
('cas-pac4j', 'CAS by pac4j'),
('oauth-pac4j', 'OAuth by pac4j'),
('saml-shib', 'SAML by Shibboleth'),
('', 'No Delegation Protocol'),
# Institution integration type
delegation_protocol = models.CharField(
choices=[(type.value, type.name) for type in IntegrationType],
max_length=15,
blank=True,
default=''
)
delegation_protocol = models.CharField(max_length=15, choices=DELEGATION_PROTOCOL_CHOICES, blank=True, default='')

# Verified employment/education affiliation source for `via-orcid` institutions
orcid_record_verified_source = models.CharField(max_length=255, blank=True, default='')

# login_url and logout_url can be null or empty
login_url = models.URLField(null=True, blank=True)
Expand Down
2 changes: 2 additions & 0 deletions osf_tests/factories.py
Expand Up @@ -253,6 +253,8 @@ class InstitutionFactory(DjangoModelFactory):
domains = FakeList('url', n=3)
email_domains = FakeList('domain_name', n=1)
logo_name = factory.Faker('file_name')
orcid_record_verified_source = ''
delegation_protocol = ''

class Meta:
model = models.Institution
Expand Down

0 comments on commit a46390e

Please sign in to comment.