Skip to content

Commit

Permalink
Release 2.14.0
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Jan 11, 2018
2 parents 75c439d + e6651dd commit 3289e64
Show file tree
Hide file tree
Showing 15 changed files with 432 additions and 213 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Change Log

# [2.14.0] - 2018-01-10
## Added
* Allow reading/writing `Source.canonical` at `/api/v2/sources/`
* Include `<author>` in atom feed at `/api/v2/atom/`
* ScholarsArchive@OSU source config for their new API

## Changed
* Prevent OSF harvester from being throttled
* Update NSFAwards harvester/transformer to include more fields

# [2.13.1] - 2018-01-04
## Fixed
* Use request context to build URLs in the API instead of SHARE_API_URL setting
Expand Down
15 changes: 9 additions & 6 deletions api/sources/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class UpdateSourceSerializer(ShareSerializer):

class Meta:
model = models.Source
fields = ('name', 'home_page', 'long_title', 'icon', 'icon_url', 'user', 'source_configs', 'url')
fields = ('name', 'home_page', 'long_title', 'canonical', 'icon', 'icon_url', 'user', 'source_configs', 'url')
read_only_fields = ('icon', 'user', 'source_configs', 'url')
view_name = 'api:source-detail'

Expand Down Expand Up @@ -87,19 +87,19 @@ class Meta(UpdateSourceSerializer.Meta):

def create(self, validated_data):
icon_url = validated_data.pop('icon_url')
long_title = validated_data.pop('long_title')

icon_file = self._fetch_icon_file(icon_url)
long_title = validated_data['long_title']

label = long_title.replace(' ', '_').lower()

name = validated_data.get('name', label)
name = validated_data.pop('name', label)

with transaction.atomic():
source, created = models.Source.objects.get_or_create(
long_title=long_title,
defaults={
'home_page': validated_data.get('home_page', None),
'name': name,
**validated_data
}
)
if not created:
Expand All @@ -120,4 +120,7 @@ def _create_trusted_user(self, username):

user_serializer.is_valid(raise_exception=True)

return user_serializer.save()
user = user_serializer.save()
user.set_unusable_password()
user.save()
return user
17 changes: 17 additions & 0 deletions api/views/feeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,23 @@ def item_link(self, item):
# Link to SHARE curate page
return '{}{}/{}'.format(settings.SHARE_WEB_URL, item.get('type').replace(' ', ''), item.get('id'))

def item_author_name(self, item):
    """Return the author string shown for a feed entry.

    Contributors that carry an ``order_cited`` key are treated as the cited
    creators and are sorted by that value (ascending). If no contributor
    has ``order_cited``, the contributor list is used in its given order.

    Args:
        item: Mapping describing the entry; contributors are read from
            ``item['lists']['contributors']``.

    Returns:
        ``'No authors provided.'`` when there are no contributors, the
        first author's ``cited_as`` followed by ``' et al.'`` when there
        are several, or the single author's ``cited_as`` otherwise (the
        latter two passed through ``prepare_string``).
    """
    # `lists` / `contributors` may be absent or null. Fall back to empty
    # containers so a sparse entry yields the "no authors" text instead of
    # raising AttributeError (the old default for `lists` was a list, which
    # has no `.get`).
    lists = item.get('lists') or {}
    contributor_list = lists.get('contributors') or []
    creators = [c for c in contributor_list if 'order_cited' in c]

    if creators:
        authors = sorted(creators, key=lambda c: c['order_cited'])
    else:
        authors = contributor_list

    if not authors:
        return 'No authors provided.'

    first_author = authors[0]['cited_as']
    if len(authors) > 1:
        return prepare_string('{} et al.'.format(first_author))
    return prepare_string(first_author)

def item_pubdate(self, item):
    """Return the entry's ``date_published`` value run through ``parse_date``."""
    published = item.get('date_published')
    return parse_date(published)

Expand Down
2 changes: 2 additions & 0 deletions project/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,8 @@ def split(string, delim):
# Overridable via the SHARE_USER_AGENT environment variable; the default is
# 'SHAREbot/<VERSION> (+<SHARE_WEB_URL>)'.
SHARE_USER_AGENT = os.environ.get('SHARE_USER_AGENT', 'SHAREbot/{} (+{})'.format(VERSION, SHARE_WEB_URL))

# Base URL of the OSF API. Trailing slashes are stripped and exactly one is
# re-appended, so the value always ends with a single '/'.
OSF_API_URL = os.environ.get('OSF_API_URL', 'https://staging-api.osf.io').rstrip('/') + '/'
# Read from the BYPASS_THROTTLE_TOKEN environment variable (note: the env
# var has no OSF_ prefix); None when unset.
OSF_BYPASS_THROTTLE_TOKEN = os.environ.get('BYPASS_THROTTLE_TOKEN', None)

# Overridable via the DOI_BASE_URL environment variable.
DOI_BASE_URL = os.environ.get('DOI_BASE_URL', 'http://dx.doi.org/')

# NOTE(review): presumably the HTML tags kept when sanitizing markup --
# confirm against where this setting is consumed.
ALLOWED_TAGS = ['abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i', 'li', 'ol', 'strong', 'ul']
Expand Down
78 changes: 12 additions & 66 deletions share/admin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from share.models.change import ChangeSet
from share.models.core import NormalizedData, ShareUser
from share.models.creative import AbstractCreativeWork
from share.models.ingest import RawDatum, Source, SourceConfig, Harvester, Transformer
from share.models.ingest import RawDatum, Source, SourceConfig, Harvester, Transformer, SourceUniqueIdentifier
from share.models.logs import HarvestLog
from share.models.meta import Subject, SubjectTaxonomy
from share.models.registration import ProviderRegistration
Expand Down Expand Up @@ -77,6 +77,8 @@ class RawDatumAdmin(admin.ModelAdmin):
show_full_result_count = False
list_select_related = ('suid__source_config', )
list_display = ('id', 'identifier', 'source_config_label', 'datestamp', 'date_created', 'date_modified', )
readonly_fields = ('datum', 'sha256')
raw_id_fields = ('suid', 'logs')

def identifier(self, obj):
    """Return the ``identifier`` of the SUID attached to *obj*."""
    suid = obj.suid
    return suid.identifier
Expand All @@ -86,6 +88,7 @@ def source_config_label(self, obj):


class AccessTokenAdmin(admin.ModelAdmin):
    """Admin options for access tokens."""

    # Columns shown on the change-list page.
    list_display = (
        'token',
        'user',
        'scope',
    )
    # Edit `user` through a raw ID input rather than a select box.
    raw_id_fields = ('user',)


Expand Down Expand Up @@ -192,7 +195,7 @@ def clean(self):
class SourceConfigAdmin(admin.ModelAdmin):
list_display = ('label', 'source_', 'version', 'enabled', 'source_config_actions')
list_select_related = ('source',)
readonly_fields = ('source_config_actions',)
readonly_fields = ('source_config_actions', 'source',)
search_fields = ['label', 'source__name', 'source__long_title']

def source_(self, obj):
Expand Down Expand Up @@ -249,71 +252,9 @@ def harvest(self, request, config_id):
return TemplateResponse(request, 'admin/harvest.html', context)


class SourceAdminInline(admin.StackedInline):
model = Source


class ShareUserAdmin(admin.ModelAdmin):
inlines = (SourceAdminInline,)


class SourceAddForm(forms.ModelForm):

title = forms.CharField(min_length=3, max_length=255, help_text='What this source will be displayed as to the end user. Must be unique.')
url = forms.URLField(min_length=11, max_length=255, help_text='The home page or canonical URL for this source, make sure it is unique!.\nThe reverse DNS notation prepended with "sources." will be used to create a user for this source. IE share.osf.io -> sources.io.osf.share')

class Meta:
model = Source
exclude = ('access_token', )
fields = ('title', 'url', 'icon', )

def validate_unique(self):
# Forces validation checks on every field
try:
self.instance.validate_unique()
except forms.ValidationError as e:
# Translate field names because I'm a bad person
if 'long_title' in e.error_dict:
e.error_dict['title'] = e.error_dict.pop('long_title')
if 'name' in e.error_dict:
e.error_dict['url'] = e.error_dict.pop('name')
if 'home_page' in e.error_dict:
e.error_dict['url'] = e.error_dict.pop('home_page')
self._update_errors(e)

def _post_clean(self):
if not self._errors:
self.instance.home_page = self.cleaned_data['url'].lower().strip('/')
self.instance.long_title = self.cleaned_data.pop('title')
self.instance.name = '.'.join(reversed(self.instance.home_page.split('//')[1].split('.')))
return super()._post_clean()

def save(self, commit=True):
instance = super().save(commit=False)

user = ShareUser.objects.create_user(username='sources.' + instance.name, save=commit)
user.set_unusable_password()
instance.user = user

if commit:
instance.save()

return instance


class SourceAdmin(admin.ModelAdmin):
search_fields = ('name', 'long_title')
readonly_fields = ('access_token', )

def add_view(self, *args, **kwargs):
self.form = SourceAddForm
return super().add_view(*args, **kwargs)

def save_model(self, request, obj, form, change):
if obj.user and not (obj.user_id or obj.user.id):
obj.user.save()
obj.user_id = obj.user.id # Django is weird
obj.save()
readonly_fields = ('access_token', 'user')

def access_token(self, obj):
tokens = obj.user.accesstoken_set.all()
Expand Down Expand Up @@ -381,6 +322,10 @@ def grade_(self, obj):
)


class SourceUniqueIdentifierAdmin(admin.ModelAdmin):
    """Admin options for SourceUniqueIdentifier."""

    # Both fields are displayed but cannot be edited through the admin.
    readonly_fields = (
        'identifier',
        'source_config',
    )


admin.site.unregister(AccessToken)
admin.site.register(AccessToken, AccessTokenAdmin)

Expand All @@ -392,9 +337,10 @@ def grade_(self, obj):
admin.site.register(SiteBanner, SiteBannerAdmin)

admin.site.register(Harvester)
admin.site.register(ShareUser, ShareUserAdmin)
admin.site.register(ShareUser)
admin.site.register(Source, SourceAdmin)
admin.site.register(SourceConfig, SourceConfigAdmin)
admin.site.register(SubjectTaxonomy, SubjectTaxonomyAdmin)
admin.site.register(SourceStat, SourceStatAdmin)
admin.site.register(SourceUniqueIdentifier, SourceUniqueIdentifierAdmin)
admin.site.register(Transformer)
54 changes: 53 additions & 1 deletion share/harvesters/gov_nsfawards.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,67 @@
logger = logging.getLogger(__name__)


# Field names requested from the NSF awards API; the harvester joins them
# with commas and sends them as the `printFields` query argument.
# NOTE(review): 'piMiddeInitial' looks misspelled, but it is presumably the
# exact field name the NSF API expects -- confirm before "fixing" it.
NSF_FIELDS = [
'id',
'agency',
'awardeeCity',
'awardeeCountryCode',
'awardeeCounty',
'awardeeDistrictCode',
'awardeeName',
'awardeeStateCode',
'awardeeZipCode',
'cfdaNumber',
'coPDPI',
'date',
'startDate',
'expDate',
'estimatedTotalAmt',
'fundsObligatedAmt',
'dunsNumber',
'fundProgramName',
'parentDunsNumber',
'pdPIName',
'perfCity',
'perfCountryCode',
'perfCounty',
'perfDistrictCode',
'perfLocation',
'perfStateCode',
'perfZipCode',
'poName',
'primaryProgram',
'transType',
'title',
'awardee',
'poPhone',
'poEmail',
'awardeeAddress',
'perfAddress',
'publicationResearch',
'publicationConference',
'fundAgencyCode',
'awardAgencyCode',
'projectOutComesReport',
'abstractText',
'piFirstName',
'piMiddeInitial',
'piLastName',
'piPhone',
'piEmail'
]


class NSFAwardsHarvester(BaseHarvester):
VERSION = 1
VERSION = 2

def do_harvest(self, start_date: pendulum.Pendulum, end_date: pendulum.Pendulum) -> Iterator[Tuple[str, Union[str, dict, bytes]]]:
    """Build the query URL for the given date window and fetch its records.

    The URL starts from ``self.config.base_url`` and gets ``dateStart`` and
    ``dateEnd`` (formatted MM/DD/YYYY), an ``offset`` of 0, and a
    ``printFields`` argument listing every name in ``NSF_FIELDS``. The
    result of ``self.fetch_records`` for that URL is returned.
    """
    date_format = '%m/%d/%Y'
    url = furl(self.config.base_url)

    query_args = (
        ('dateStart', start_date.date().strftime(date_format)),
        ('dateEnd', end_date.date().strftime(date_format)),
        ('offset', 0),
        ('printFields', ','.join(NSF_FIELDS)),
    )
    # Assign one key at a time, in the same order the arguments are listed.
    for key, value in query_args:
        url.args[key] = value

    return self.fetch_records(url)

Expand Down
4 changes: 4 additions & 0 deletions share/harvesters/io_osf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ class OSFHarvester(BaseHarvester):
VERSION = 1

def build_url(self, start_date, end_date):
# so prod SHARE doesn't get throttled
if settings.OSF_BYPASS_THROTTLE_TOKEN:
self.session.headers.update({'X-THROTTLE-TOKEN': settings.OSF_BYPASS_THROTTLE_TOKEN})

url = furl(settings.OSF_API_URL + self.kwargs['path'])
url.args['page[size]'] = 100
# url.args['filter[public]'] = 'true'
Expand Down
Loading

0 comments on commit 3289e64

Please sign in to comment.