Skip to content

Commit

Permalink
Update cnx-archive to use python 3 (but still compatible with python 2)
Browse files Browse the repository at this point in the history
  • Loading branch information
karenc committed Oct 21, 2015
1 parent 7c364f0 commit ad8db6c
Show file tree
Hide file tree
Showing 25 changed files with 988 additions and 885 deletions.
18 changes: 10 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
language: python
python:
- "2.7"
- "3.4"
addons:
postgresql: "9.3"
services:
Expand All @@ -19,10 +20,10 @@ before_install:

# Install rhaptos.cnxmlutils (dependency of cnx-cnxml-transforms)
- git clone https://github.com/Connexions/rhaptos.cnxmlutils.git
- cd ./rhaptos.cnxmlutils && python setup.py install && cd ..
- cd ./rhaptos.cnxmlutils && python setup.py install && sudo /usr/bin/python setup.py install && cd ..
# Install cnx-cnxml-transforms
- git clone https://github.com/Connexions/cnx-cnxml-transforms.git
- cd ./cnx-cnxml-transforms && python setup.py install && cd ..
- cd ./cnx-cnxml-transforms && python setup.py install && sudo /usr/bin/python setup.py install && cd ..
# Install cnx-query-grammar
- git clone https://github.com/Connexions/cnx-query-grammar.git
- cd ./cnx-query-grammar && python setup.py install && cd ..
Expand All @@ -34,12 +35,13 @@ before_install:
- pip install coverage
- pip install coveralls

# FIXME patch triggers to include paths to cnx-cnxml-transforms and rhaptos.cnxmlutils
- CNXML_TRANSFORMS_PATH=$(python -c 'import os; import cnxmltransforms; print(os.path.abspath("{}/..".format(cnxmltransforms.__path__[0])))')
- RHAPTOS_CNXMLUTILS_PATH=$(python -c 'import os; import rhaptos.cnxmlutils; print(os.path.abspath("{}/../..".format(rhaptos.cnxmlutils.__path__[0])))')
- LXML_PATH=$(python -c 'import os; import lxml; print(os.path.abspath("{}/..".format(lxml.__path__[0])))')
- cd cnx-cnxml-transforms && python setup.py install && cd ..
- sed -i "s%from cnxmltransforms%import sys; sys.path.append('$CNXML_TRANSFORMS_PATH'); sys.path.append('$RHAPTOS_CNXMLUTILS_PATH'); sys.path.append('$LXML_PATH'); &%" cnxarchive/sql/schema/*.sql
# Doesn't work with python 3
# # FIXME patch triggers to include paths to cnx-cnxml-transforms and rhaptos.cnxmlutils
# - CNXML_TRANSFORMS_PATH=$(python -c 'import os; import cnxmltransforms; print(os.path.abspath("{}/..".format(cnxmltransforms.__path__[0])))')
# - RHAPTOS_CNXMLUTILS_PATH=$(python -c 'import os; import rhaptos.cnxmlutils; print(os.path.abspath("{}/../..".format(rhaptos.cnxmlutils.__path__[0])))')
# - LXML_PATH=$(python -c 'import os; import lxml; print(os.path.abspath("{}/..".format(lxml.__path__[0])))')
# - cd cnx-cnxml-transforms && python setup.py install && cd ..
# - sed -i "s%from cnxmltransforms%import sys; sys.path.append('$CNXML_TRANSFORMS_PATH'); sys.path.append('$RHAPTOS_CNXMLUTILS_PATH'); sys.path.append('$LXML_PATH'); &%" cnxarchive/sql/schema/*.sql
install:
- python setup.py install
before_script:
Expand Down
9 changes: 8 additions & 1 deletion cnxarchive/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
# See LICENCE.txt for details.
# ###
"""Document and collection archive web application."""
from __future__ import unicode_literals
import sys

from pyramid.config import Configurator


Expand All @@ -22,6 +25,7 @@
'cache-control',
'content-type',
]
IS_PY2 = sys.version_info.major == 2


def declare_api_routes(config):
Expand Down Expand Up @@ -72,6 +76,9 @@ def main(global_config, **settings):
if not settings.get(setting, None):
raise ValueError('Missing {} config setting.'.format(setting))

config.scan(ignore='.tests')
if IS_PY2:
config.scan(ignore=b'.tests')
else:
config.scan(ignore='.tests')
config.include('cnxarchive.events.main')
return config.make_wsgi_app()
8 changes: 5 additions & 3 deletions cnxarchive/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
# See LICENCE.txt for details.
# ###
"""Memcached utilities"""

from __future__ import unicode_literals
import base64
import copy

import memcache
from pyramid.threadlocal import get_current_registry

from .search import search as database_search
from .utils import utf8


def search(query, query_type, nocache=False):
Expand All @@ -36,11 +37,11 @@ def search(query, query_type, nocache=False):

# search_key should look something like:
# '"sort:pubDate" "text:college physics" "query_type:weakAND"'
search_key = ' '.join(['"{}"'.format(':'.join(param))
search_key = ' '.join(['"{}"'.format(':'.join(utf8(param)))
for param in search_params])
# since search_key is not a valid memcached key, use base64
# encoding to make it into a valid key
mc_search_key = base64.b64encode(search_key)
mc_search_key = base64.b64encode(search_key.encode('utf-8'))

# look for search results in memcache first, unless nocache
mc = memcache.Client(memcache_servers,
Expand Down Expand Up @@ -71,5 +72,6 @@ def search(query, query_type, nocache=False):
mc.set(mc_search_key, search_results, time=cache_length,
min_compress_len=1024*1024) # compress when > 1MB

mc.disconnect_all()
# return search results
return search_results
2 changes: 1 addition & 1 deletion cnxarchive/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Public License version 3 (AGPLv3).
# See LICENCE.txt for details.
# ###

from __future__ import unicode_literals
import os.path


Expand Down
15 changes: 10 additions & 5 deletions cnxarchive/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
# See LICENCE.txt for details.
# ###
"""Database models and utilities"""
from __future__ import unicode_literals
import datetime
import os
import json
import psycopg2
import re

from . import config
from . import config, IS_PY2
from .utils import split_ident_hash


Expand Down Expand Up @@ -47,7 +48,7 @@ def _read_sql_file(name):


def _read_schema_manifest(manifest_filepath):
with open(os.path.abspath(manifest_filepath), 'rb') as fp:
with open(os.path.abspath(manifest_filepath), 'r') as fp:
raw_manifest = json.loads(fp.read())
manifest = []
relative_dir = os.path.abspath(os.path.dirname(manifest_filepath))
Expand Down Expand Up @@ -76,7 +77,7 @@ def _compile_manifest(manifest, content_modifier=None):
if isinstance(item, list):
items.extend(_compile_manifest(item, content_modifier))
else:
with open(item, 'rb') as fp:
with open(item, 'r') as fp:
content = fp.read()
if content_modifier:
content = content_modifier(item, content)
Expand All @@ -91,7 +92,7 @@ def get_schema():

# Modify the file so that it contains comments that state its origin.
def file_wrapper(f, c):
return u"-- FILE: {0}\n{1}\n-- \n".format(f, c)
return "-- FILE: {0}\n{1}\n-- \n".format(f, c)

return _compile_manifest(schema_manifest, file_wrapper)

Expand Down Expand Up @@ -140,7 +141,11 @@ def get_tree(ident_hash, cursor):
tree = cursor.fetchone()[0]
except TypeError: # NoneType
raise ContentNotFound()
if type(tree) in (type(''), type(u'')):
if IS_PY2:
string_types = basestring
else:
string_types = (str, bytes)
if isinstance(tree, string_types):
import json
return json.loads(tree)
else:
Expand Down
1 change: 1 addition & 0 deletions cnxarchive/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Public License version 3 (AGPLv3).
# See LICENCE.txt for details.
# ###
from __future__ import unicode_literals
import os.path
import logging
import socket
Expand Down
42 changes: 36 additions & 6 deletions cnxarchive/robots.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# -*- coding: utf-8 -*-
# ###
# Copyright (c) 2015, Rice University
# This software is subject to the provisions of the GNU Affero General
# Public License version 3 (AGPLv3).
# See LICENCE.txt for details.
# ###
from __future__ import unicode_literals

from . import IS_PY2


class Robots(object):
def __init__(self, sitemap='http://cnx.org/sitemap.xml', bots=None):
self.sitemap = sitemap
Expand All @@ -6,14 +18,23 @@ def __init__(self, sitemap='http://cnx.org/sitemap.xml', bots=None):
def add_bot(self, bot_name, delay, pages_to_block):
self.bots.append(Bot(bot_name, delay, pages_to_block))

def __str__(self):
def to_string(self):
ret_str = 'Sitemap: ' + self.sitemap + '\n'
for bot in self.bots:
ret_str += bot.to_string() + '\n'
return ret_str

def to_string(self):
return self.__str__()
def __str__(self):
if IS_PY2:
return self.to_string().encode('utf-8')
return self.to_string()

def __unicode__(self):
# FIXME remove when we no longer need to support python 2
return self.to_string()

def __bytes__(self):
return self.to_string().encode('utf-8')


class Bot(object):
Expand All @@ -22,13 +43,22 @@ def __init__(self, bot_name, delay, pages_to_block):
self.delay = delay
self.blocked = pages_to_block

def __str__(self):
def to_string(self):
ret_str = 'User-agent: ' + self.name + '\n'
if self.delay:
ret_str += 'Crawl-delay: ' + self.delay + '\n'
for page in self.blocked:
ret_str += 'Disallow: ' + page + '\n'
return ret_str

def to_string(self):
return self.__str__()
def __str__(self):
if IS_PY2:
return self.to_string().encode('utf-8')
return self.to_string()

def __unicode__(self):
# FIXME remove when we no longer need to support python 2
return self.to_string()

def __bytes__(self):
return self.to_string().encode('utf-8')
1 change: 1 addition & 0 deletions cnxarchive/scripts/hits_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
The counts are processed into a time range
and inserted into the cnx-archive database.
"""
from __future__ import unicode_literals
import re
import argparse
import gzip
Expand Down
2 changes: 1 addition & 1 deletion cnxarchive/scripts/initializedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# See LICENCE.txt for details.
# ###
"""Commandline script used to initialize the SQL database."""
from __future__ import print_function
from __future__ import print_function, unicode_literals
import os
import sys
import argparse
Expand Down
35 changes: 16 additions & 19 deletions cnxarchive/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# See LICENCE.txt for details.
# ###
"""Database search utilities"""
from __future__ import unicode_literals
import os
import json
import re
Expand Down Expand Up @@ -328,7 +329,7 @@ def __hash__(self):
authors.add(hashabledict(author))

authors = list(authors)
authors.sort(lambda x, y: cmp(y['id'], x['id']))
authors.sort(key=lambda a: a['id'], reverse=True)
setattr(self, attr_name, authors)
return getattr(self, attr_name)

Expand All @@ -348,18 +349,18 @@ def _count_field(self, field_name, sorted=True, max_results=None):

if max_results:
# limit the number of results we return
counts = counts.items()
counts = list(counts.items())
# sort counts by the count with highest count first
counts.sort(lambda a, b: cmp(a[1], b[1]), reverse=True)
counts.sort(key=lambda a: (a[1], a[0].lower()), reverse=True)
counts = counts[:max_results]

if sorted:
if isinstance(counts, dict):
counts = counts.items()
counts = list(counts.items())
# Sort counts by the name alphabetically
counts.sort(lambda a, b: cmp(a[0].lower(), b[0].lower()))
counts.sort(key=lambda a: a[0].lower())
else:
counts = counts.iteritems()
counts = list(counts.items())

return counts

Expand All @@ -384,25 +385,21 @@ def _count_authors(self, max_results=None):
counts[uid] += 1
uid_author.setdefault(uid, author)
authors = []
for uid, count in counts.iteritems():
for uid, count in counts.items():
author = uid_author[uid]
authors.append(((uid, author,), count))

if max_results:
# limit the number of results we return
# sort counts by the count with highest count first
authors.sort(lambda a, b: cmp(a[1], b[1]), reverse=True)
authors.sort(key=lambda a: a[1], reverse=True)
authors = authors[:max_results]

def sort_name(a, b):
def sort_name_key(a):
(uid_a, author_a), count_a = a
(uid_b, author_b), count_b = b
result = cmp(author_a['surname'], author_b['surname'])
if result == 0:
result = cmp(author_a['firstname'], author_b['firstname'])
return result
return (author_a['surname'] or '', author_a['firstname'] or '')
# Sort authors by surname then first name
authors.sort(sort_name)
authors.sort(key=sort_name_key)
authors = [(a[0][0], a[1],) for a in authors]
return authors

Expand All @@ -414,12 +411,12 @@ def _count_publication_year(self):
continue
date = datetime(*strptime(date, "%Y-%m-%dT%H:%M:%SZ")[:6],
tzinfo=FixedOffsetTimezone())
year = unicode(date.astimezone(LOCAL_TZINFO).year)
year = str(date.astimezone(LOCAL_TZINFO).year)
counts.setdefault(year, 0)
counts[year] += 1
counts = counts.items()
counts = list(counts.items())
# Sort pubYear in reverse chronological order
counts.sort(lambda a, b: cmp(a[0], b[0]), reverse=True)
counts.sort(key=lambda a: a[0], reverse=True)
return counts


Expand Down Expand Up @@ -536,7 +533,7 @@ def _build_search(structured_query, weights):
arguments = {}

# Clone the weighted queries for popping.
query_weight_order = DEFAULT_SEARCH_WEIGHTS.keys()
query_weight_order = list(DEFAULT_SEARCH_WEIGHTS.keys())

# Roll over the weight sequence.
query_list = []
Expand Down
Loading

0 comments on commit ad8db6c

Please sign in to comment.