Skip to content

Commit

Permalink
Merge branch 'feature/slimmer-deps' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
felliott committed Sep 7, 2016
2 parents d8df6e2 + 8d3c485 commit f40ca7b
Show file tree
Hide file tree
Showing 11 changed files with 70 additions and 32 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ install:
- travis_retry pip install --upgrade pip
- travis_retry pip install wheel==0.26.0
- travis_retry pip install invoke==0.11.1
- sed 's/^rpy2==/#rpy2==/' -i requirements.txt
- travis_retry invoke wheelhouse --develop
- travis_retry invoke install --develop

Expand Down
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ RUN apt-get update \
# unoconv dependencies
&& apt-get install -y \
unoconv \
# pspp dependencies
&& apt-get install -y \
pspp \
&& apt-get clean \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ For MacOSX users:

```bash
brew install python3
brew install r
brew install pspp
```
For Ubuntu users:

```bash
apt-get install python3
apt-get install r-base
apt-get install pspp
```

After installing python3.5, create the virtual environment with the following commands:
Expand Down
2 changes: 1 addition & 1 deletion docs/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Or download one of the following:
* tarball_
* zipball_

Make sure that you have installed R, are using python3.5, and have installed invoke for your current python3 version.
Make sure that you have installed pspp, are using python3.5, and have installed invoke for your current python3 version.

Install the version of invoke pinned in the requirements.txt file (currently 0.11.1).

Expand Down
11 changes: 6 additions & 5 deletions mfr/extensions/ipynb/render.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os

import IPython
from IPython import nbformat
from IPython.config import Config
from IPython.nbconvert.exporters import HTMLExporter
import nbformat
import nbconvert
from traitlets.config import Config
from mako.lookup import TemplateLookup
from nbconvert.exporters import HTMLExporter

from mfr.core import extension
from mfr.extensions.ipynb import exceptions
Expand All @@ -19,7 +19,8 @@ class IpynbRenderer(extension.BaseRenderer):

# Constructor: forwards all arguments to the base renderer, then records the
# versions of the notebook-related libraries as metrics.
# NOTE(review): this view appears to interleave pre- and post-change diff lines;
# the 'ipython_version' line may be the removed one -- confirm against the file.
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.metrics.add('ipython_version', IPython.__version__)
self.metrics.add('nbformat_version', nbformat.__version__)
self.metrics.add('nbconvert_version', nbconvert.__version__)

def render(self):
try:
Expand Down
4 changes: 4 additions & 0 deletions mfr/extensions/tabular/libs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ def dta_pandas():
return dta_pandas


def sav_stdlib():
    """Return the ``sav_stdlib`` reader from ``stdlib_tools``.

    The import happens inside the function, so ``stdlib_tools`` is only loaded
    when this loader is actually called.
    """
    from ..libs.stdlib_tools import sav_stdlib as reader
    return reader

def sav_pandas():
    """Return the ``sav_pandas`` reader from ``panda_tools``.

    The import happens inside the function, so ``panda_tools`` is only loaded
    when this loader is actually called.
    """
    from ..libs.panda_tools import sav_pandas as reader
    return reader
Expand Down
25 changes: 9 additions & 16 deletions mfr/extensions/tabular/libs/panda_tools.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from tempfile import NamedTemporaryFile

import numpy
import pandas
from tempfile import NamedTemporaryFile
from ..utilities import header_population, strip_comments

from mfr.extensions.tabular.utilities import header_population, strip_comments, sav_to_csv


def csv_pandas(fp):
Expand Down Expand Up @@ -36,11 +38,14 @@ def dta_pandas(fp):


def sav_pandas(fp):
    """Read and convert a .sav file to a .csv file via pspp, then convert that to JSON format
    using the pandas library

    :param fp: File pointer object
    :return: the result of ``data_from_dataframe``: a dict mapping ``'Sheet 1'`` to a
        tuple of table headers and data
    """
    csv_file = sav_to_csv(fp)
    try:
        dataframe = pandas.read_csv(csv_file.name, low_memory=False)
    finally:
        # sav_to_csv hands ownership of the temporary file to the caller; closing it
        # here removes the temp file whether or not parsing succeeded.
        csv_file.close()
    return data_from_dataframe(dataframe)


Expand All @@ -64,15 +69,3 @@ def data_from_dataframe(dataframe):
data_row[name] = value
data.append(data_row)
return {'Sheet 1': (header, data)}


def robjectify(fp):
    """Build a dataframe from an SPSS file by delegating the parsing to R.

    R's ``read.spss`` (loaded via ``require(foreign)``) reads ``fp.name`` into the
    R variable ``x``, whose row names are then reset to ``0 .. nrow(x)-1`` before
    ``x`` is loaded back through ``pandas.rpy.common.load_data``.

    :param fp: file object; only its ``name`` attribute is used
    :return: whatever ``common.load_data('x')`` returns
    """
    import pandas.rpy.common as common
    import rpy2.robjects as robjects

    evaluate = robjects.r
    evaluate("require(foreign)")
    read_statement = 'x <- read.spss("{}",to.data.frame=T)'.format(fp.name)
    evaluate(read_statement)
    evaluate('row.names(x) = 0:(nrow(x)-1)')
    return common.load_data('x')
17 changes: 16 additions & 1 deletion mfr/extensions/tabular/libs/stdlib_tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import csv
import re
import csv

from ..exceptions import EmptyTableException
from mfr.extensions.tabular import utilities


def csv_stdlib(fp):
Expand Down Expand Up @@ -42,6 +44,19 @@ def csv_stdlib(fp):
return {'Sheet 1': (columns, rows)}


def sav_stdlib(fp):
    """Read and convert a .sav file to .csv with pspp, then convert that to JSON format using
    the python standard library

    :param fp: File pointer object to a .sav file
    :return: the result of ``csv_stdlib`` for the converted file: a dict mapping
        ``'Sheet 1'`` to a tuple of table headers and data
    """
    csv_file = utilities.sav_to_csv(fp)
    try:
        with open(csv_file.name, 'r') as converted:
            return csv_stdlib(converted)
    finally:
        # We own the temporary file returned by sav_to_csv. Closing it in a
        # ``finally`` guarantees it is removed even if open() or parsing raises.
        csv_file.close()


def _set_dialect_quote_attrs(dialect, data):
"""Set quote-related dialect attributes based on up to 2kb of csv data.
Expand Down
4 changes: 3 additions & 1 deletion mfr/extensions/tabular/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
'.xlsx': [libs.xlsx_xlrd],
'.xls': [libs.xlsx_xlrd],
'.dta': [libs.dta_pandas],
'.sav': [libs.sav_pandas],
'.sav': [libs.sav_stdlib],
# '.ods': [libs.ods_ezodf],
})

Expand All @@ -38,3 +38,5 @@
'syncColumnCellResize': True,
'multiColumnSort': True,
})

PSPP_CONVERT_BIN = config.get('PSPP_CONVERT_BIN', '/usr/bin/pspp-convert')
24 changes: 23 additions & 1 deletion mfr/extensions/tabular/utilities.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import re
import subprocess
from tempfile import NamedTemporaryFile

from mfr.extensions.tabular import compat
from mfr.core import exceptions
from mfr.extensions.tabular import compat, settings


def header_population(headers):
Expand Down Expand Up @@ -35,3 +38,22 @@ def strip_comments(src, dest):
data = data.encode('utf-8', 'ignore')
dest.write(data)
dest.seek(0)


def sav_to_csv(fp):
    """Converts a SPSS .sav to a .csv file by calling out to ``pspp-convert``.

    :param fp: file pointer object to .sav file
    :return: file pointer to .csv file. You are responsible for closing this.
    :raises exceptions.ExporterError: if ``pspp-convert`` exits with a non-zero status
    """
    csv_file = NamedTemporaryFile(mode='w+b', suffix='.csv')
    try:
        subprocess.check_call([
            settings.PSPP_CONVERT_BIN,
            fp.name,
            csv_file.name,
        ])
    except subprocess.CalledProcessError as err:
        # The caller never receives the temp file on failure, so release it here.
        csv_file.close()
        raise exceptions.ExporterError(
            'Unable to convert the SPSS file to CSV, please try again later.',
            code=400) from err
    except Exception:
        # Any other failure (e.g. the binary cannot be launched) propagates
        # unchanged, but the temp file must still not be leaked.
        csv_file.close()
        raise
    return csv_file
7 changes: 3 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ pydocx==0.7.0
Pillow==2.8.2

# IPython
ipython==3.1.0
nbconvert==4.2.0
nbformat==4.1.0
traitlets==4.2.2
jsonschema==2.4.0
jinja2==2.7.3
mistune==0.7
Expand All @@ -37,9 +39,6 @@ docutils==0.12
pandas==0.17.1
git+https://github.com/icereval/xlrd.git

# Rpy
rpy2==2.7.8

# Md
markdown==2.6.2

Expand Down

0 comments on commit f40ca7b

Please sign in to comment.