In [28]:
import os
import subprocess
import uuid
import sklearn.datasets as datasets

from feather.compat import guid, tobytes

R_READ_WRITE_TEMPLATE = """
library(feather)
df <- read_feather("{0}")
write_feather(df, "{1}")
"""

def random_path():
    return 'feather_{}'.format(guid())

def remove_paths(paths):
    for x in paths:
        try:
            os.remove(x)
        except os.error:
            pass

def run_rcode(code):
    tmp_r_path = 'test_{0}.R'.format(uuid.uuid4().hex)

    with open(tmp_r_path, 'wb') as f:
        f.write(tobytes(code))

    cmd = ['Rscript', tmp_r_path]
    try:
        subprocess.check_output(cmd)
    finally:
        remove_paths([tmp_r_path])

def roundtrip_r(input_path, output_path):
    code = R_READ_WRITE_TEMPLATE.format(input_path, output_path)
    run_rcode(code)

In [32]:
iris = datasets.load_iris()
print(iris)

{'target_names': array(['setosa', 'versicolor', 'virginica'], 
      dtype='|S10'), 'data': array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2],
       [ 5. ,  3.6,  1.4,  0.2],
       [ 5.4,  3.9,  1.7,  0.4],
       [ 4.6,  3.4,  1.4,  0.3],
       [ 5. ,  3.4,  1.5,  0.2],
       [ 4.4,  2.9,  1.4,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 5.4,  3.7,  1.5,  0.2],
       [ 4.8,  3.4,  1.6,  0.2],
       [ 4.8,  3. ,  1.4,  0.1],
       [ 4.3,  3. ,  1.1,  0.1],
       [ 5.8,  4. ,  1.2,  0.2],
       [ 5.7,  4.4,  1.5,  0.4],
       [ 5.4,  3.9,  1.3,  0.4],
       [ 5.1,  3.5,  1.4,  0.3],
       [ 5.7,  3.8,  1.7,  0.3],
       [ 5.1,  3.8,  1.5,  0.3],
       [ 5.4,  3.4,  1.7,  0.2],
       [ 5.1,  3.7,  1.5,  0.4],
       [ 4.6,  3.6,  1. ,  0.2],
       [ 5.1,  3.3,  1.7,  0.5],
       [ 4.8,  3.4,  1.9,  0.2],
       [ 5. ,  3. ,  1.6,  0.2],
       [ 5. ,  3.4,  1.6,  0.4],
       [ 5.2,  3.

In [29]:
from pandas.util.testing import assert_frame_equal
import pandas as pd

import feather

def test_factor_rep():
    fpath1 = random_path()
    fpath2 = random_path()

    rcode = """
library(feather)
iris <- read_feather("{0}")
iris$Species <- as.factor(as.character(iris$Species))
write_feather(iris, "{1}")
""".format(fpath1, fpath2)
    tmp_paths = []

    try:
        iris = datasets.load_iris()
#        pd.read_csv('iris.csv')
        levels = ['setosa', 'versicolor', 'virginica']

        iris['Species'] = pd.Categorical(iris['Species'], categories=levels)

        feather.write_dataframe(iris, fpath1)
        run_rcode(rcode)

        result = feather.read_dataframe(fpath2)

        tmp_paths.extend([fpath1, fpath2])
        assert_frame_equal(result, iris)
    finally:
        remove_paths(tmp_paths)

In [30]:
test_factor_rep()

KeyError: 'Species'