In [19]:
import pandas as pd
import rpy2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

# Show the installed rpy2 version next to the notebook's results.
print('rpy2 package version: {0}'.format(rpy2.__version__))

rpy2 package version: 3.1.0


In [5]:
def pd_read_rdata(file_name, dataset_name=None, df_names=False):

    '''
    Load an .RData file through R and convert the result for use in Python.

    The file is always read with R's ``load``. What is returned depends on
    the arguments, checked in this order: ``df_names``, then ``dataset_name``.

    Parameters:

        file_name(str): Name of the RDATA file, passed as-is to R's ``load``.

        dataset_name(str): Name of the dataset to return. The string is
            evaluated in R with ``ro.r(dataset_name)`` after the file has
            been loaded.

        df_names(bool): True if you want the dataset names instead of a
            dataset. When true, ``dataset_name`` is ignored.

    Return:

        names: If df_names is true, a list built from the converted return
            value of ``load`` (per the R documentation, the names of the
            objects that were loaded).

        df: If dataset_name is given, the converted dataset (a pandas
            data frame when the R object is a data frame).

        If neither is given, the converted return value of ``load`` itself
        is returned, i.e. the loaded names rather than a dataset.

    NOTE(review): ``load`` is called without an ``envir`` argument, so the
    loaded objects presumably stay in the R session between calls. A
    ``dataset_name`` left over from a previously loaded file would then
    still resolve even if ``file_name`` does not contain it -- confirm
    before relying on this function to validate names.

    '''

    def _to_python(r_obj):
        # One conversion path for every branch: default rules + pandas rules.
        with localconverter(ro.default_converter + pandas2ri.converter):
            return ro.conversion.rpy2py(r_obj)

    # Every mode starts by loading the file; keep what ``load`` hands back.
    loaded = ro.r['load'](file_name)

    if df_names:
        # Names requested: materialise them as a plain Python list.
        return list(_to_python(loaded))

    if dataset_name is None:
        # No dataset requested: hand back the converted result of ``load``.
        return _to_python(loaded)

    # Look the requested dataset up in R, then convert it.
    return _to_python(ro.r(dataset_name))

## Open the Fuel Economy dataset

In [7]:
# List the names of the objects stored in FuelEconomy.RData
# (df_names=True makes pd_read_rdata return names instead of a dataset).
df_names = pd_read_rdata('FuelEconomy.RData', df_names=True)
df_names

['cars2010', 'cars2011', 'cars2012']

In [12]:
# Read the first object named in `df_names` from FuelEconomy.RData.
# NOTE(review): the output recorded below has the same columns as the
# scheduling output further down, and the execution counts are out of
# order -- presumably `df_names` held the scheduling names when this cell
# last ran. Restart the kernel and run all cells to refresh the output.
pd_read_rdata('FuelEconomy.RData', df_names[0])

Unnamed: 0,Protocol,Compounds,InputFields,Iterations,NumPending,Hour,Day,Class
1,E,997.0,137.0,20.0,0.0,14.000000,Tue,F
2,E,97.0,103.0,20.0,0.0,13.816667,Tue,VF
3,E,101.0,75.0,10.0,0.0,13.850000,Thu,VF
4,E,93.0,76.0,20.0,0.0,10.100000,Fri,VF
5,E,100.0,82.0,20.0,0.0,10.366667,Fri,VF
...,...,...,...,...,...,...,...,...
4327,O,967.0,611.0,20.0,0.0,12.633333,Mon,L
4328,O,972.0,613.0,20.0,0.0,12.616667,Mon,L
4329,O,963.0,607.0,200.0,0.0,4.066667,Thu,L
4330,O,964.0,607.0,200.0,0.0,3.616667,Fri,L


## Open the Scheduling dataset

In [9]:
# List the names of the objects stored in schedulingData.RData.
# Note: this rebinds `df_names`, replacing the FuelEconomy names assigned
# in the earlier cell.
df_names = pd_read_rdata('schedulingData.RData', df_names=True)
df_names

['schedulingData']

In [11]:
# Read the scheduling data from its own file. `df_names[0]` is
# 'schedulingData', which is not one of the objects listed for
# FuelEconomy.RData, so the file name must match the names lookup above.
pd_read_rdata('schedulingData.RData', df_names[0])

Unnamed: 0,Protocol,Compounds,InputFields,Iterations,NumPending,Hour,Day,Class
1,E,997.0,137.0,20.0,0.0,14.000000,Tue,F
2,E,97.0,103.0,20.0,0.0,13.816667,Tue,VF
3,E,101.0,75.0,10.0,0.0,13.850000,Thu,VF
4,E,93.0,76.0,20.0,0.0,10.100000,Fri,VF
5,E,100.0,82.0,20.0,0.0,10.366667,Fri,VF
...,...,...,...,...,...,...,...,...
4327,O,967.0,611.0,20.0,0.0,12.633333,Mon,L
4328,O,972.0,613.0,20.0,0.0,12.616667,Mon,L
4329,O,963.0,607.0,200.0,0.0,4.066667,Thu,L
4330,O,964.0,607.0,200.0,0.0,3.616667,Fri,L
