# Introduction

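This notebook demonstrates how to run Stata (via `ipystata`) and R (via `rpy2`) code alongside Python in a single Jupyter notebook, and how to pass data frames between the three languages.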

In [1]:
# Name of the project root folder; the working-directory cell searches the
# current path for it.
PROJECT = 'demo_project'
# Identifier of this notebook; also used as the name of its pipeline folder.
NAME = '2_show_stata_and_r'
# Python version recorded for this notebook (not referenced in the visible cells).
PYTHON_VERSION = '3.6'

## Preamble

### Imports

In [2]:
import os, re, json, time, math
import pandas as pd
import numpy as np
import requests
import deepdish as dd
from tqdm import tqdm_notebook as tqdm

In [3]:
from os.path import join as path

Package to use Stata

In [4]:
import ipystata

Package to use R

In [5]:
%load_ext rpy2.ipython

### Settings

In [6]:
# Turn off pandas' chained-assignment check (the default value is 'warn').
pd.options.mode.chained_assignment = None
# Display options are addressed by their full 'display.*' names. Abbreviated
# names are resolved by pattern matching and can stop working if a future
# pandas release adds another option with a similar name.
pd.set_option('display.expand_frame_repr', False)  # do not wrap wide frames over several lines
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # show floats with 5 decimals
pd.set_option('display.max_colwidth', 200)  # maximum characters shown per column

Ignore warnings

In [7]:
import warnings
warnings.filterwarnings('ignore')

### Set working directory

In [8]:
# Move to the project root: keep the current path up to and including the
# first occurrence of PROJECT and drop everything after it.
# The pattern is a raw string so that `\w` / `\W` reach the regex engine
# instead of being treated as (invalid) string escapes, and PROJECT goes
# through re.escape so that it is matched literally.
# If PROJECT does not occur in the path, re.sub leaves it unchanged and the
# working directory stays where it is.
workdir = re.sub(r"(?<=%s)[\w\W]*" % re.escape(PROJECT), "", os.getcwd())
os.chdir(workdir)

### Set up pipeline folder if missing

In [9]:
# Folder that holds this notebook's artefacts: empirical/2_pipeline/<NAME>.
pipeline = path('empirical', '2_pipeline', NAME)
# Create each sub-folder individually. os.makedirs also creates the missing
# parents, and exist_ok=True makes the cell safe to re-run. Unlike a single
# check on the parent folder, this also restores a sub-folder that was
# deleted while the parent still exists.
for folder in ['out', 'store', 'tmp']:
    os.makedirs(path(pipeline, folder), exist_ok=True)

# Main code

### Show iPyStata

In [10]:
%%stata
* Smoke test for the Stata cell magic: print a string from Stata.
display "Hello World"


Hello World



In [11]:
%%stata -o auto_df
* Load the auto example dataset; -o hands the data back to Python as auto_df.
sysuse auto, clear

(1978 Automobile Data)



In [12]:
# Preview the first five rows of the frame returned by Stata.
auto_df.head(n=5)

Unnamed: 0,make,price,mpg,rep78,headroom,trunk,weight,length,turn,displacement,gear_ratio,foreign
0,AMC Concord,4099,22,3.0,2.5,11,2930,186,40,121,3.58,Domestic
1,AMC Pacer,4749,17,3.0,3.0,11,3350,173,40,258,2.53,Domestic
2,AMC Spirit,3799,22,,3.0,12,2640,168,35,121,3.08,Domestic
3,Buick Century,4816,20,3.0,4.5,16,3250,196,40,196,2.93,Domestic
4,Buick Electra,7827,15,4.0,4.0,20,4080,222,43,350,2.41,Domestic


In [13]:
# Element-wise ratio of headroom to trunk, stored as an extra column.
auto_df['new_column'] = auto_df['headroom'].div(auto_df['trunk'])

In [14]:
%%stata -d auto_df
* -d passes the Python data frame auto_df to Stata as the dataset in use,
* so the column created in Python (new_column) can be summarized here.
summarize headroom trunk new_column


    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
    headroom |         74    2.993243    .8459948        1.5          5
       trunk |         74    13.75676    4.277404          5         23
  new_column |         74    .2278575    .0621471       .125         .5



In [15]:
%%stata -d auto_df -o auto_df
* Round trip: send auto_df to Stata (-d), add the natural log of new_column,
* and return the modified data to Python under the same name (-o).
gen ln_new_col = ln(new_column)

In [16]:
# Show the Python-made column next to the Stata-made one (first five rows).
auto_df.loc[:, ['new_column', 'ln_new_col']].head(n=5)

Unnamed: 0,new_column,ln_new_col
0,0.22727,-1.4816
1,0.27273,-1.29928
2,0.25,-1.38629
3,0.28125,-1.26851
4,0.2,-1.60944


### Show Python + Stata + R

In [17]:
%%stata -o data_df
* Reload the auto example dataset and return it to Python as data_df (-o).
* `clear` drops whatever is still in memory from earlier cells; without it
* Stata refuses to load over unsaved, modified data when the session persists.
sysuse auto, clear

(1978 Automobile Data)



In [18]:
# Preview the first five rows of the frame returned by Stata.
data_df.head(n=5)

Unnamed: 0,make,price,mpg,rep78,headroom,trunk,weight,length,turn,displacement,gear_ratio,foreign
0,AMC Concord,4099,22,3.0,2.5,11,2930,186,40,121,3.58,Domestic
1,AMC Pacer,4749,17,3.0,3.0,11,3350,173,40,258,2.53,Domestic
2,AMC Spirit,3799,22,,3.0,12,2640,168,35,121,3.08,Domestic
3,Buick Century,4816,20,3.0,4.5,16,3250,196,40,196,2.93,Domestic
4,Buick Electra,7827,15,4.0,4.0,20,4080,222,43,350,2.41,Domestic


In [19]:
%%R -i data_df -o data_df
# data_df comes in from Python (-i) and is handed back under the same name (-o).
# Work on the column vectors directly: new_col is trunk minus one.
data_df[["new_col"]] <- data_df[["trunk"]] - 1

In [20]:
# Compare the original column with the one computed in R (first five rows).
data_df.loc[:, ['trunk', 'new_col']].head(n=5)

Unnamed: 0,trunk,new_col
0,11,10.0
1,11,10.0
2,12,11.0
3,16,15.0
4,20,19.0


# Sandbox