## Combine programming languages

To use `ipystata` you first need to install and configure it; see the `ipystata` repository for setup details.

In [18]:
import pandas as pd
import numpy as np

In [19]:
import ipystata

In [23]:
%%stata -os

* Print a fixed message from Stata to confirm that this cell is executed by Stata.
display "Hello, I am printed by Stata."

Hello, I am printed by Stata.



In [21]:
%%stata -o auto_df
* Load the `auto` example dataset (Stata reports it as "1978 Automobile Data").
* The -o auto_df flag makes the resulting data available in Python as `auto_df`,
* which the next cell uses as a pandas DataFrame.
sysuse auto

(1978 Automobile Data)



In [22]:
# Preview the first five rows of the frame handed back by the Stata cell.
auto_df.head(n=5)

Unnamed: 0,make,price,mpg,rep78,headroom,trunk,weight,length,turn,displacement,gear_ratio,foreign
0,AMC Concord,4099,22,3.0,2.5,11,2930,186,40,121,3.58,Domestic
1,AMC Pacer,4749,17,3.0,3.0,11,3350,173,40,258,2.53,Domestic
2,AMC Spirit,3799,22,,3.0,12,2640,168,35,121,3.08,Domestic
3,Buick Century,4816,20,3.0,4.5,16,3250,196,40,196,2.93,Domestic
4,Buick Electra,7827,15,4.0,4.0,20,4080,222,43,350,2.41,Domestic


In [24]:
# Derive a column on the Python side: element-wise product of `turn` and `displacement`.
auto_df['new_col'] = auto_df['turn'].mul(auto_df['displacement'])

In [25]:
%%stata -d auto_df 
* The -d auto_df flag supplies the Python DataFrame `auto_df` to Stata as the dataset.
* Summarize the column that was created on the Python side.
summarize new_col


    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
     new_col |         74    8132.162    4494.148       2686      20400



## Multiprocessing

In [26]:
from joblib import Parallel, delayed
import time

In [27]:
# Inputs for the timing comparison: the integers 0 through 59.
list_of_numbers = [*range(60)]

In [28]:
def test_function(number):
    time.sleep(0.5)
    return number*2

In [29]:
%%time
slow_test = []
for number in list_of_numbers:
    slow_test.append(test_function(number))

Wall time: 30 s


In [30]:
%%time
fast_test = Parallel(n_jobs=-1)(delayed(test_function)(number) for number in list_of_numbers) 

Wall time: 3.09 s
