# Exercise 1: Monte Carlo calculation of Pi
### As you might know, a trick to compute $\pi$ using Monte Carlo methods is to draw points $(x,y)$ with $x,y\sim \mathcal{U}[-1,1]$ independent and uniformly distributed, and count the number of $(x,y)$ coordinates that satisfy $x^2+y^2\leq 1$ divided by the total number of draws. This ratio estimates $\pi/4$ (the area of the unit disc divided by the area of the square), so multiplying it by 4 gives $\pi$. See below a sample code that computes this.


In [1]:
import numpy as np

def compute_pi_python(num_sim):
    """Estimate pi by Monte Carlo sampling with vectorized NumPy operations.

    Draws ``num_sim`` points uniformly in the square [-1, 1) x [-1, 1) and
    counts the fraction that falls inside the unit disc. That fraction
    estimates pi / 4, so it is multiplied by 4.

    Parameters
    ----------
    num_sim : int
        Number of points to draw; must be at least 1.

    Returns
    -------
    numpy.float64
        The Monte Carlo estimate of pi. The result is deterministic because
        the generator is always seeded with 0.

    Raises
    ------
    ValueError
        If ``num_sim`` is smaller than 1 (the estimate would be 0/0).
    """
    if num_sim < 1:
        raise ValueError("num_sim must be a positive integer")

    # A private RandomState seeded with 0 yields the same stream as
    # np.random.seed(0) followed by np.random.random, but does not reset the
    # global NumPy generator used by the rest of the notebook.
    rng = np.random.RandomState(0)

    # random_sample returns U[0,1), so rescale it to U[-1,1).
    x = 2.0 * (rng.random_sample(size=num_sim) - 0.5)
    y = 2.0 * (rng.random_sample(size=num_sim) - 0.5)

    # Number of points that landed inside the unit disc.
    inside = np.sum(x * x + y * y <= 1)

    pi = inside / num_sim * 4

    return pi

In [2]:
# Print the estimate for sample sizes 10^2 ... 10^7 to show how it converges.
for num_sim in (100, 1000, 10000, 100000, 1000000, 10000000):
    print(compute_pi_python(num_sim))

3.2
3.076
3.1068
3.13088
3.141688
3.14129


In [3]:
# Benchmark the NumPy implementation on 10 million draws.
%timeit compute_pi_python(10000000)

181 ms ± 1.68 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Hint: you can use the code below to generate a Uniform [0,1] random variable in Cython

# Solutions

In [4]:
%load_ext cython

In [5]:
%%cython -a --compile-args=-O2
from libc.stdlib cimport rand, RAND_MAX
cimport cython
from libc.math cimport  sqrt,log,exp,erf
@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
cdef double uniform_generator() nogil:
    # Draw one pseudo-random double from the C library's rand().
    # The divisor is RAND_MAX + 1.0 (a double), so the quotient is computed in
    # floating point; cdivision(True) skips Cython's zero-division check.
    cdef double span = RAND_MAX + 1.0
    return rand() / span

@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
cpdef double compute_pi_cython(int num_sim) nogil:
    """Estimate pi with a plain C loop over ``num_sim`` random points.

    Each iteration draws two values from ``uniform_generator`` and counts the
    point as a hit when its squared distance from the origin is at most 1.
    The hit fraction is multiplied by 4 to produce the estimate. No arrays
    are allocated: only scalar C variables are used.
    """
    cdef long int hits = 0
    cdef double u, v, radius_sq
    cdef int k
    # Convert once so the final division is done in floating point.
    cdef double total = <double>num_sim

    for k in range(num_sim):
        u = uniform_generator()
        v = uniform_generator()
        radius_sq = u*u + v*v
        if radius_sq <= 1.0:
            hits += 1

    return hits / total * 4
        


In [6]:
# Benchmark the Cython implementation on the same 10 million draws used for the NumPy timing.
%timeit compute_pi_cython(10000000)

234 ms ± 4.61 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
# Run the Cython version with 100 million draws and display the estimate.
compute_pi_cython(100000000)

3.1416072

### As you can see, it is difficult to beat a fully vectorized NumPy function! However, note that the Cython version is not memory hungry, whereas the NumPy version will eventually run out of memory as we keep increasing the size of the simulations.

# Exercise 2

### Use the IV code we have created in the lecture notebook to recompute the IVs that we considered in assignment 4 for the AMZN option dataset:
### a) Using Pandas apply
### b) Writing a Cython function that vectorises the IV function to take memoryviews or cnp.ndarrays as input
### c) Compare the performance

# Solutions

### a)

In [13]:
import pandas as pd
import IV_module
# Load the option chain; index_col=0 uses the first CSV column as the row index.
df=pd.read_csv("option_chain_reduced.csv",index_col=0)

In [14]:
# Display the loaded option chain (columns: S, strike, T, option_type, mid_price, DF).
df

Unnamed: 0,S,strike,T,option_type,mid_price,DF
0,2959.787822,1820.0,0.009132,C,1141.275,1.000154
1,2959.787822,1830.0,0.009132,C,1125.550,1.000154
2,2959.787822,1840.0,0.009132,C,1118.625,1.000154
3,2959.787822,1850.0,0.009132,C,1110.725,1.000154
4,2959.787822,1870.0,0.009132,C,1089.175,1.000154
...,...,...,...,...,...,...
6032,2918.362500,3600.0,2.214612,P,1070.075,0.982250
6033,2918.362500,3900.0,2.214612,P,1236.275,0.982250
6034,2918.362500,4000.0,2.214612,P,1368.225,0.982250
6035,2918.362500,4800.0,2.214612,P,2025.200,0.982250


In [15]:
def apply_IV(x):
    """Compute the implied volatility for one row of the option chain.

    Intended for ``df.apply(apply_IV, axis=1)``. Fields are read by column
    label rather than by integer position, so the result does not depend on
    the column order and does not rely on positional ``Series[int]`` access.

    Parameters
    ----------
    x : pandas.Series
        A row with the labels ``S``, ``strike``, ``T``, ``option_type``,
        ``mid_price`` and ``DF``.

    Returns
    -------
    Whatever ``IV_module.implied_volatility`` returns for the row.
    """
    # q is +1 for calls ("C") and -1 for anything else.
    option_sign = 1 if x["option_type"] == "C" else -1
    return IV_module.implied_volatility(
        price=x["mid_price"],
        F=x["S"] / x["DF"],  # spot divided by DF (presumably the forward price)
        K=x["strike"],
        T=x["T"],
        q=option_sign,
    )

In [17]:
# Compute the implied volatility row by row (axis=1) and store it in a new 'IV' column.
df['IV']=df.apply(apply_IV,axis=1)

In [18]:
# Benchmark the row-wise pandas apply over the whole option chain.
%timeit df.apply(apply_IV,axis=1)

62 ms ± 691 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


### If you remember the previous version was taking around 20 seconds

### b)

In [32]:
# Build the IV_module_vectorized extension in place via its setup script (shell command).
!python3 setup_vectorized.py build_ext --inplace

Compiling IV_module_vectorized.pyx because it changed.
[1/1] Cythonizing IV_module_vectorized.pyx
  tree = Parsing.p_module(s, pxd, full_module_name)
running build_ext
building 'IV_module_vectorized' extension
x86_64-linux-gnu-gcc -pthread -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/home/amuguruza/.local/lib/python3.6/site-packages/numpy/core/include -I/usr/include/python3.6m -c IV_module_vectorized.c -o build/temp.linux-x86_64-3.6/IV_module_vectorized.o
x86_64-linux-gnu-gcc -pthread -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/home/amuguruza/.local/lib/python3.6/site-packages/numpy/core/include -I/usr/include/python3.6m -c ./LetsBeRational/lets_be_rational.cpp -o build/temp.linux-x86_64-3.6/./LetsBeRational/lets_be_rational.o
x86_64-linux-gnu-gcc -pthread -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong 

In [33]:
# Display the frame with the new IV column. Some rows show -1.797693e+308;
# NOTE(review): presumably a sentinel for rows where no implied volatility
# could be computed - confirm against IV_module.
df

Unnamed: 0,S,strike,T,option_type,mid_price,DF,IV
0,2959.787822,1820.0,0.009132,C,1141.275,1.000154,2.235807e+00
1,2959.787822,1830.0,0.009132,C,1125.550,1.000154,-1.797693e+308
2,2959.787822,1840.0,0.009132,C,1118.625,1.000154,-1.797693e+308
3,2959.787822,1850.0,0.009132,C,1110.725,1.000154,2.077509e+00
4,2959.787822,1870.0,0.009132,C,1089.175,1.000154,-1.797693e+308
...,...,...,...,...,...,...,...
6032,2918.362500,3600.0,2.214612,P,1070.075,0.982250,3.707138e-01
6033,2918.362500,3900.0,2.214612,P,1236.275,0.982250,3.350252e-01
6034,2918.362500,4000.0,2.214612,P,1368.225,0.982250,3.662838e-01
6035,2918.362500,4800.0,2.214612,P,2025.200,0.982250,3.571103e-01


### In my case some special casting needs to be done to transform "option_type" into a character so that cython understands the input

In [52]:
# Cast the option_type column to a NumPy chararray of single-byte strings (dtype '|S1').
np.char.array(df.option_type)

chararray([b'C', b'C', b'C', ..., b'P', b'P', b'P'], dtype='|S1')

In [53]:
import IV_module_vectorized
# Call the vectorized routine on raw NumPy arrays (.values); the second argument is S/DF.
# The result is displayed as a Cython MemoryView object rather than an ndarray.
IV_module_vectorized.implied_volatility_vectorized(df.mid_price.values, df.S.values/df.DF.values, df.strike.values, df['T'].values, np.char.array(df.option_type))

<MemoryView of 'array' at 0x7f8a9c38e6e8>

### Note that Cython is not as "clever" as Numpy/Pandas so you will need to cast each column into an array using .values for this approach to work!

In [54]:
# implied_volatility_vectorized returns a Cython memoryview, so convert it to a
# NumPy array explicitly before storing it as the 'IV' column.
df['IV']=np.asarray(IV_module_vectorized.implied_volatility_vectorized(df.mid_price.values, df.S.values/df.DF.values, df.strike.values, df['T'].values, np.char.array(df.option_type)))

In [50]:
# Benchmark the vectorized Cython routine; the timing includes the .values and chararray conversions.
%timeit IV_module_vectorized.implied_volatility_vectorized(df.mid_price.values, df.S.values/df.DF.values, df.strike.values, df['T'].values, np.char.array(df.option_type))

2.62 ms ± 38.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### c) In my case you can see around a 24x speed improvement with the vectorized Cython version (2.62 ms) compared with Pandas apply (62 ms)