In [1]:
%load_ext Cython

In [2]:
from __future__ import division, absolute_import, print_function
import numpy as np
import dmft.hirschfye as hf
import dmft.common as gf

In [3]:
# Parameters handed to the dmft helpers through the `parms` dictionary.
beta = 16
d_tau = 0.5
n_tau = 1000
parms = dict(BETA=beta, N_TAU=n_tau, N_MATSUBARA=64)
tau, w_n = gf.tau_wn_setup(parms)

# The same gf.greenF(w_n) array is stacked twice before being transformed
# with gf.gw_invfouriertrans.
giw = np.array(2 * [gf.greenF(w_n)])
g0t = gf.gw_invfouriertrans(giw, tau, w_n)

# Number of slices of width d_tau that fit in beta, and the field `v`
# produced by hf.ising_v for that many slices.
L = int(beta / d_tau)
v = hf.ising_v(d_tau, 2, L)
lfak = v.size
g0t = hf.extract_g0t(g0t, lfak)

# gind[i, j] == lfak + i - j; indexing g0t with it expands the last axis of
# g0t into a (lfak, lfak) table.
gind = lfak + np.arange(lfak)[:, np.newaxis] - np.arange(lfak)
g0ttp = g0t[:, gind]

# `groot` is evaluated with the field as generated ...
ide = np.eye(lfak)
groot = hf.gnewclean(g0ttp[0], v, ide)

# ... and only afterwards is the sign of one entry of `v` reversed, so later
# cells see the modified field.
flip = 5
v[0, flip] *= -1

In [4]:
# Reference result: hf.gnewclean evaluated with the already modified `v`.
g_flip = hf.gnewclean(g0ttp[0], v, ide)

# Candidate result: hf.gnew applied to a copy of `groot` for index `flip`.
g_fast_flip = hf.gnew(np.copy(groot), v[0, flip], flip, 1)

# Both routes are expected to give the same matrix.
print(np.allclose(g_flip, g_fast_flip))

True


In [5]:
# Separate copy of `groot` used as the argument of every timed call below, so
# `groot` itself is never passed to the routine being benchmarked.
tg = np.copy(groot)
# Baseline timing of hf.gnew; %timeit calls it repeatedly with the same `tg`.
%timeit hf.gnew(tg, v[0, flip], flip, 1)

The slowest run took 9.72 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 3: 5.13 µs per loop


In [6]:
%%cython  --annotate
from __future__ import division
from scipy.linalg.blas import dger
import numpy as np
def cygnew(g, v, k, sign):
    """Rank-one update of ``g`` at index ``k`` through BLAS ``dger``.

    With ``ee = exp(2*sign*v) - 1`` and
    ``scale = ee / (1 + (1 - g[k, k]) * ee)`` the vectors

    * ``x = scale * g[:, k]`` with ``scale`` subtracted from ``x[k]``
    * ``y`` = a copy of row ``k`` of ``g``

    are built and ``g + outer(x, y)`` is evaluated by ``dger``.

    Parameters
    ----------
    g : 2-D array
    v : float
    k : int
        Row/column index the update is centred on.
    sign : number
        Multiplies ``v`` inside the exponential.

    Returns
    -------
    tuple
        ``(updated, x, y)``: the array returned by ``dger`` followed by the
        two vectors described above.
    """
    exp_factor = np.exp(2 * (sign * v)) - 1.
    scale = exp_factor / (1. + (1. - g[k, k]) * exp_factor)

    x = scale * g[:, k]
    x[k] -= scale
    # Multiplying by one yields a fresh array instead of a view into `g`.
    y = 1 * g[k, :]

    # All overwrite flags are switched on, exactly as in the positional form.
    updated = dger(1., x, y, incx=1, incy=1, a=g,
                   overwrite_x=1, overwrite_y=1, overwrite_a=1)
    return updated, x, y

In [7]:
# cygnew hands back the updated matrix together with the two vectors it
# passed to dger; all three are kept for inspection.
g_cyfast_flip, x, y = cygnew(np.copy(groot), v[0, flip], flip, 1)

# The result is compared with the clean recomputation and with hf.gnew.
print(np.allclose(g_flip, g_cyfast_flip))
print(np.allclose(g_fast_flip, g_cyfast_flip))

True
True


In [8]:
# Same arguments as the hf.gnew timing, now calling whichever `cygnew` is
# currently defined in the kernel (presumably the scipy-dger one - confirm).
%timeit cygnew(tg, v[0, flip], flip, 1)

The slowest run took 18.27 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 3: 6.56 µs per loop


In [9]:
%%cython  --annotate -lcblas
# C declarations taken from cblas.h; only what this cell uses is declared.
cdef extern from "cblas.h":
    # Storage-order selector passed as the first argument of cblas_dger.
    enum CBLAS_ORDER: CblasRowMajor, CblasColMajor
    # cblas_dger is made available inside this cell under the name
    # `lib_dger`.  It operates on raw pointers: x and y are read with
    # increments dx and dy, and A is an M x N matrix with leading
    # dimension lda.
    void lib_dger "cblas_dger"(CBLAS_ORDER Order, int M, int N, double alpha,
                                double *x, int dx, double *y, int dy,
                                double *A, int lda)
import numpy as np
cimport numpy as np
import cython
from libc.math cimport exp

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def cygnew(np.ndarray[np.float64_t, ndim=2] g, double v, int k, double sign):
    """Update ``g`` in place with a rank-one correction centred on index ``k``.

    The update applied through CBLAS ``dger`` is::

        g += alpha * outer(g[:, k] - e_k, g[k, :])

    where ``e_k`` is the k-th unit vector,
    ``alpha = ee / (1 + (1 - g[k, k]) * ee)`` and ``ee = exp(2*sign*v) - 1``.

    Parameters
    ----------
    g : float64 2-D array, C- or Fortran-contiguous
        Modified in place.
    v : double
    k : int
        Index of the row/column used for the update; must satisfy
        ``0 <= k < min(g.shape)``.
    sign : double
        Multiplies ``v`` inside the exponential.

    Returns
    -------
    The array ``g`` that was passed in.

    Raises
    ------
    IndexError
        If ``k`` is outside the matrix.
    ValueError
        If ``g`` is neither C- nor Fortran-contiguous.
    """
    cdef double dv, ee, alpha
    cdef int n_rows = g.shape[0]
    cdef int n_cols = g.shape[1]
    cdef int lda
    cdef CBLAS_ORDER order
    cdef np.ndarray[np.float64_t, ndim=1] x
    cdef np.ndarray[np.float64_t, ndim=1] y

    # Bounds checking and negative-index wraparound are switched off for this
    # function, so an invalid `k` would read and write outside the buffers.
    if k < 0 or k >= n_rows or k >= n_cols:
        raise IndexError("k=%d is out of range for g with shape (%d, %d)"
                         % (k, n_rows, n_cols))

    # dger works on the raw data pointer, so the storage order announced to
    # it has to be the one the array really uses; otherwise the transposed
    # update would be written.
    if g.flags.f_contiguous:
        order = CblasColMajor
        lda = n_rows
    elif g.flags.c_contiguous:
        order = CblasRowMajor
        lda = n_cols
    else:
        raise ValueError("g must be C- or Fortran-contiguous")

    dv = sign*v*2
    ee = exp(dv)-1.
    alpha = ee/(1. + (1.-g[k, k])*ee)

    # Column and row are copied because dger overwrites `g` while it is
    # still reading x and y.
    x = g[:, k].copy()
    y = g[k, :].copy()

    x[k] -= 1.
    lib_dger(order, n_rows, n_cols, alpha,
             &x[0], 1, &y[0], 1, &g[0, 0], lda)
    return g

In [10]:
g_cyfast_flip1 = cygnew(np.copy(groot), v[0][flip], flip, 1)
print(np.allclose(g_flip, g_cyfast_flip))
print(np.allclose(g_fast_flip, g_cyfast_flip))
print(np.allclose(g_flip, g_cyfast_flip1))
print(np.allclose(g_fast_flip, g_cyfast_flip1))
%timeit cygnew(tg, v[0, flip], flip, 1)

True
True
True
True
The slowest run took 8.20 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 3: 3.78 µs per loop


In [405]:
%%cython  --annotate -lcblas -lgsl
# C declarations taken from cblas.h; only what this cell uses is declared.
cdef extern from "cblas.h":
    # Storage-order selector passed as the first argument of cblas_dger.
    enum CBLAS_ORDER: CblasRowMajor, CblasColMajor
    # cblas_dger is made available inside this cell under the name
    # `lib_dger`.  It operates on raw pointers: x and y are read with
    # increments dx and dy, and A is an M x N matrix with leading
    # dimension lda.
    void lib_dger "cblas_dger"(CBLAS_ORDER Order, int M, int N, double alpha,
                                double *x, int dx, double *y, int dy,
                                double *A, int lda)
import numpy as np
cimport numpy as np
import cython
from libc.math cimport exp

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef cygnew(np.ndarray[np.float64_t, ndim=2] g, double v, int k, double sign):
    """Update ``g`` in place with a rank-one correction centred on index ``k``.

    The update applied through CBLAS ``dger`` is::

        g += alpha * outer(g[:, k] - e_k, g[k, :])

    where ``e_k`` is the k-th unit vector,
    ``alpha = ee / (1 + (1 - g[k, k]) * ee)`` and ``ee = exp(2*sign*v) - 1``.

    Parameters
    ----------
    g : float64 2-D array, C- or Fortran-contiguous
        Modified in place.
    v : double
    k : int
        Index of the row/column used for the update; must satisfy
        ``0 <= k < min(g.shape)``.
    sign : double
        Multiplies ``v`` inside the exponential.

    Returns
    -------
    The array ``g`` that was passed in.

    Raises
    ------
    IndexError
        If ``k`` is outside the matrix.
    ValueError
        If ``g`` is neither C- nor Fortran-contiguous.
    """
    cdef double dv, ee, alpha
    cdef int n_rows = g.shape[0]
    cdef int n_cols = g.shape[1]
    cdef int lda
    cdef CBLAS_ORDER order
    cdef np.ndarray[np.float64_t, ndim=1] x
    cdef np.ndarray[np.float64_t, ndim=1] y

    # Bounds checking and negative-index wraparound are switched off for this
    # function, so an invalid `k` would read and write outside the buffers.
    if k < 0 or k >= n_rows or k >= n_cols:
        raise IndexError("k=%d is out of range for g with shape (%d, %d)"
                         % (k, n_rows, n_cols))

    # dger works on the raw data pointer, so the storage order announced to
    # it has to be the one the array really uses; otherwise the transposed
    # update would be written (e.g. for a slice of a C-ordered 3-D array).
    if g.flags.f_contiguous:
        order = CblasColMajor
        lda = n_rows
    elif g.flags.c_contiguous:
        order = CblasRowMajor
        lda = n_cols
    else:
        raise ValueError("g must be C- or Fortran-contiguous")

    dv = sign*v*2
    ee = exp(dv)-1.
    alpha = ee/(1. + (1.-g[k, k])*ee)

    # Column and row are copied because dger overwrites `g` while it is
    # still reading x and y.
    x = g[:, k].copy()
    y = g[k, :].copy()

    x[k] -= 1.
    lib_dger(order, n_rows, n_cols, alpha,
             &x[0], 1, &y[0], 1, &g[0, 0], lda)
    return g

# Minimal declarations from the GSL random-number header; the two structs are
# declared opaque because only pointers to them are handled here.
cdef extern from "gsl/gsl_rng.h":
    ctypedef struct gsl_rng_type:
        pass
    ctypedef struct gsl_rng:
        pass
    gsl_rng_type *gsl_rng_mt19937
    gsl_rng *gsl_rng_alloc(gsl_rng_type * T)
    # gsl_rng_uniform is made available in this cell under the name `uniform`.
    double uniform "gsl_rng_uniform"(gsl_rng *r)
    
# Single module-level generator, created once when the compiled cell is
# loaded and used by cyupdate.  Nothing in this cell seeds it explicitly or
# frees it.
# NOTE(review): per the GSL documentation gsl_rng_alloc starts from the
# library's default seed, so every fresh kernel would draw the same sequence -
# confirm this is intended.
cdef gsl_rng *r = gsl_rng_alloc(gsl_rng_mt19937)

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def cyupdate(np.ndarray[np.float64_t, ndim=3] g,
             np.ndarray[np.float64_t, ndim=2] v,
             np.ndarray[np.int32_t, ndim=2] i_pairs):
    """Sweep over every index ``j`` and every pair, proposing sign flips of ``v``.

    For pair ``i`` (``up, dw = i_pairs[i, 0], i_pairs[i, 1]``) and index ``j``::

        dv    = 2 * v[i, j]
        ratup = 1 + (1 - g[up, j, j]) * (exp(-dv) - 1)
        ratdw = 1 + (1 - g[dw, j, j]) * (exp( dv) - 1)
        rat   = ratup * ratdw

    The flip is accepted when ``rat / (1 + rat)`` exceeds a uniform random
    number drawn from the module-level GSL generator.  On acceptance
    ``v[i, j]`` changes sign and ``g[up]`` / ``g[dw]`` are updated by
    ``cygnew`` with signs ``+1`` / ``-1``.

    Parameters
    ----------
    g : float64 array, shape (n_blocks, n, n)
        Modified in place.
    v : float64 array, at least (len(i_pairs), n)
        Modified in place.
    i_pairs : int32 array, shape (n_pairs, >= 2)
        Columns 0 and 1 hold the first-axis indices of ``g`` forming a pair.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the array shapes are inconsistent with each other.
    IndexError
        If ``i_pairs`` refers to a block that does not exist in ``g``.
    """
    cdef double dv, ratup, ratdw, rat
    cdef int j, i, up, dw, pair, n_blocks, n_slices
    pair = i_pairs.shape[0]
    n_blocks = g.shape[0]
    n_slices = g.shape[1]

    # Bounds checking is disabled for the loops below, so every index that
    # will be used is validated once up front.
    if g.shape[2] != n_slices:
        raise ValueError("the last two dimensions of g must be equal")
    if pair > 0:
        if i_pairs.shape[1] < 2:
            raise ValueError("i_pairs needs two columns (up, dw)")
        if v.shape[0] < pair or v.shape[1] < n_slices:
            raise ValueError("v is too small for the given i_pairs and g")
    for i in range(pair):
        up = i_pairs[i, 0]
        dw = i_pairs[i, 1]
        if up < 0 or up >= n_blocks or dw < 0 or dw >= n_blocks:
            raise IndexError("i_pairs[%d] refers to a block outside g" % i)

    for j in range(n_slices):
        for i in range(pair):
            up = i_pairs[i, 0]
            dw = i_pairs[i, 1]
            dv = 2.*v[i, j]
            ratup = 1. + (1. - g[up, j, j])*(exp(-dv)-1.)
            ratdw = 1. + (1. - g[dw, j, j])*(exp( dv)-1.)
            rat = ratup * ratdw
            rat = rat/(1.+rat)
            if rat > uniform(r):
                v[i, j] *= -1.
                # cygnew writes through the data pointer of the view it
                # receives and returns that same view, so assigning the
                # result back onto g[up] would only copy the block onto
                # itself.
                # NOTE(review): g[up] has the memory order of a slice of
                # `g`; confirm cygnew handles that order.
                cygnew(g[up], v[i, j], j, 1.)
                cygnew(g[dw], v[i, j], j, -1.)

In [402]:
# Re-import dmft.hirschfye so that edits made to the module are picked up.
reload(hf)

# hf.gnew and the compiled cygnew are each applied to their own copy of
# `groot` with identical arguments.
g_fast_flip = hf.gnew(groot.copy(), v[0, flip], flip, 1)
g_ffast_flip = cygnew(np.copy(groot), v[0, flip], flip, 1)

# Both must reproduce the clean recomputation stored in `g_flip`.
print(np.allclose(g_flip, g_fast_flip))
print(np.allclose(g_flip, g_ffast_flip))

True
True


In [396]:
%timeit cyupdate(g0ttp, 

array([[ -2.03589494e-01,   3.41038325e-03,   3.35085668e-04, ...,
         -7.55282136e-02,  -1.27568593e-01,  -2.23344980e-01],
       [  1.90643399e+00,   3.13603890e+00,  -5.93207684e-06, ...,
          6.81591750e-01,   1.20476539e+00,   2.17252994e+00],
       [  1.27888983e-01,   2.15380454e-01,  -2.03439131e-01, ...,
          4.71733106e-02,   8.03120492e-02,   1.41422766e-01],
       ..., 
       [ -3.30853201e-04,   5.02712575e-03,   3.30130773e-04, ...,
         -1.98578023e-01,   5.80687433e-05,  -6.75738680e-03],
       [ -5.89871471e-04,   6.24312421e-03,   4.74424401e-04, ...,
         -1.19252356e-01,  -2.03092351e-01,  -9.23039315e-03],
       [  1.27705388e-02,  -1.18133161e-01,  -8.58131670e-03, ...,
          1.16559905e+00,   1.90813604e+00,   3.26789974e+00]])

In [397]:
groot

array([[ -2.04343393e-01,   2.17992736e-03,   1.54148145e-03, ...,
         -7.57912345e-02,  -1.28045073e-01,  -2.24214961e-01],
       [  1.90187799e+00,   3.12860296e+00,   7.28461243e-03, ...,
          6.80002251e-01,   1.20188591e+00,   2.16727244e+00],
       [  1.28457100e-01,   2.16307690e-01,  -2.04348236e-01, ...,
          4.73715155e-02,   8.06711106e-02,   1.42078358e-01],
       ..., 
       [ -1.59196049e-03,   2.96884478e-03,   2.34816449e-03, ...,
         -1.99017998e-01,  -7.38976620e-04,  -8.21267271e-03],
       [ -1.86047531e-03,   4.16934371e-03,   2.50765457e-03, ...,
         -1.19695645e-01,  -2.03895398e-01,  -1.06966378e-02],
       [  3.38662309e-02,  -8.37024172e-02,  -4.23388077e-02, ...,
          1.17295893e+00,   1.92146894e+00,   3.29224363e+00]])

In [398]:
lib_dger

NameError: name 'lib_dger' is not defined