In [1]:
%load_ext Cython

In [2]:
import numpy as np
import pandas as pd
from collections import defaultdict

In [3]:
# Seed NumPy's legacy global RNG so that Restart & Run All regenerates the
# same synthetic data set (and therefore the same results) every time.
np.random.seed(42)
# Binary target (values 0/1) and one categorical feature (values 0..9),
# 5000 rows each, built as column vectors so they can be concatenated.
y = np.random.randint(2, size=(5000, 1))
x = np.random.randint(10, size=(5000, 1))
data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])
# The frame's values as a 2-D array: column 0 is y, column 1 is x.
data_np = data.values

In [4]:
def target_mean_v1(data, y_name, x_name):
    """Leave-one-out target mean, brute-force pandas baseline.

    For every row, drop that row, group the remaining rows by ``x_name`` and
    look up the mean of ``y_name`` for the dropped row's own category.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns.
    y_name : str
        Name of the target column that is averaged.
    x_name : str
        Name of the categorical column that defines the groups.

    Returns
    -------
    numpy.ndarray
        1-D float array with one value per row of ``data``. A row whose
        category does not occur in any other row gets ``NaN``.
    """
    n_rows = data.shape[0]
    result = np.zeros(n_rows)
    positions = np.arange(n_rows)
    categories = data[x_name].values
    for i in range(n_rows):
        # Mask by position rather than by label so the function does not
        # depend on the frame carrying a default 0..n-1 index.
        others = data.iloc[positions != i]
        group_means = others.groupby(x_name)[y_name].mean()
        # Series.get yields a scalar (never a one-element Series) and falls
        # back to NaN when the category vanished with the left-out row.
        result[i] = group_means.get(categories[i], np.nan)
    return result

In [5]:
%%time
# Single timed run of the brute-force baseline. result_1 is the reference
# that the later cells compare the faster implementations against.
result_1 = target_mean_v1(data, 'y', 'x')

CPU times: user 15.6 s, sys: 40 ms, total: 15.6 s
Wall time: 15.7 s


In [6]:
def target_mean_v2(data, y_name, x_name):
    """Leave-one-out target mean using two passes over the frame.

    The first pass accumulates, per category of ``x_name``, the sum of
    ``y_name`` and the number of rows. The second pass removes each row's own
    contribution from its category totals and divides.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame addressed with ``.loc`` using the row labels ``0..n-1``.
    y_name : str
        Name of the target column.
    x_name : str
        Name of the categorical column.

    Returns
    -------
    numpy.ndarray
        1-D float array with one value per row.
    """
    n_rows = data.shape[0]
    value_dict = {}
    count_dict = {}

    # Pass 1: per-category running sum and row count.
    for row in range(n_rows):
        key = data.loc[row, x_name]
        target = data.loc[row, y_name]
        if key in value_dict:
            value_dict[key] += target
            count_dict[key] += 1
        else:
            value_dict[key] = target
            count_dict[key] = 1

    # Pass 2: subtract the row's own target and count before averaging.
    result = np.zeros(n_rows)
    for row in range(n_rows):
        key = data.loc[row, x_name]
        numerator = value_dict[key] - data.loc[row, y_name]
        denominator = count_dict[key] - 1
        result[row] = numerator / denominator
    return result

In [7]:
%%timeit
# Benchmark the two-pass dictionary version on the same frame as the baseline.
result_2 = target_mean_v2(data, 'y', 'x')

233 ms ± 1.59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
def target_mean_v3(y_np, x_np):
    """Leave-one-out target mean over two parallel sequences.

    Same two-pass scheme as the DataFrame version, but the target and the
    category are passed as separate indexable sequences, so no pandas lookups
    happen inside the loops.

    NOTE: a later cell defines another ``target_mean_v3``; once that cell has
    run, this definition is shadowed.

    Parameters
    ----------
    y_np : sequence
        Target values, indexed by position.
    x_np : sequence
        Category of each row; its length determines the number of rows.

    Returns
    -------
    numpy.ndarray
        1-D float array with one value per row.
    """
    length = len(x_np)
    value_dict = defaultdict(int)
    count_dict = defaultdict(int)

    # Pass 1: totals per category (missing keys start at 0).
    for idx in range(length):
        key = x_np[idx]
        value_dict[key] += y_np[idx]
        count_dict[key] += 1

    # Pass 2: leave the current row out of its category's sum and count.
    result = np.zeros(length)
    for idx in range(length):
        key = x_np[idx]
        result[idx] = (value_dict[key] - y_np[idx]) / (count_dict[key] - 1)
    return result

In [9]:
%%timeit
# Benchmark the index-loop version; the columns are handed over as arrays
# so the function itself performs no DataFrame lookups.
result_3 = target_mean_v3(data['y'].values, data['x'].values)

8.38 ms ± 44 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [10]:
def target_mean_v3(y_np, x_np):
    """Leave-one-out target mean, zip/comprehension variant.

    Iterates the two sequences in lockstep instead of indexing them, first to
    accumulate per-category sums and counts, then to compute each row's mean
    with its own target and count removed.

    Parameters
    ----------
    y_np : iterable
        Target values.
    x_np : iterable
        Category of each row, hashable, aligned with ``y_np``.

    Returns
    -------
    numpy.ndarray
        1-D float64 array with one value per row (empty for empty input),
        matching the return type of the other ``target_mean_*`` variants.
    """
    value_dict = defaultdict(int)
    count_dict = defaultdict(int)
    for x, y in zip(x_np, y_np):
        value_dict[x] += y
        count_dict[x] += 1
    # Build the array directly from the comprehension; there is no need to
    # pre-allocate a zero array that would only be thrown away.
    return np.array(
        [(value_dict[x] - y) / (count_dict[x] - 1) for x, y in zip(x_np, y_np)],
        dtype=float,
    )

In [11]:
%%timeit
# Same call as the previous benchmark, now hitting the zip-based
# target_mean_v3 defined in the cell directly above.
result_3 = target_mean_v3(data['y'].values, data['x'].values)

6.17 ms ± 67.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [12]:
# Compute result_3 in a regular (untimed) cell so it is available for the
# comparison with the baseline that follows.
result_3 = target_mean_v3(data['y'].values, data['x'].values)

In [13]:
# Norm of the element-wise difference to the baseline; 0.0 means the two
# results are identical.
np.linalg.norm(result_1 - result_3)

0.0

In [14]:
%%cython --cplus

cimport numpy as cnp
from libcpp.vector cimport vector
from libcpp.unordered_map cimport unordered_map

cpdef target_mean_v3cy(cnp.ndarray[long] y_np, cnp.ndarray[long] x_np, cnp.ndarray[double] result):
    """Leave-one-out target mean with typed NumPy buffers and C++ hash maps.

    Parameters
    ----------
    y_np : 1-D ndarray of C long
        Target values; its length determines the number of rows processed.
    x_np : 1-D ndarray of C long
        Category of each row.
    result : 1-D ndarray of double
        Pre-allocated output buffer, filled in place.

    Returns
    -------
    The ``result`` array that was passed in, now holding one value per row.

    Raises
    ------
    ZeroDivisionError
        If a row's category occurs in no other row.
    """
    cdef Py_ssize_t length = y_np.shape[0]
    # Keys and running sums use C long, the element type of the inputs, so
    # large category ids or sums are not narrowed to int.
    cdef unordered_map[long, long] value_dict
    cdef unordered_map[long, long] count_dict
    cdef Py_ssize_t i
    cdef long key
    # Pass 1: per-category sum and count (operator[] zero-initialises new keys).
    for i in range(length):
        key = x_np[i]
        value_dict[key] = value_dict[key] + y_np[i]
        count_dict[key] = count_dict[key] + 1
    # Pass 2: remove the row's own contribution; the casts make this a
    # floating-point division regardless of the module's language level.
    for i in range(length):
        key = x_np[i]
        result[i] = <double>(value_dict[key] - y_np[i]) / <double>(count_dict[key] - 1)
    return result

In [15]:
%%timeit
# The output buffer is allocated inside the timed cell, so its allocation is
# part of the measurement.
result_tmp = np.zeros(data.shape[0])
result_3cy = target_mean_v3cy(data['y'].values, data['x'].values, result_tmp)

180 µs ± 721 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [16]:
# Untimed run with a fresh output buffer; the function fills result_tmp and
# returns it, so result_3cy can be checked against the baseline.
result_tmp = np.zeros(data.shape[0])
result_3cy = target_mean_v3cy(data['y'].values, data['x'].values, result_tmp)

In [17]:
# 0.0 here means the Cython result equals the pandas baseline element for element.
np.linalg.norm(result_1 - result_3cy)

0.0

In [20]:
%%cython --cplus

cimport cython
cimport numpy as cnp
from libcpp.unordered_map cimport unordered_map

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef target_mean_v3mv(cnp.ndarray[long] y_np, cnp.ndarray[long] x_np, cnp.ndarray[double] result):
    """Leave-one-out target mean with bounds and wraparound checks disabled.

    Parameters
    ----------
    y_np : 1-D ndarray of C long
        Target values; its length determines the number of rows processed.
    x_np : 1-D ndarray of C long
        Category of each row; must hold at least ``len(y_np)`` elements.
    result : 1-D ndarray of double
        Pre-allocated output buffer with at least ``len(y_np)`` elements,
        filled in place.

    Returns
    -------
    The ``result`` array that was passed in.

    Raises
    ------
    ValueError
        If ``x_np`` or ``result`` is shorter than ``y_np``.
    ZeroDivisionError
        If a row's category occurs in no other row.
    """
    cdef Py_ssize_t length = y_np.shape[0]
    # Keys and running sums use C long, the element type of the inputs, so
    # large category ids or sums are not narrowed to int.
    cdef unordered_map[long, long] value_dict
    cdef unordered_map[long, long] count_dict
    cdef Py_ssize_t i
    cdef long key
    # With boundscheck off, an undersized buffer would be read or written
    # past its end, so the sizes are validated once up front.
    if x_np.shape[0] < length or result.shape[0] < length:
        raise ValueError("x_np and result must be at least as long as y_np")
    # Pass 1: per-category sum and count (operator[] zero-initialises new keys).
    for i in range(length):
        key = x_np[i]
        value_dict[key] = value_dict[key] + y_np[i]
        count_dict[key] = count_dict[key] + 1
    # Pass 2: remove the row's own contribution; the casts make this a
    # floating-point division regardless of the module's language level.
    for i in range(length):
        key = x_np[i]
        result[i] = <double>(value_dict[key] - y_np[i]) / <double>(count_dict[key] - 1)
    return result

In [21]:
%%timeit
# Same harness as for target_mean_v3cy, so the two timings are comparable;
# the buffer allocation is again inside the timed cell.
result_tmp = np.zeros(data.shape[0])
result_3mv = target_mean_v3mv(data['y'].values, data['x'].values, result_tmp)

183 µs ± 2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [22]:
# Untimed run with a fresh buffer, followed by the check against the
# baseline (0.0 means identical results).
result_tmp = np.zeros(data.shape[0])
result_3mv = target_mean_v3mv(data['y'].values, data['x'].values, result_tmp)
np.linalg.norm(result_1 - result_3mv)

0.0

In [23]:
%%cython --cplus

cimport cython
cimport numpy as cnp
from libcpp.unordered_map cimport unordered_map

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef target_mean_v3mv2(long[::1] y_np, long[::1] x_np, double[::1] result, int length):
    """Leave-one-out target mean over contiguous typed memoryviews.

    Parameters
    ----------
    y_np : contiguous 1-D buffer of C long
        Target values.
    x_np : contiguous 1-D buffer of C long
        Category of each row.
    result : contiguous 1-D buffer of double
        Pre-allocated output, filled in place.
    length : int
        Number of leading rows to process; must not exceed the size of any
        of the three buffers.

    Returns
    -------
    The ``result`` memoryview that was passed in.

    Raises
    ------
    ValueError
        If ``length`` is larger than one of the buffers.
    ZeroDivisionError
        If a row's category occurs in no other processed row.
    """
    # Keys and running sums use C long, the element type of the inputs, so
    # large category ids or sums are not narrowed to int.
    cdef unordered_map[long, long] value_dict
    cdef unordered_map[long, long] count_dict
    cdef Py_ssize_t i
    cdef long key
    # With boundscheck off, a too-large `length` would index past the end of
    # a buffer, so it is validated once up front.
    if length > y_np.shape[0] or length > x_np.shape[0] or length > result.shape[0]:
        raise ValueError("length exceeds the size of y_np, x_np or result")
    # Pass 1: per-category sum and count (operator[] zero-initialises new keys).
    for i in range(length):
        key = x_np[i]
        value_dict[key] = value_dict[key] + y_np[i]
        count_dict[key] = count_dict[key] + 1
    # Pass 2: remove the row's own contribution; the casts make this a
    # floating-point division regardless of the module's language level.
    for i in range(length):
        key = x_np[i]
        result[i] = <double>(value_dict[key] - y_np[i]) / <double>(count_dict[key] - 1)
    return result

In [24]:
%%timeit
# Both column copies and the buffer allocation happen inside the timed cell,
# so they are included in the measurement.
# NOTE(review): the .copy() calls are presumably there because the
# `long[::1]` parameters only accept contiguous buffers - confirm.
length = data.shape[0]
result_tmp = np.zeros(data.shape[0])
result_3mv2 = target_mean_v3mv2(data['y'].values.copy(), data['x'].values.copy(), result_tmp, length)

173 µs ± 252 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [25]:
# Untimed run of the memoryview version, followed by the check against the
# baseline (0.0 means identical results).
length = data.shape[0]
result_tmp = np.zeros(data.shape[0])
result_3mv2 = target_mean_v3mv2(data['y'].values.copy(), data['x'].values.copy(), result_tmp, length)
np.linalg.norm(result_1 - result_3mv2)

0.0

In [42]:
%%cython --cplus

cimport cython
cimport numpy as cnp
from cython.parallel import prange
from libcpp.unordered_map cimport unordered_map

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef target_mean_v3mv2_thread(long[::1] y_np, long[::1] x_np, double[::1] result, int length):
    """Leave-one-out target mean whose second pass is a ``prange`` loop.

    Parameters
    ----------
    y_np : contiguous 1-D buffer of C long
        Target values.
    x_np : contiguous 1-D buffer of C long
        Category of each row.
    result : contiguous 1-D buffer of double
        Pre-allocated output, filled in place.
    length : int
        Number of leading rows to process; must not exceed the size of any
        of the three buffers.

    Returns
    -------
    The ``result`` memoryview that was passed in.

    Raises
    ------
    ValueError
        If ``length`` is larger than one of the buffers.
    ZeroDivisionError
        If a row's category occurs in no other processed row.
    """
    # Keys and running sums use C long, the element type of the inputs, so
    # large category ids or sums are not narrowed to int.
    cdef unordered_map[long, long] value_dict
    cdef unordered_map[long, long] count_dict
    cdef Py_ssize_t i
    cdef long key
    # With boundscheck off, a too-large `length` would index past the end of
    # a buffer, so it is validated once up front (while the GIL is held).
    if length > y_np.shape[0] or length > x_np.shape[0] or length > result.shape[0]:
        raise ValueError("length exceeds the size of y_np, x_np or result")
    # Pass 1 (serial): per-category sum and count.
    for i in range(length):
        key = x_np[i]
        value_dict[key] = value_dict[key] + y_np[i]
        count_dict[key] = count_dict[key] + 1
    # Pass 2: every key looked up here was inserted by pass 1, so the maps
    # are only read, and each iteration writes its own result[i].
    # NOTE(review): the %%cython line of this cell passes no OpenMP
    # compile/link flags; without them prange presumably runs as an ordinary
    # serial loop - confirm. num_threads=100 is a fixed team size.
    for i in prange(length, nogil=True, num_threads=100):
        result[i] = <double>(value_dict[x_np[i]] - y_np[i]) / <double>(count_dict[x_np[i]] - 1)
    return result

In [43]:
%%timeit
# Same harness as for target_mean_v3mv2 (copies and buffer allocation inside
# the timed cell), so the two timings are directly comparable.
length = data.shape[0]
result_tmp = np.zeros(data.shape[0])
result_3mv2_thread = target_mean_v3mv2_thread(data['y'].values.copy(), data['x'].values.copy(), result_tmp, length)

174 µs ± 1.19 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [28]:
# Untimed run of the prange variant, then compare it with the serial
# memoryview result; 0.0 means both produced the same values.
length = data.shape[0]
result_tmp = np.zeros(length)
result_3mv2_thread = target_mean_v3mv2_thread(
    data['y'].values.copy(), data['x'].values.copy(), result_tmp, length)
# Both results are viewed as arrays before subtracting.
np.linalg.norm(np.asarray(result_3mv2) - np.asarray(result_3mv2_thread))

0.0