In [1]:
%load_ext Cython

# API实现

`target_mean_v1`和`target_mean_v2`是老师给出的接口实现，其中`target_mean_v2`是老师解决`target_mean_v1`中for循环总进行循环而导致程序复杂度增加的问题，利用两个字典分别存储value和count进行计算，达到用空间换时间的目的。

In [2]:
import numpy as np
import pandas as pd

def target_mean_v1(data, y_name, x_name):
    result = np.zeros(data.shape[0])
    for i in range(data.shape[0]):
        groupby_result = data[data.index != i].groupby([x_name], as_index=False).agg(['mean', 'count'])
        result[i] = groupby_result.loc[groupby_result.index == data.loc[i, x_name], (y_name, 'mean')]
    return result


def target_mean_v2(data, y_name, x_name):
    result = np.zeros(data.shape[0])
    value_dict = dict()
    count_dict = dict()
    for i in range(data.shape[0]):
        if data.loc[i, x_name] not in value_dict.keys():
            value_dict[data.loc[i, x_name]] = data.loc[i, y_name]
            count_dict[data.loc[i, x_name]] = 1
        else:
            value_dict[data.loc[i, x_name]] += data.loc[i, y_name]
            count_dict[data.loc[i, x_name]] += 1
    for i in range(data.shape[0]):
        result[i] = (value_dict[data.loc[i, x_name]] - data.loc[i, y_name]) / (count_dict[data.loc[i, x_name]] - 1)
    return result

## Cython串行实现(一)
该版本是对很简单对老师给出的版本中的相关变量的定义直接替换为cython相关变量，在该实例中，API中传递的参数依然是没有指定类型的，以此做一个最简单的练习实现。

In [3]:
%%cython -a

import numpy as np
import pandas as pd
cimport cython
cimport numpy as cnp

@cython.boundscheck(False)
@cython.wraparound(False)
def target_mean_cython(data, y_name, x_name):
  result = np.zeros(data.shape[0])
  cdef dict value_dict = {}
  cdef dict count_dict = {}
  for i in range(data.shape[0]):
    if data.loc[i, x_name] not in value_dict.keys():
      value_dict[data.loc[i, x_name]] = data.loc[i, y_name]
      count_dict[data.loc[i, x_name]] = 1
    else:
      value_dict[data.loc[i, x_name]] += data.loc[i, y_name]
      count_dict[data.loc[i, x_name]] += 1
  for i in range(data.shape[0]):
    result[i] = (value_dict[data.loc[i, x_name]] - data.loc[i, y_name]) / (count_dict[data.loc[i, x_name]] - 1)
  return result

## Cython串行实现(二)
Cython中参数是可以指定类型的，添加`target_mean_cython_v2`接口，实现对两列数据的处理，为保持和改写前接口的一致性，添加`target_mean_cython_v2_test`接口，使得客户端在进行调用时，可以保持一致的形式。

In [46]:
%%cython -a

import numpy as np
import pandas as pd
cimport cython
cimport numpy as cnp

@cython.boundscheck(False)
@cython.wraparound(False)
def target_mean_cython_v2(cnp.ndarray[long] x, cnp.ndarray[long] y, int shape,
                            cnp.ndarray result):
  cdef dict value_dict = {}
  cdef dict count_dict = {}
  for i in range(shape):
    if x[i] not in value_dict.keys():
      value_dict[x[i]] = y[i]
      count_dict[x[i]] = 1
    else:
      value_dict[x[i]] += y[i]
      count_dict[x[i]] += 1
  for i in range(shape):
    result[i] = (value_dict[x[i]] - y[i]) / (count_dict[x[i]] - 1)
  return result

def target_mean_cython_v2_test(data, y_name, x_name):
  x = data[x_name].values
  y = data[y_name].values
  shape = data.shape[0]
  result = np.zeros(shape)
  return target_mean_cython_v2(x, y, shape, result)

## Cython并行实现
和上述实现类似，添加`target_mean_cython_parallel_test`接口使得客户端调用形式可以保持一致，`target_mean_cython_parallel`对数据进行详细的处理，该实现中的字典采用C++中的map。

In [73]:
%%cython -a
# distutils: language = c++

import numpy as np
import pandas as pd
from cython.parallel import prange
from libcpp.map cimport map
cimport cython
cimport numpy as cnp

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef target_mean_cython_parallel(long[:] x, long[:] y,
                                  const int shape, double[:] result) :
  cdef map[int, int] value_dict
  cdef map[int, int] count_dict
  cdef int i
  for i in prange(shape, nogil = True):
    if value_dict.find(x[i]) == value_dict.end():
      value_dict[x[i]] = y[i]
      count_dict[x[i]] = 1
    else:
      value_dict[x[i]] += y[i]
      count_dict[x[i]] += 1
  
  for i in prange(shape, nogil = True):
    result[i] = (value_dict[x[i]] - y[i]) / (count_dict[x[i]] - 1)


def target_mean_cython_parallel_test(data, y_name, x_name):
  x = data[x_name].values
  y = data[y_name].values
  shape = data.shape[0]
  result = np.zeros(shape)
  target_mean_cython_parallel(x, y, shape, result)
  return result

# API测试

首先，构造测试数据，然后将相同的数据传递给不同的接口，将数据和`target_mean_v1`的结果进行对比，最后，对比不同接口的耗时。

In [70]:
y = np.random.randint(2, size=(500, 1))
x = np.random.randint(10, size=(500, 1))
data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

In [74]:
result_1 = target_mean_v1(data, 'y', 'x')
result_2 = target_mean_v2(data, 'y', 'x')
result_3 = target_mean_cython(data, 'y', 'x')
result_4 = target_mean_cython_v2_test(data, 'y', 'x')
result_5 = target_mean_cython_parallel_test(data, 'y', 'x')
assert 0.0 == np.linalg.norm(result_1 - result_2)
assert 0.0 == np.linalg.norm(result_1 - result_3)
assert 0.0 == np.linalg.norm(result_1 - result_4)
assert 0.0 == np.linalg.norm(result_1 - result_5)


In [75]:
%timeit -n 100 target_mean_v1(data, 'y', 'x')
%timeit -n 100 target_mean_v2(data, 'y', 'x')
%timeit -n 100 target_mean_cython(data, 'y', 'x')
%timeit -n 100 target_mean_cython_v2_test(data, 'y', 'x')
%timeit -n 100 target_mean_cython_parallel_test(data, 'y', 'x')

100 loops, best of 3: 2.39 s per loop
100 loops, best of 3: 26.3 ms per loop
100 loops, best of 3: 26.5 ms per loop
100 loops, best of 3: 121 µs per loop
100 loops, best of 3: 37.6 µs per loop


# 结论
- 由第2行和第3行的数据可见，简单地采用c接口，依然是未确定的数据类型，对程序的执行效率的提升效率效果很微弱。
- 由第3行和第4行的数据对比可见，在接口中采用cython确定的数据类型，程序的执行效率可以得了接近50倍的提升。
- 将第5行和第1-4行的数据对比可见，采用并行，对程序的执行效率相对于cython提升了近3倍，相对与python实现得到了数量级上的提升。