<a href="https://colab.research.google.com/github/MS-H2020/Open/blob/main/cupy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Test for Cupy


## Sample code (Quote form [Qiita,@fujine(fujine),June 2023](https://qiita.com/fujine/items/6c997a073fec5bcea512))


### Importing Library

In [51]:
import numpy as np
import sklearn
from sklearn import config_context
from sklearn.datasets import make_classification
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.utils._array_api import _estimator_with_converted_arrays
import cupy as cp
import cupy.array_api as xp

for pkg in (sklearn, cp):
    print(f'{pkg.__name__:<8} : {pkg.__version__}')


sklearn  : 1.2.2
cupy     : 12.2.0


In [65]:
X_np, y_np = make_classification(n_samples=10 ** 6, n_features=10, random_state=0) # Under this condition Google Colab with free plan, n_sample=10**7 make memory leak)

print(X_np.shape)

(1000000, 10)


In [66]:
print(y_np.sum() / len(y_np))

0.500054


In [67]:
%%time
lda_np = LinearDiscriminantAnalysis(solver='svd')
X_trans = lda_np.fit_transform(X_np, y_np)

print(f'intercept : {lda_np.intercept_}')
print(f'coef : {lda_np.coef_}')
print(f'type: {type(X_trans)}')

intercept : [0.00220022]
coef : [[ 1.42411416e-03 -2.27213457e+00  4.52770557e-03 -6.47803039e-01
   1.78904332e+00 -3.23051275e-01 -4.96373846e-03 -4.24765373e-04
  -1.31483698e-03  4.60770008e-03]]
type: <class 'numpy.ndarray'>
CPU times: user 1.57 s, sys: 235 ms, total: 1.81 s
Wall time: 1.58 s


In [68]:
X_cu = xp.asarray(X_np)
y_cu = xp.asarray(y_np)

print(X_cu.device, y_cu.device)

<CUDA Device 0> <CUDA Device 0>


In [69]:
%%time
with config_context(array_api_dispatch=True):
    lda_cu = LinearDiscriminantAnalysis(solver='svd')
    lda_cu.fit(X_cu[:10, :], y_cu[:10])
    del lda_cu

CPU times: user 14.6 ms, sys: 0 ns, total: 14.6 ms
Wall time: 14.4 ms


In [70]:
%%time
with config_context(array_api_dispatch=True):
    lda_cu = LinearDiscriminantAnalysis(solver='svd')
    X_trans = lda_cu.fit_transform(X_cu, y_cu)

CPU times: user 190 ms, sys: 1.11 ms, total: 191 ms
Wall time: 213 ms


In [71]:
print(f'intercept : {lda_cu.intercept_}')
print(f'coef : {lda_cu.coef_}')
print(f'device: {X_trans.device}')

intercept : [0.00220022]
coef : [[ 1.42411416e-03 -2.27213457e+00  4.52770557e-03 -6.47803039e-01
   1.78904332e+00 -3.23051275e-01 -4.96373846e-03 -4.24765373e-04
  -1.31483698e-03  4.60770008e-03]]
device: <CUDA Device 0>


In [72]:
converter = lambda arr : arr._array.get()
lda_cu2np = _estimator_with_converted_arrays(lda_cu, converter=converter)
X_trans = lda_cu2np.transform(X_np)

print(type(X_trans))

<class 'numpy.ndarray'>


In [73]:
pool = cp.get_default_memory_pool()
pool.free_all_blocks()

## Quoted from Scikit-learn offcial HP, ["11.1. Array API support (experimental)](https://scikit-learn.org/stable/modules/array_api.html)

In [52]:
from sklearn.datasets import make_classification
from sklearn import config_context
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import cupy.array_api as xp

X_np, y_np = make_classification(random_state=0)
X_cu = xp.asarray(X_np)
y_cu = xp.asarray(y_np)
X_cu.device

<CUDA Device 0>

In [53]:
with config_context(array_api_dispatch=True):
    lda = LinearDiscriminantAnalysis()
    X_trans = lda.fit_transform(X_cu, y_cu)
X_trans.device

<CUDA Device 0>

In [54]:
from sklearn.utils._array_api import _estimator_with_converted_arrays
cupy_to_ndarray = lambda array : array.get()
lda_np = _estimator_with_converted_arrays(lda, cupy_to_ndarray)
X_trans = lda_np.transform(X_np)
type(X_trans)

AttributeError: 'Array' object has no attribute 'get'