|
65 | 65 | "amin", |
66 | 66 | "average", |
67 | 67 | "bincount", |
| 68 | + "corrcoef", |
68 | 69 | "correlate", |
69 | 70 | "cov", |
70 | 71 | "max", |
@@ -360,6 +361,127 @@ def bincount(x1, weights=None, minlength=0): |
360 | 361 | return call_origin(numpy.bincount, x1, weights=weights, minlength=minlength) |
361 | 362 |
|
362 | 363 |
|
def corrcoef(x, y=None, rowvar=True, *, dtype=None):
    """
    Compute the matrix of Pearson product-moment correlation coefficients.

    For full documentation refer to :obj:`numpy.corrcoef`.

    Parameters
    ----------
    x : {dpnp.ndarray, usm_ndarray}
        A 1-D or 2-D array holding several variables and their observations.
        By default every row of `x` is one variable and every column is one
        observation of all variables; `rowvar` switches that layout.
    y : {None, dpnp.ndarray, usm_ndarray}, optional
        A further set of variables and observations, shaped like `x`.
        Default: ``None``.
    rowvar : {bool}, optional
        When ``True``, rows are variables and columns are observations.
        When ``False``, the layout is transposed: columns are variables and
        rows are observations.
        Default: ``True``.
    dtype : {None, dtype}, optional
        Data-type of the result.
        Default: ``None``.

    Returns
    -------
    R : {dpnp.ndarray}
        The correlation coefficient matrix of the variables.

    See Also
    --------
    :obj:`dpnp.cov` : Covariance matrix.

    Examples
    --------
    Two random arrays, ``xarr`` and ``yarr``, are generated below and used to
    compute row-wise and column-wise Pearson correlation coefficients ``R``.
    Because `rowvar` defaults to true, the first call correlates the rows of
    ``xarr`` with each other.

    >>> import dpnp as np
    >>> np.random.seed(123)
    >>> xarr = np.random.rand(3, 3).astype(np.float32)
    >>> xarr
    array([[7.2858386e-17, 2.2066992e-02, 3.9520904e-01],
           [4.8012391e-01, 5.9377134e-01, 4.5147297e-01],
           [9.0728188e-01, 9.9387854e-01, 5.8399546e-01]], dtype=float32)
    >>> R1 = np.corrcoef(xarr)
    >>> R1
    array([[ 0.99999994, -0.6173796 , -0.9685411 ],
           [-0.6173796 ,  1.        ,  0.7937219 ],
           [-0.9685411 ,  0.7937219 ,  0.9999999 ]], dtype=float32)

    Passing a second set of variables and observations, ``yarr``, yields the
    row-wise coefficients between the variables of ``xarr`` and ``yarr``.

    >>> yarr = np.random.rand(3, 3).astype(np.float32)
    >>> yarr
    array([[0.17615308, 0.65354985, 0.15716429],
           [0.09373496, 0.2123185 , 0.84086883],
           [0.9011005 , 0.45206687, 0.00225109]], dtype=float32)
    >>> R2 = np.corrcoef(xarr, yarr)
    >>> R2
    array([[ 0.99999994, -0.6173796 , -0.968541  , -0.48613155,  0.9951523 ,
            -0.8900264 ],
           [-0.6173796 ,  1.        ,  0.7937219 ,  0.9875833 , -0.53702235,
             0.19083664],
           [-0.968541  ,  0.7937219 ,  0.9999999 ,  0.6883078 , -0.9393724 ,
             0.74857277],
           [-0.48613152,  0.9875833 ,  0.6883078 ,  0.9999999 , -0.39783284,
             0.0342579 ],
           [ 0.9951523 , -0.53702235, -0.9393725 , -0.39783284,  0.99999994,
            -0.9305482 ],
           [-0.89002645,  0.19083665,  0.7485727 ,  0.0342579 , -0.9305482 ,
             1.        ]], dtype=float32)

    With ``rowvar=False`` the columns become the variables, giving the
    column-wise coefficients between variables in ``xarr`` and ``yarr``.

    >>> R3 = np.corrcoef(xarr, yarr, rowvar=False)
    >>> R3
    array([[ 1.        ,  0.9724453 , -0.9909503 ,  0.8104691 , -0.46436927,
            -0.1643624 ],
           [ 0.9724453 ,  1.        , -0.9949381 ,  0.6515728 , -0.6580445 ,
             0.07012729],
           [-0.99095035, -0.994938  ,  1.        , -0.72450536,  0.5790461 ,
             0.03047091],
           [ 0.8104691 ,  0.65157276, -0.72450536,  1.        ,  0.14243561,
            -0.71102554],
           [-0.4643693 , -0.6580445 ,  0.57904613,  0.1424356 ,  0.99999994,
            -0.79727215],
           [-0.1643624 ,  0.07012729,  0.03047091, -0.7110255 , -0.7972722 ,
             0.99999994]], dtype=float32)
    """

    corr = dpnp.cov(x, y, rowvar, dtype=dtype)

    # A 0-d covariance has no diagonal to normalise by. Dividing it by itself
    # gives nan for an unusable value (nan, inf, 0) and 1 for anything else.
    if corr.ndim == 0:
        return corr / corr

    # The diagonal holds the variances; the square root of its real part is
    # the per-variable standard deviation.
    sigma = dpnp.sqrt(dpnp.diag(corr).real)

    # Normalise in place: scale each row, then each column, by its deviation.
    corr /= sigma[:, None]
    corr /= sigma[None, :]

    # Clamp the real part, and for complex results the imaginary part too,
    # into [-1, 1]. For complex arrays this does not ensure
    # abs(corr[i, j]) <= 1, but a stricter bound would need excessive work.
    dpnp.clip(corr.real, -1, 1, out=corr.real)
    if dpnp.iscomplexobj(corr):
        dpnp.clip(corr.imag, -1, 1, out=corr.imag)

    return corr
| 483 | + |
| 484 | + |
363 | 485 | def correlate(x1, x2, mode="valid"): |
364 | 486 | """ |
365 | 487 | Cross-correlation of two 1-dimensional sequences. |
|
0 commit comments