<a href="https://colab.research.google.com/github/RUMONMD89/Data-Science/blob/main/class4%20Iteration%20and%20exception%20handling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 3.7 反復処理(iteration)

In [None]:
import numpy as np
import pandas as pd

### 3.7.1 指定した回数→1次元データ(Specified number of times → 1D data)

In [26]:
def f1(x):
    """Return the mean of ``x`` values drawn with ``np.random.random``."""
    samples = np.random.random(x)
    return samples.mean()

f1(10)                # sanity check
#> 0.5427033207230424 # example result

0.4614183885400903

In [None]:
# The comprehension calls f1 once per iteration, so each element is a separate draw.
[f1(10) for i in range(3)]
#> [0.4864425069985622,
#>  0.4290935578857099,
#>  0.535206509631883]

[0.35705950100813677, 0.5039611397924781, 0.6551777449559382]

In [None]:
# f1 is called only once here; list repetition copies that single value three times.
[f1(10)] * 3
#> [0.43725641184595576,
#>  0.43725641184595576,
#>  0.43725641184595576]

[0.49287941809544156, 0.49287941809544156, 0.49287941809544156]

### 3.7.2 1次元データ→1次元データ(1D data → 1D data)

In [None]:
v = [5, 10, 100]
[f1(x) for x in v] # method 1
#> [0.454, 0.419, 0.552]

# or alternatively

v = pd.Series([5, 10, 100])
v.apply(f1)        # method 2
#> 0    0.394206
#> 1    0.503949
#> 2    0.532698
#> dtype: float64

0    0.474632
1    0.450197
2    0.518493
dtype: float64

In [None]:
pd.Series([10] * 3).apply(f1)
# output omitted

0    0.401402
1    0.484935
2    0.451970
dtype: float64

### 3.7.3 1次元データ→データフレーム(1D data → data frame)

In [None]:
def f2(n):
    """Summarise ``n`` draws from ``np.random.random`` as a labelled Series.

    The result has three entries: 'x' is the sample size ``n``, 'p' is the
    sample mean and 'q' is the standard deviation computed with ``ddof=1``.
    """
    draws = np.random.random(n)
    labels = ['x', 'p', 'q']
    values = [n, draws.mean(), draws.std(ddof=1)]
    return pd.Series(values, index=labels)

f2(10) # sanity check
#> x    10.000000
#> p     0.405898 (example mean)
#> q     0.317374 (example standard deviation)
#> dtype: float64

x    10.000000
p     0.644111
q     0.266565
dtype: float64

In [None]:
# f2 returns a Series, so apply() assembles the results into a DataFrame
# with one row per element of v.
v = pd.Series([5, 10, 100])
v.apply(f2)
#>        x         p         q
#> 0    5.0  0.507798  0.207970
#> 1   10.0  0.687198  0.264427
#> 2  100.0  0.487872  0.280743

Unnamed: 0,x,p,q
0,5.0,0.452937,0.383029
1,10.0,0.434347,0.238122
2,100.0,0.454068,0.272854


### 3.7.4 データフレーム→データフレーム(data frame → data frame)

In [None]:
def f3(x, y):
    """Summarise ``x`` draws from ``np.random.random`` scaled by ``y``.

    The result is a Series with entries 'x' and 'y' (the two arguments),
    'p' (mean of the scaled draws) and 'q' (their standard deviation with
    ``ddof=1``).
    """
    scaled = np.random.random(x) * y
    labels = ['x', 'y', 'p', 'q']
    values = [x, y, scaled.mean(), scaled.std(ddof=1)]
    return pd.Series(values, index=labels)

f3(10, 6) # sanity check
#> x    10.000000
#> y     6.000000
#> p     2.136413 (example mean)
#> q     1.798755 (example standard deviation)
#> dtype: float64

x    10.000000
y     6.000000
p     3.134258
q     1.811798
dtype: float64

In [None]:
my_df = pd.DataFrame({
    'x': [5, 10, 100,  5, 10, 100],
    'y': [6,  6,   6, 12, 12,  12]})

my_df.apply(
  lambda row: f3(row['x'], row['y']),
  axis=1)
# or alternatively
# (f3(*row) unpacks the row positionally, so it relies on the columns
#  being ordered x, y to match f3's parameters)
my_df.apply(lambda row:
            f3(*row), axis=1)

#>        x     y    p    q
#> 0   5.00  6.00 3.37 1.96
#> 1  10.00  6.00 1.92 0.95
#> 2 100.00  6.00 2.90 1.73
#> 3   5.00 12.00 6.82 3.00
#> 4  10.00 12.00 7.05 2.42
#> 5 100.00 12.00 5.90 3.54

Unnamed: 0,x,y,p,q
0,5.0,6.0,1.326711,0.775169
1,10.0,6.0,3.338593,1.570082
2,100.0,6.0,3.092437,1.845319
3,5.0,12.0,8.280876,2.139346
4,10.0,12.0,4.672397,4.207249
5,100.0,12.0,6.446032,3.396524


### 3.7.5 補足：反復処理の並列化(Supplement: Parallelization of iteration processing)

In [None]:
#ADD
!pip install pandarallel
#ADD

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pandarallel
  Downloading pandarallel-1.6.3.tar.gz (12 kB)
Building wheels for collected packages: pandarallel
  Building wheel for pandarallel (setup.py) ... [?25l[?25hdone
  Created wheel for pandarallel: filename=pandarallel-1.6.3-py3-none-any.whl size=16463 sha256=9814c1695ae280ec8abcc158de1893feace5379a7c3d445cd4cc4123d2050ecc
  Stored in directory: /root/.cache/pip/wheels/3c/a4/19/02a1f08d032a017d5d7e22da595aa652ba0a2f2e22de73981b
Successfully built pandarallel
Installing collected packages: pandarallel
Successfully installed pandarallel-1.6.3


In [None]:
from pandarallel import pandarallel
pandarallel.initialize() # setup

v = pd.Series([5, 10, 100])
v.parallel_apply(f1)
# output omitted

INFO: Pandarallel will run on 1 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


0    0.601915
1    0.511435
2    0.497041
dtype: float64

## 3.8 その他(Miscellaneous)

### 3.8.1 よく遭遇するエラーとその対処方法(Common errors and how to deal with them)

### 3.8.2 変数や関数についての調査(Research on variables and functions)

In [None]:
# type() reports the class of the value bound to x.
x = 123
type(x)
#> int

int

In [None]:
# %whos lists every variable in the interactive namespace with its type and
# value. The sample below assumes only x is defined; after running the cells
# above, the functions, modules and data frames defined earlier appear too.
%whos
#> Variable   Type      Data/Info
#> ------------------------------
#> x          int       123

Variable      Type         Data/Info
------------------------------------
f1            function     <function f1 at 0x7f43edcfeb90>
f2            function     <function f2 at 0x7f43edd08a70>
f3            function     <function f3 at 0x7f43edc37170>
my_df         DataFrame         x   y\n0    5   6\n1<...>2\n4   10  12\n5  100  12
np            module       <module 'numpy' from '/us<...>kages/numpy/__init__.py'>
pandarallel   type         <class 'pandarallel.core.pandarallel'>
pd            module       <module 'pandas' from '/u<...>ages/pandas/__init__.py'>
v             Series       0      5\n1     10\n2    100\ndtype: int64
x             int          123


In [30]:
import math
# NOTE(review): this bare expression is not the cell's last statement, so
# nothing is displayed for it; only the help() output below is shown.
math.log
# or alternatively
help(math.log)

Help on built-in function log in module math:

log(...)
    log(x, [base=math.e])
    Return the logarithm of x to the given base.
    
    If the base not specified, returns the natural logarithm (base e) of x.



### 3.8.3 RのNA，Pythonのnan(NA in R, nan in Python)

In [31]:
import numpy as np
# A plain Python list whose middle element is NaN.
v = [1, np.nan, 3]
v
#> [1, nan, 3]

[1, nan, 3]

In [None]:
# Test for NaN with np.isnan rather than ==.
np.isnan(v[1])
#> True

v[1] == np.nan # wrong: NaN does not compare equal to anything, itself included
#> False

False