# Pandas – Iteracja

---


In [None]:
import pandas as pd
import numpy as np
import warnings
# Suppress every warning so the cell outputs below stay uncluttered.
warnings.simplefilter("ignore")

# 5x2 demo frame holding the integers 0..9, filled row by row.
df = pd.DataFrame(
    np.arange(10, dtype="int64").reshape(5, 2),
    columns=["x", "y"],
)
df

In [None]:
# Walk the frame column by column: items() yields (column label, Series) pairs.
for label, column in df.items():
    print((label, column))

In [None]:
# Walk the frame row by row: iterrows() yields (index label, row Series) pairs.
for index, record in df.iterrows():
    print((index, record))

In [None]:
# Print both values of every row. The row Series is indexed by column label,
# so read the values by label; integer keys on such a Series rely on a
# deprecated positional fallback.
for _, row in df.iterrows():
    print(row["x"], row["y"])

In [None]:
# Row-by-row construction of the ratio table (the slow baseline approach).
data = []

for _, row in df.iterrows():
    # Read by column label: integer keys on a label-indexed Series are a
    # deprecated positional fallback.
    x = row["x"]
    y = row["y"]
    data.append([x, y, x / y])
df2 = pd.DataFrame(data, columns=["x", "y", "ratio"])
df2

> ## <span style="color: red">Warning</span>
> Iterating through pandas objects is generally <span style="color: cyan">__slow__</span>. In many cases, iterating manually over the rows is not needed and can be avoided with one of the following approaches:
> - Look for a vectorized solution: many operations can be performed using built-in methods or NumPy functions, (boolean) indexing, …
> - When you have a function that cannot work on the full DataFrame/Series at once, it is better to use apply() instead of iterating over the values. See the docs on function application.
> - If you need to do iterative manipulations on the values but performance is important, consider writing the inner loop with cython or numba. See the enhancing performance section for some examples of this approach.
> 
> https://pandas.pydata.org/pandas-docs/stable/user_guide/basics.html#iteration

### Nieźle - <span style="color: cyan">LIST COMPREHENSION</span>

In [None]:
# Same table built with a list comprehension over the rows; values are read
# by column label rather than by deprecated integer position.
df2 = pd.DataFrame(
    [(row["x"], row["y"], row["x"] / row["y"]) for _, row in df.iterrows()],
    columns=["x", "y", "ratio"],
)
df2

### Lepiej - <span style="color: cyan">APPLY</span>

In [None]:
# Deep copy (the default of copy()): columns added to df2 do not appear in df.
df2 = df.copy()

In [None]:
def ratio(row: pd.Series) -> float:
    """Return ``x`` divided by ``y`` for a single row.

    Args:
        row: One row of the frame, exposing the labels ``x`` and ``y``.

    Returns:
        The quotient ``row.x / row.y``.
    """
    # Debug trace: print the first value of the row being processed.
    print(row.iloc[0])
    return row.x / row.y

# Call ratio() once per row (axis="columns" is the named form of axis=1)
# and store the results in a new column.
df2["ratio"] = df2.apply(ratio, axis="columns")
df2

### Najlepiej - <span style="color: cyan">Vectorization</span>

In [None]:
# Deep copy (the default of copy()): columns added to df2 do not appear in df.
df2 = df.copy()

In [None]:
# Vectorized: divide the two columns in one operation, with no Python-level
# loop. The column is named "ratio", like the result of the other variants.
df2["ratio"] = df2["x"] / df2["y"]
df2

In [None]:
import pandas as pd
import numpy as np

# 10,000 x 2 frame holding the integers 0..19,999, filled row by row.
df = pd.DataFrame(
    np.arange(20_000, dtype="int64").reshape(10_000, 2),
    columns=["x", "y"],
)
df

In [None]:
%%time
# Row-by-row loop with temporaries; values are read by column label because
# integer keys on a label-indexed Series are a deprecated positional fallback.
data = []

for _, row in df.iterrows():
    x = row["x"]
    y = row["y"]
    data.append([x, y, x / y])
df2 = pd.DataFrame(data, columns=["x", "y", "ratio"])

In [None]:
%%time
# Row-by-row loop without temporaries; values are read by column label
# because integer keys on a label-indexed Series are a deprecated positional
# fallback.
data = []

for _, row in df.iterrows():
    data.append([row["x"], row["y"], row["x"] / row["y"]])
df2 = pd.DataFrame(data, columns=["x", "y", "ratio"])

In [None]:
%%time
# List-comprehension variant; values are read by column label rather than by
# deprecated integer position.
df2 = pd.DataFrame(
    [[row["x"], row["y"], row["x"] / row["y"]] for _, row in df.iterrows()],
    columns=["x", "y", "ratio"],
)

In [None]:
# Deep copy (the default of copy()): columns added to df2 do not appear in df.
df2 = df.copy()

In [None]:
%%time
def ratio(row):
    """Return the quotient of the row's ``x`` and ``y`` entries."""
    numerator = row.x
    denominator = row.y
    return numerator / denominator

# Call ratio() once per row (axis="columns" is the named form of axis=1)
# and store the results in a new column.
df2["ratio"] = df2.apply(ratio, axis="columns")

In [None]:
# Deep copy (the default of copy()): columns added to df2 do not appear in df.
df2 = df.copy()

In [None]:
%%time
# Vectorized: divide the two columns in one operation, with no Python-level
# loop. The column is named "ratio", like the result of the other variants.
df2["ratio"] = df2["x"] / df2["y"]