## Make new columns with `DataFrame.assign`

In [1]:
import pandas as pd

In [2]:
# Empty placeholder frame; the next cell uses it to look up the `assign` docstring.
df = pd.DataFrame(data=[])

In [3]:
# IPython help lookup: display the signature and docstring of DataFrame.assign.
df.assign?

Signature: df.assign(**kwargs) -> 'DataFrame'
Docstring:
Assign new columns to a DataFrame.

Returns a new object with all original columns in addition to new ones.
Existing columns that are re-assigned will be overwritten.

Parameters
----------
**kwargs : dict of {str: callable or Series}
    The column names are keywords. If the values are
    callable, they are computed on the DataFrame and
    assigned to the new columns. The callable must not
    change input DataFrame (though pandas doesn't check it).
    If the values are not callable, (e.g. a Series, scalar, or array),
    they are simply assigned.

Returns
-------
DataFrame
    A new DataFrame with the new columns in addition to
    all the existing columns.

Notes
-----
Assigning multiple columns within the same ``assign`` is possible.
Later items in '\*\*kwargs' may refer to newly created or modifie

In [7]:
import numpy as np

# Sample frame for the examples below:
#   A - the integers 1..10
#   B - 10 draws from the standard normal distribution
# B comes from a generator with a fixed seed so that the table is identical on
# every Restart Kernel -> Run All (the unseeded global RNG gave new values each run).
df = pd.DataFrame({"A": range(1, 11), "B": np.random.default_rng(42).standard_normal(10)})
df

Unnamed: 0,A,B
0,1,-0.872682
1,2,0.274495
2,3,-0.942743
3,4,0.187626
4,5,0.800405
5,6,0.402906
6,7,0.468942
7,8,0.420222
8,9,-1.066689
9,10,-0.056426


In [11]:
# Preview the frame with a derived natural-log column; `assign` returns a new
# DataFrame, so `df` itself is left unchanged by this cell.
df.assign(ln_A=lambda frame: np.log(frame["A"])).head()

Unnamed: 0,A,B,ln_A
0,1,-0.872682,0.0
1,2,0.274495,0.693147
2,3,-0.942743,1.098612
3,4,0.187626,1.386294
4,5,0.800405,1.609438


In [12]:
# Store the natural log of A on `df` itself so that later cells can use the column.
df["ln_A"] = np.log(df["A"])
df

Unnamed: 0,A,B,ln_A
0,1,-0.872682,0.0
1,2,0.274495,0.693147
2,3,-0.942743,1.098612
3,4,0.187626,1.386294
4,5,0.800405,1.609438
5,6,0.402906,1.791759
6,7,0.468942,1.94591
7,8,0.420222,2.079442
8,9,-1.066689,2.197225
9,10,-0.056426,2.302585


Unnamed: 0,A,B,ln_A
0,1,-0.872682,0.0
1,2,0.274495,0.693147
2,3,-0.942743,1.098612
3,4,0.187626,1.386294
4,5,0.800405,1.609438
5,6,0.402906,1.791759
6,7,0.468942,1.94591
7,8,0.420222,2.079442
8,9,-1.066689,2.197225
9,10,-0.056426,2.302585


## `pd.qcut`
### 값을 분위수 기준으로 n개의 bucket(bin)으로 나눈 범주형 컬럼을 만들어준다

In [15]:
# IPython help lookup: display the signature and docstring of pd.qcut.
pd.qcut?

Signature: pd.qcut(x, q, labels=None, retbins:bool=False, precision:int=3, duplicates:str='raise')
Docstring:
Quantile-based discretization function.

Discretize variable into equal-sized buckets based on rank or based
on sample quantiles. For example 1000 values for 10 quantiles would
produce a Categorical object indicating quantile membership for each data point.

Parameters
----------
x : 1d ndarray or Series
q : int or list-like of int
    Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately
    array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles.
labels : array or False, default None
    Used as labels for the resulting bins. 

In [22]:
# Bin column A into three quantile-based buckets, labelled from lowest to highest.
pd.qcut(df["A"], q=3, labels=["good", "medium", "bad"])

0      good
1      good
2      good
3      good
4    medium
5    medium
6    medium
7       bad
8       bad
9       bad
Name: A, dtype: category
Categories (3, object): [good < medium < bad]

In [24]:
# Largest value of each column; df.max(axis=1) would instead pick the largest
# value among the columns of each row.
df.max(axis="index")

A       10.000000
B        0.800405
ln_A     2.302585
dtype: float64

In [27]:
# Absolute value of every entry in column B.
abs(df["B"])

0    0.872682
1    0.274495
2    0.942743
3    0.187626
4    0.800405
5    0.402906
6    0.468942
7    0.420222
8    1.066689
9    0.056426
Name: B, dtype: float64