## Build a baseline model

In [1]:
import pandas as pd

In [3]:
# Print the pandas docstring for DataFrame.query (interactive exploration aid).
help(pd.DataFrame.query)

Help on function query in module pandas.core.frame:

query(self, expr, inplace=False, **kwargs)
    Query the columns of a frame with a boolean expression.
    
    Parameters
    ----------
    expr : string
        The query string to evaluate.  You can refer to variables
        in the environment by prefixing them with an '@' character like
        ``@a + b``.
    inplace : bool
        Whether the query should modify the data in place or return
        a modified copy
    
        .. versionadded:: 0.18.0
    
    kwargs : dict
        See the documentation for :func:`pandas.eval` for complete details
        on the keyword arguments accepted by :meth:`DataFrame.query`.
    
    Returns
    -------
    q : DataFrame
    
    Notes
    -----
    The result of the evaluation of this expression is first passed to
    :attr:`DataFrame.loc` and if that fails because of a
    multidimensional key (e.g., a DataFrame) then the result will be passed
    to :meth:`DataFrame.__getitem__`.
  

In [8]:
import numpy as np

# Fixed seed so the toy frame (and everything derived from it) is identical on
# every Restart & Run All; the unseeded original drew new values on each run.
# Re-run the notebook once to refresh the stored outputs.
SEED = 42
rng = np.random.RandomState(SEED)

# 10 rows x 2 columns ('a', 'b') of standard-normal draws.
df = pd.DataFrame(rng.randn(10, 2), columns=list('ab'))

In [9]:
# Show the whole frame; it has only 10 rows, so no .head() is needed.
df

Unnamed: 0,a,b
0,-0.40151,0.955572
1,2.175935,-0.180631
2,-0.138358,1.422634
3,-0.098207,0.400516
4,0.646952,-0.427608
5,-0.164186,0.015988
6,0.489944,-0.837977
7,-0.590133,-0.710723
8,0.463645,1.225749
9,-1.875056,0.805617


In [10]:
# Keep only the rows whose 'a' value is strictly positive.
test = df.query(expr='a > 0')

In [11]:
# Display the filtered rows produced by df.query('a > 0').
test

Unnamed: 0,a,b
1,2.175935,-0.180631
4,0.646952,-0.427608
6,0.489944,-0.837977
8,0.463645,1.225749


In [12]:
# Print the docstring of DataFrame.assign (looked up via the `test` frame).
help(test.assign)

Help on method assign in module pandas.core.frame:

assign(**kwargs) method of pandas.core.frame.DataFrame instance
    Assign new columns to a DataFrame, returning a new object
    (a copy) with the new columns added to the original ones.
    Existing columns that are re-assigned will be overwritten.
    
    Parameters
    ----------
    kwargs : keyword, value pairs
        keywords are the column names. If the values are
        callable, they are computed on the DataFrame and
        assigned to the new columns. The callable must not
        change input DataFrame (though pandas doesn't check it).
        If the values are not callable, (e.g. a Series, scalar, or array),
        they are simply assigned.
    
    Returns
    -------
    df : DataFrame
        A new DataFrame with the new columns in addition to
        all the existing columns.
    
    Notes
    -----
    Assigning multiple columns within the same ``assign`` is possible.
    For Python 3.6 and above, later items i

In [23]:
# Add two columns and return a new frame (`df` itself is not modified):
#   gz             - boolean flag, True where column 'a' is positive
#   testMultiColum - constant True, showing several columns in one assign call
# The callable uses its argument (the frame assign() is called on) instead of
# the global `df`, so the expression stays correct when reused on another
# frame or inside a method chain.
test2 = df.assign(gz=lambda frame: frame.a > 0, testMultiColum=True)

In [25]:
# Cast the boolean 'gz' flag to integers (False -> 0, True -> 1).
test2['gz'].astype(int)

0    0
1    1
2    0
3    0
4    1
5    0
6    1
7    0
8    1
9    0
Name: gz, dtype: int32

In [33]:
# Convert each 'gz' value to an int element by element via Series.map.
test2['gz'].map(int)

0    0
1    1
2    0
3    0
4    1
5    0
6    1
7    0
8    1
9    0
Name: gz, dtype: int64

In [44]:
def testf(x):
    x['gz'] = int(x['gz'])
    return x
# Run testf once per row (axis='columns' hands each row to the function).
test2.apply(func=testf, axis='columns')

Unnamed: 0,a,b,gz,testMultiColum
0,-0.40151,0.955572,0,True
1,2.175935,-0.180631,1,True
2,-0.138358,1.422634,0,True
3,-0.098207,0.400516,0,True
4,0.646952,-0.427608,1,True
5,-0.164186,0.015988,0,True
6,0.489944,-0.837977,1,True
7,-0.590133,-0.710723,0,True
8,0.463645,1.225749,1,True
9,-1.875056,0.805617,0,True
