# Creating Columns as Functions of Existing Columns

Import `pandas` and `numpy`

In [18]:
import pandas as pd
import numpy as np

In [19]:
# Index: eight consecutive calendar days beginning on 2020-01-01.
dates = pd.date_range(start='1/1/2020', periods=8)
# Values: standard-normal draws, one row per date and one column per label.
data = np.random.randn(len(dates), 3)
df = pd.DataFrame(data=data, index=dates, columns=list('ABC'))
df

Unnamed: 0,A,B,C
2020-01-01,-0.525468,-0.278131,2.247046
2020-01-02,-0.035327,0.037103,-0.057902
2020-01-03,0.527002,1.629287,-1.265782
2020-01-04,0.548755,-0.090065,-1.369624
2020-01-05,1.035193,-2.037408,0.991138
2020-01-06,-0.434253,0.863868,-0.283219
2020-01-07,-1.484812,-1.969011,-0.519139
2020-01-08,0.084231,0.428535,-1.685819


Create a new column as a function of an existing column. 

In [20]:
# Element-wise square of column A, stored as a new column.
df['Squared_A'] = df['A'].pow(2)

Create new columns as the sum and the product of two existing columns.

In [21]:
# Element-wise sum and product of columns A and B (aligned on the index).
df['Sum_AB'] = df['A'].add(df['B'])
df['Prod_AB'] = df['A'].mul(df['B'])
df

Unnamed: 0,A,B,C,Squared_A,Sum_AB,Prod_AB
2020-01-01,-0.525468,-0.278131,2.247046,0.276117,-0.803599,0.146149
2020-01-02,-0.035327,0.037103,-0.057902,0.001248,0.001775,-0.001311
2020-01-03,0.527002,1.629287,-1.265782,0.277731,2.15629,0.858638
2020-01-04,0.548755,-0.090065,-1.369624,0.301132,0.45869,-0.049424
2020-01-05,1.035193,-2.037408,0.991138,1.071624,-1.002216,-2.10911
2020-01-06,-0.434253,0.863868,-0.283219,0.188576,0.429614,-0.375137
2020-01-07,-1.484812,-1.969011,-0.519139,2.204668,-3.453823,2.923612
2020-01-08,0.084231,0.428535,-1.685819,0.007095,0.512767,0.036096


Create a new column by evaluating a function on existing columns. To do this we need to:

- Define the function that we want to evaluate
- Use the method `apply` combined with a `lambda` expression

In [22]:
from math import sin

def my_function(x, y, z):
    """Return sin(x) + sin(y) + sin(z).

    Each argument is passed to ``math.sin``, so it must be a real number
    (interpreted in radians). The terms are added left to right.
    """
    sin_x = sin(x)
    sin_y = sin(y)
    sin_z = sin(z)
    return sin_x + sin_y + sin_z


# axis=1 hands each row to the lambda, which forwards that row's A, B and C
# values to my_function.
df['function_ABC'] = df.apply(
    lambda row: my_function(row['A'], row['B'], row['C']),
    axis=1,
)
df

Unnamed: 0,A,B,C,Squared_A,Sum_AB,Prod_AB,function_ABC
2020-01-01,-0.525468,-0.278131,2.247046,0.276117,-0.803599,0.146149,0.003748
2020-01-02,-0.035327,0.037103,-0.057902,0.001248,0.001775,-0.001311,-0.056095
2020-01-03,0.527002,1.629287,-1.265782,0.277731,2.15629,0.858638,0.547392
2020-01-04,0.548755,-0.090065,-1.369624,0.301132,0.45869,-0.049424,-0.548151
2020-01-05,1.035193,-2.037408,0.991138,1.071624,-1.002216,-2.10911,0.803513
2020-01-06,-0.434253,0.863868,-0.283219,0.188576,0.429614,-0.375137,0.060179
2020-01-07,-1.484812,-1.969011,-0.519139,2.204668,-3.453823,2.923612,-2.414193
2020-01-08,0.084231,0.428535,-1.685819,0.007095,0.512767,0.036096,-0.493721


In [23]:
# Call my_function directly on three scalar arguments to inspect its result.
my_function(-0.634133, 0.576355, -0.265548)

-0.30994599716885773

Create a column which maps the values of a column to a set of colors depending on some boundaries. 
For example, we are going to map column `C` as follows:

- 'black' for values < -3
- 'yellow' for values in [-3, 3]
- 'green' for values >3
  

In [24]:
def color_function(x):
    """Map a number to a color name using the boundaries -3 and 3.

    Returns 'black' when x < -3, 'green' when x > 3, and 'yellow'
    otherwise (that is, for values in [-3, 3], and for anything that
    satisfies neither comparison).
    """
    if x < -3:
        return 'black'
    if x > 3:
        return 'green'
    return 'yellow'


# Only column C is needed, so apply color_function element-wise to that
# Series instead of iterating over whole rows with df.apply(..., axis=1).
# The result is the same Series of color names, aligned on df's index.
df['color'] = df['C'].apply(color_function)
df

Unnamed: 0,A,B,C,Squared_A,Sum_AB,Prod_AB,function_ABC,color
2020-01-01,-0.525468,-0.278131,2.247046,0.276117,-0.803599,0.146149,0.003748,yellow
2020-01-02,-0.035327,0.037103,-0.057902,0.001248,0.001775,-0.001311,-0.056095,yellow
2020-01-03,0.527002,1.629287,-1.265782,0.277731,2.15629,0.858638,0.547392,yellow
2020-01-04,0.548755,-0.090065,-1.369624,0.301132,0.45869,-0.049424,-0.548151,yellow
2020-01-05,1.035193,-2.037408,0.991138,1.071624,-1.002216,-2.10911,0.803513,yellow
2020-01-06,-0.434253,0.863868,-0.283219,0.188576,0.429614,-0.375137,0.060179,yellow
2020-01-07,-1.484812,-1.969011,-0.519139,2.204668,-3.453823,2.923612,-2.414193,yellow
2020-01-08,0.084231,0.428535,-1.685819,0.007095,0.512767,0.036096,-0.493721,yellow


Create a column using a condition. For example, checking whether the value of a column is non-negative (`>= 0`) or negative.

In [25]:
# Boolean column: True where A is greater than or equal to zero.
df['Flag'] = df['A'].ge(0)
df

Unnamed: 0,A,B,C,Squared_A,Sum_AB,Prod_AB,function_ABC,color,Flag
2020-01-01,-0.525468,-0.278131,2.247046,0.276117,-0.803599,0.146149,0.003748,yellow,False
2020-01-02,-0.035327,0.037103,-0.057902,0.001248,0.001775,-0.001311,-0.056095,yellow,False
2020-01-03,0.527002,1.629287,-1.265782,0.277731,2.15629,0.858638,0.547392,yellow,True
2020-01-04,0.548755,-0.090065,-1.369624,0.301132,0.45869,-0.049424,-0.548151,yellow,True
2020-01-05,1.035193,-2.037408,0.991138,1.071624,-1.002216,-2.10911,0.803513,yellow,True
2020-01-06,-0.434253,0.863868,-0.283219,0.188576,0.429614,-0.375137,0.060179,yellow,False
2020-01-07,-1.484812,-1.969011,-0.519139,2.204668,-3.453823,2.923612,-2.414193,yellow,False
2020-01-08,0.084231,0.428535,-1.685819,0.007095,0.512767,0.036096,-0.493721,yellow,True


Map a column with boolean values to color names in string format. For example, map `True` to color 'green' and `False` to color 'red'.

In [27]:
# Look up each boolean flag in the dictionary to get its color name.
df['Flag_to_Colors'] = df['Flag'].map(
    {
        True: 'green',
        False: 'red',
    }
)
df

Unnamed: 0,A,B,C,Squared_A,Sum_AB,Prod_AB,function_ABC,color,Flag,Flag_to_Colors
2020-01-01,-0.525468,-0.278131,2.247046,0.276117,-0.803599,0.146149,0.003748,yellow,False,red
2020-01-02,-0.035327,0.037103,-0.057902,0.001248,0.001775,-0.001311,-0.056095,yellow,False,red
2020-01-03,0.527002,1.629287,-1.265782,0.277731,2.15629,0.858638,0.547392,yellow,True,green
2020-01-04,0.548755,-0.090065,-1.369624,0.301132,0.45869,-0.049424,-0.548151,yellow,True,green
2020-01-05,1.035193,-2.037408,0.991138,1.071624,-1.002216,-2.10911,0.803513,yellow,True,green
2020-01-06,-0.434253,0.863868,-0.283219,0.188576,0.429614,-0.375137,0.060179,yellow,False,red
2020-01-07,-1.484812,-1.969011,-0.519139,2.204668,-3.453823,2.923612,-2.414193,yellow,False,red
2020-01-08,0.084231,0.428535,-1.685819,0.007095,0.512767,0.036096,-0.493721,yellow,True,green
