## Modern Pandas DF Column Manipulation Examples

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Small demo frame shared by every example below: an integer id column
# plus two string columns.
df = pd.DataFrame(
    data={
        'id': list(range(1, 5)),
        'abc': list('ABCD'),
        'foo': ['foo', 'bar'] * 2,
    }
)
df

Unnamed: 0,id,abc,foo
0,1,A,foo
1,2,B,bar
2,3,C,foo
3,4,D,bar


### Example 1: Combining string columns into a new column

In [3]:
# 1.1 Plain column assignment: build the joined strings first, then
# attach them to a copy of the demo frame as the new 'com' column.
df1_1 = df.copy()
combined = df1_1['abc'] + ' ' + df1_1['foo']
df1_1['com'] = combined
df1_1

Unnamed: 0,id,abc,foo,com
0,1,A,foo,A foo
1,2,B,bar,B bar
2,3,C,foo,C foo
3,4,D,bar,D bar


In [4]:
# 1.2 Same idea with assign(): the callable receives the copied frame and
# returns the '-'-joined strings for the new 'com' column.
df1_2 = df.copy().assign(com=lambda frame: frame['abc'] + '-' + frame['foo'])
df1_2

Unnamed: 0,id,abc,foo,com
0,1,A,foo,A-foo
1,2,B,bar,B-bar
2,3,C,foo,C-foo
3,4,D,bar,D-bar


In [5]:
# 1.3 Using ".loc" for label-based assignment of a whole column
# ".at" (the replacement for set_value()) is the accessor for a single
# scalar cell, e.g. df1_3.at[0, 'com'] = 'A foo'; handing it a slice to
# fill an entire column is outside its documented contract, so ".loc" is
# the right accessor here.
# Is this necessary? Not for simply adding a column - 1.1 or 1.2 is shorter.
# ".loc" earns its keep when only a subset of rows should be written.
df1_3 = df.copy()
df1_3.loc[:, 'com'] = df1_3['abc'] + ' ' + df1_3['foo']
df1_3

Unnamed: 0,id,abc,foo,com
0,1,A,foo,A foo
1,2,B,bar,B bar
2,3,C,foo,C foo
3,4,D,bar,D bar


### Example 2: Replacing values with NA

In [6]:
# 2.1 Using element-wise map() (named applymap() before pandas 2.1)
# Project 1 cool hint - you can use this method to replace cells that contain a string, e.g. "-"
# You could probably also do this with replace() (see 2.2) using a regex
# DataFrame.applymap() is deprecated in favour of DataFrame.map() since
# pandas 2.1, so prefer map() and only fall back to applymap() on older
# versions. The fallback is looked up lazily so a pandas without
# applymap() never touches that attribute.
df2_1 = df.copy()
apply_elementwise = df2_1.map if hasattr(df2_1, 'map') else df2_1.applymap
df2_1 = apply_elementwise(lambda x: np.nan if x == 'foo' else x)
df2_1

Unnamed: 0,id,abc,foo
0,1,A,
1,2,B,bar
2,3,C,
3,4,D,bar


In [7]:
# 2.2 Using replace()
df2_2 = df.copy()
df2_2.foo = df2_2.foo.replace('foo',np.nan)
df2_2

Unnamed: 0,id,abc,foo
0,1,A,
1,2,B,bar
2,3,C,
3,4,D,bar
