In [1]:
# Now we'll learn about concatenating along an axis
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Start with plain NumPy:
# build a 3x3 matrix holding the integers 0 through 8
arr1 = np.arange(9).reshape(3, 3)

# Display it
arr1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [3]:
# Join two copies of arr1 along axis 1: the rows stay at 3, the columns double to 6
np.concatenate((arr1, arr1), axis=1)

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])

In [4]:
# The other axis option: axis 0 stacks the second copy underneath the first (6 rows, 3 columns)
np.concatenate((arr1, arr1), axis=0)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [5]:
# The same idea carries over to pandas.

# Build two Series whose index labels do not overlap
ser1 = Series(data=[0, 1, 2], index=['T', 'U', 'V'])
ser2 = Series(data=[3, 4], index=['X', 'Y'])

# Concatenate them end to end (concat defaults to axis=0)
pd.concat([ser1, ser2])

T    0
U    1
V    2
X    3
Y    4
dtype: int64

In [6]:
# Concatenating along axis=1 instead yields a DataFrame: one column per Series,
# with NaN wherever a Series has no value for an index label
pd.concat([ser1, ser2], axis=1)

Unnamed: 0,0,1
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [61]:
# Suppose we want to tag each piece of the result with a marker (key).

# Passing keys= builds a hierarchical index whose outer level is the key
pd.concat([ser1, ser2], keys=['cat1', 'cat2'])

cat1  T    0
      U    1
      V    2
cat2  X    3
      Y    4
dtype: int64

In [66]:
# With axis=1 the same keys become the column headers instead
pd.concat([ser1, ser2], axis=1, keys=['cat1', 'cat2'])

Unnamed: 0,cat1,cat2
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [67]:
# Lastly, everything works similarly in DataFrames.
#
# Draw the sample values from a seeded generator so that a fresh
# Restart & Run All reproduces exactly the same frames every time.
# The unseeded global np.random state produced different numbers on each run.
# After changing SEED, re-run the cells that display these frames to refresh
# their output.
SEED = 42
rng = np.random.default_rng(SEED)

# Both frames have columns X and Y; Z exists only in dframe1 and Q only in dframe2.
dframe1 = DataFrame(rng.standard_normal((4, 3)), columns=['X', 'Y', 'Z'])
dframe2 = DataFrame(rng.standard_normal((3, 3)), columns=['Y', 'Q', 'X'])

In [68]:
# Concatenate the two DataFrames row-wise; a column missing from one frame is filled with NaN
pd.concat([dframe1, dframe2])

Unnamed: 0,X,Y,Z,Q
0,1.034076,0.01624,1.041374,
1,1.633481,0.554193,0.12271,
2,-0.408443,1.347397,-0.309155,
3,-1.389013,1.014543,-0.595301,
0,0.198368,-1.450197,,0.485569
1,-0.379132,-0.165424,,-0.281143
2,0.110818,-1.2311,,-1.200358


In [69]:
# If we don't care about the original index labels and just want one complete DataFrame,
# pass ignore_index=True to get a fresh 0..n-1 index
pd.concat([dframe1, dframe2], ignore_index=True)

Unnamed: 0,X,Y,Z,Q
0,1.034076,0.01624,1.041374,
1,1.633481,0.554193,0.12271,
2,-0.408443,1.347397,-0.309155,
3,-1.389013,1.014543,-0.595301,
4,0.198368,-1.450197,,0.485569
5,-0.379132,-0.165424,,-0.281143
6,0.110818,-1.2311,,-1.200358


In [None]:
#Next up: More on Combining DataFrames with Overlapping Indexes!