In [1]:
# Now we'll learn about concatenating along an axis
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [3]:
# First, in plain NumPy

In [2]:
# Build a 3x3 matrix holding the integers 0..8 in row-major order
arr1 = np.arange(9).reshape(3, 3)

In [3]:
# Display the 3x3 matrix created above
arr1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [4]:
# Join the matrix to itself side by side: axis=1 grows the columns (3x6)
np.concatenate((arr1, arr1), axis=1)

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])

In [5]:
# With axis=0 the copies are stacked vertically instead: rows grow (6x3)
np.concatenate((arr1, arr1), axis=0)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [8]:
# Now let's see how this works in pandas

In [16]:
# Three labelled Series: ser1 and ser2 share no index labels, while ser3
# reuses 'T' (from ser1) and 'X' (from ser2) and adds a new label 'W'
ser1 = Series(data=[0, 1, 2], index=['T', 'U', 'V'])
ser2 = Series(data=[3, 4], index=['X', 'Y'])
ser3 = Series(data=[9, 8, 10], index=['T', 'X', 'W'])

# pd.concat defaults to axis=0: the Series are glued end to end and
# repeated labels are kept as-is
pd.concat([ser1, ser2, ser3])

T     0
U     1
V     2
X     3
Y     4
T     9
X     8
W    10
dtype: int64

In [20]:
# Keep the concatenated Series and fetch its sixth element by position.
# The index holds string labels, so positional access has to go through
# .iloc; plain ser4[5] relies on the deprecated integer fallback of [].
ser4 = pd.concat([ser1, ser2, ser3])
ser4.iloc[5]

9

In [21]:
# Concatenating along axis=1 yields a DataFrame instead: one column per
# Series, aligned on the union of the index labels (gaps become NaN)
pd.concat([ser1, ser2, ser3], axis=1)

Unnamed: 0,0,1,2
T,0.0,,9.0
U,1.0,,
V,2.0,,
W,,,10.0
X,,3.0,8.0
Y,,4.0,


In [17]:
# We can restrict the result to specific index labels.
# The old `join_axes` argument of pd.concat was removed in pandas 1.0,
# so concatenate first and then reindex to the labels we want to keep.
pd.concat([ser1, ser2], axis=1).reindex(['U', 'V', 'Y'])

Unnamed: 0,0,1
U,1.0,
V,2.0,
Y,,4.0


In [32]:
# Suppose we want to tag each piece of the concatenation with a marker key.

# Passing `keys` builds a hierarchical index: one outer level per input
pd.concat([ser1, ser2, ser3], keys=['cat1', 'cat2', 'cat3'])

cat1  T     0
      U     1
      V     2
cat2  X     3
      Y     4
cat3  T     9
      X     8
      W    10
dtype: int64

In [33]:
# Select the 'cat1' group by its outer key, then take the third element of
# that sub-Series by position. Its labels are strings, so use .iloc rather
# than the deprecated positional fallback of [].
sri_keys = pd.concat([ser1, ser2, ser3], keys=['cat1', 'cat2', 'cat3'])
sri_keys['cat1'].iloc[2]


2

In [36]:
# With axis=1 the same keys become the column headers instead
pd.concat([ser1, ser2, ser3], axis=1, keys=['cat1', 'cat2', 'cat3'])

Unnamed: 0,cat1,cat2,cat3
T,0.0,,9.0
U,1.0,,
V,2.0,,
W,,,10.0
X,,3.0,8.0
Y,,4.0,


In [38]:
# Lastly, everything works similarly with DataFrames.

# Seed the global NumPy generator so the random values (and therefore every
# table displayed below) are identical on each Restart & Run All.
np.random.seed(42)

# Two frames that share columns 'X' and 'Y'; 'Z' exists only in the first
# and 'Q' only in the second.
dframe1 = DataFrame(np.random.randn(4, 3), columns=['X', 'Y', 'Z'])
dframe2 = DataFrame(np.random.randn(3, 3), columns=['Y', 'Q', 'X'])

In [39]:
# Display the first frame (4 rows; columns X, Y, Z)
dframe1

Unnamed: 0,X,Y,Z
0,0.463865,0.435782,-0.893958
1,-0.037452,-0.546519,1.163291
2,0.133862,-0.195552,-0.849864
3,1.364951,0.464517,0.496165


In [40]:
# Display the second frame (3 rows; columns Y, Q, X)
dframe2

Unnamed: 0,Y,Q,X
0,0.52805,-0.416022,-1.281346
1,-0.484191,0.030627,-0.004297
2,2.010998,-0.403618,-0.911767


In [41]:
# Concatenating DataFrames stacks their rows; columns are matched by name
# and a column missing from one frame is filled with NaN for its rows
pd.concat([dframe1, dframe2])

Unnamed: 0,Q,X,Y,Z
0,,0.463865,0.435782,-0.893958
1,,-0.037452,-0.546519,1.163291
2,,0.133862,-0.195552,-0.849864
3,,1.364951,0.464517,0.496165
0,-0.416022,-1.281346,0.52805,
1,0.030627,-0.004297,-0.484191,
2,-0.403618,-0.911767,2.010998,


In [44]:
# Keep the stacked result (a DataFrame, despite the `ser` prefix) and inspect
# its index: the original row labels are preserved, so 0, 1 and 2 repeat
ser5 = pd.concat([dframe1, dframe2])
ser5.index

Int64Index([0, 1, 2, 3, 0, 1, 2], dtype='int64')

In [45]:
# If the original row labels don't matter and we just want one complete
# DataFrame, ignore_index=True discards them and renumbers the rows from 0
pd.concat([dframe1, dframe2], ignore_index=True)

Unnamed: 0,Q,X,Y,Z
0,,0.463865,0.435782,-0.893958
1,,-0.037452,-0.546519,1.163291
2,,0.133862,-0.195552,-0.849864
3,,1.364951,0.464517,0.496165
4,-0.416022,-1.281346,0.52805,
5,0.030627,-0.004297,-0.484191,
6,-0.403618,-0.911767,2.010998,


In [18]:
# For more information, see the pandas.concat API reference.
# (Updated from the old http://.../pandas-docs/stable/generated/ path to the
# current HTTPS location of the documentation.)
url = 'https://pandas.pydata.org/docs/reference/api/pandas.concat.html'

In [None]:
#Next up: More on Combining DataFrames with Overlapping Indexes!