In [2]:
import numpy as np
import pandas as pd

In [3]:
# NumPy's concatenate function: start from a 3x4 array holding 0..11
arr = np.arange(12).reshape(3, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [4]:
# axis=0 stacks the copies row-wise: twice as many rows, same columns
np.concatenate((arr, arr), axis=0)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [5]:
# axis=1 joins the copies side by side: same rows, twice as many columns
np.concatenate((arr, arr), axis=1)

array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

### pandas `concat` function

In [6]:
# Three Series whose index labels do not overlap
s1 = pd.Series([0, 1], index=list("ab"))
s2 = pd.Series([2, 3, 4], index=list("cde"))
s3 = pd.Series([5, 6], index=list("fg"))

# axis=0 is the default, so the Series are stacked end to end
pd.concat([s1, s2, s3])

a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In [7]:
# axis=1 must be requested explicitly (the default is axis=0). Each Series becomes
# a column and the row index is the union of all labels, with NaN where a Series
# has no value for a label.
pd.concat([s1, s2, s3], axis=1)


Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [8]:
# s4 shares the labels 'a' and 'b' with s1; it is the second operand used
# to demonstrate intersecting indexes with join="inner"
s4 = pd.concat([s1, s3], axis=0)
s4

a    0
b    1
f    5
g    6
dtype: int64

In [9]:
# join="inner" keeps only the labels present in both s1 and s4
pd.concat([s1, s4], axis=1, join="inner")

Unnamed: 0,0,1
a,0,0
b,1,1


In [10]:
# Show the three source Series again. With print's default separator (a single
# space) each Series' "dtype" footer runs into the first row of the next Series,
# so separate them with a blank line instead.
print(s1, s2, s3, sep="\n\n")

a    0
b    1
dtype: int64 c    2
d    3
e    4
dtype: int64 f    5
g    6
dtype: int64


In [13]:
# The `join_axes` argument of pd.concat was deprecated in pandas 0.25 and removed
# in 1.0, so a call such as
#   pd.concat([s1, s4], axis=1, join_axes=[["a", "b", "c"]])
# no longer works. Reindexing the concatenated result selects the wanted row
# labels instead; labels missing from the inputs ("c" here) are filled with NaN.
pd.concat([s1, s4], axis=1).reindex(["a", "b", "c"])


In [20]:
# keys= labels each concatenated piece; along axis 0 the keys form the
# outermost level of a hierarchical index
res = pd.concat([s1, s1, s3], keys=["one", "two", "three"])
res

one    a    0
       b    1
two    a    0
       b    1
three  f    5
       g    6
dtype: int64

In [21]:
# unstack pivots the innermost index level into the columns
res.unstack(level=-1)

Unnamed: 0,a,b,f,g
one,0.0,1.0,,
two,0.0,1.0,,
three,,,5.0,6.0


In [23]:
# Along axis=1 the keys become the column headers
pd.concat([s1, s2, s3], axis=1, keys=["one", "two", "three"])

Unnamed: 0,one,two,three
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [24]:
# DataFrames: two small frames with partly overlapping row labels.
# keys= takes one label per object being concatenated.
df1 = pd.DataFrame(
    np.arange(6).reshape((3, 2)),
    index=list("abc"),
    columns=["one", "two"],
)
df2 = pd.DataFrame(
    np.arange(5, 9).reshape((2, 2)),
    index=list("ac"),
    columns=["three", "four"],
)
print(df1)
print(df2)

   one  two
a    0    1
b    2    3
c    4    5
   three  four
a      5     6
c      7     8


In [27]:
# With DataFrames on axis=1, the keys become the outer level of the column
# MultiIndex (a dict mapping key -> DataFrame can be passed instead of keys=)
pd.concat([df1, df2], axis=1, keys=["l1", "l2"])

Unnamed: 0_level_0,l1,l1,l2,l2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [29]:
# Passing a dict instead of a list: the dict keys play the role of keys=
pd.concat({"lvl1": df1, "lvl2": df2}, axis=1)


Unnamed: 0_level_0,lvl1,lvl1,lvl2,lvl2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [30]:
# names= labels the levels of the resulting column MultiIndex
pd.concat({"lvl1": df1, "lvl2": df2}, axis=1, names=["upper", "lower"])

upper,lvl1,lvl1,lvl2,lvl2
lower,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [31]:
# Frames whose default integer row labels carry no relevant information.
# The values come from a seeded Generator so that every run of the notebook
# produces the same numbers (the unseeded np.random.randn call did not).
rng = np.random.default_rng(0)
df1 = pd.DataFrame(rng.standard_normal((3, 4)), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(rng.standard_normal((2, 3)), columns=['b', 'd', 'a'])

print(df1)
print(df2)

          a         b         c         d
0 -0.627589  0.237625  0.326476  0.348293
1 -1.637096  0.586552  0.509323  1.735901
2 -1.142428  0.727616 -0.440396 -0.925011
          b         d         a
0 -0.195333 -0.669121 -0.749050
1  0.994562 -1.013967 -1.427634


In [33]:
# ignore_index=True discards the original row labels and numbers the
# concatenated rows 0..n-1; columns are aligned by name
pd.concat([df1, df2], axis=0, ignore_index=True)

Unnamed: 0,a,b,c,d
0,-0.627589,0.237625,0.326476,0.348293
1,-1.637096,0.586552,0.509323,1.735901
2,-1.142428,0.727616,-0.440396,-0.925011
3,-0.74905,-0.195333,,-0.669121
4,-1.427634,0.994562,,-1.013967
