In [3]:
import pandas as pd
import numpy as np

'''
nother kind of data combination operation is referred to interchangeably as concat‐
enation, binding, or stacking. NumPy’s concatenate function can do this with
NumPy arrays
'''

arr = np.arange(12).reshape(3,4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [4]:
np.concatenate([arr,arr], axis=1)

array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

In [5]:
s1 = pd.Series([0, 1], index=['a', 'b'])
s2 = pd.Series([2, 3, 4], index=['c', 'd', 'e'])
s3 = pd.Series([5, 6], index=['f', 'g'])

print(s1.to_string())
print("\n", s2.to_string())
print("\n", s3.to_string())

a    0
b    1

 c    2
d    3
e    4

 f    5
g    6


In [9]:
'''
By default concat works along axis=0 , producing another Series. If you pass axis=1 ,
the result will instead be a DataFrame ( axis=1 is the columns)
'''
print(pd.concat([s1,s2,s3]).to_string())
print("\n")
pd.concat([s1,s2,s3], axis=1)

a    0
b    1
c    2
d    3
e    4
f    5
g    6




of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  import sys


Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [14]:
'''
In this case there is no overlap on the other axis, which as you can see is the sorted
union (the 'outer' join) of the indexes. You can instead intersect them by passing
join='inner'
'''

s4 = pd.concat([s1,s3])
print( s4 ,"\n")

pd.concat([s1,s4],axis=1)

a    0
b    1
f    5
g    6
dtype: int64 



of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,0,1
a,0.0,0
b,1.0,1
f,,5
g,,6


In [15]:
pd.concat([s1, s4], axis=1, join='inner')


Unnamed: 0,0,1
a,0,0
b,1,1


In [18]:
'''
In this last example, the 'f' and 'g' labels disappeared because of the join='inner'
option.
You can even specify the axes to be used on the other axes with join_axes
'''

pd.concat([s1, s4], axis=1, join_axes=[['a','c','b','e']])

Unnamed: 0,0,1
a,0.0,0.0
c,,
b,1.0,1.0
e,,


In [22]:
'''
A potential issue is that the concatenated pieces are not identifiable in the result. Sup‐
pose instead you wanted to create a hierarchical index on the concatenation axis. To
do this, use the keys argument
'''

result = pd.concat([s1,s2,s3], keys=['one','two','three'])
result

one    a    0
       b    1
two    c    2
       d    3
       e    4
three  f    5
       g    6
dtype: int64

In [23]:
result.unstack()

Unnamed: 0,a,b,c,d,e,f,g
one,0.0,1.0,,,,,
two,,,2.0,3.0,4.0,,
three,,,,,,5.0,6.0


In [24]:
'''
In the case of combining Series along axis=1 , the keys become the DataFrame column headers
'''

pd.concat([s1,s2,s3], axis=1, keys=['one','two','three'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """


Unnamed: 0,one,two,three
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [26]:
# The same logic extends to DataFrame objects

df1 = pd.DataFrame(np.arange(6).reshape(3, 2), index=['a', 'b', 'c'],columns=['one', 'two'])
df2 = pd.DataFrame(5 + np.arange(4).reshape(2, 2), index=['a', 'c'],columns=['three', 'four'])

df1


Unnamed: 0,one,two
a,0,1
b,2,3
c,4,5


In [27]:
df2

Unnamed: 0,three,four
a,5,6
c,7,8


In [29]:
pd.concat([df1,df2], axis=1, keys=['level1', 'level2'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [30]:
# If you pass a dict of objects instead of a list, the dict’s keys will be used for the keys option.

pd.concat({'level1' : df1, 'level2' : df2}, axis=1)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [31]:
'''
There are additional arguments governing how the hierarchical index is created.
For example, we can name the created axis levels with the names
argument
'''

pd.concat([df1, df2], axis=1, keys=['level1', 'level2'], names=['upper','lower'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  import sys


upper,level1,level1,level2,level2
lower,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [32]:
'''
A last consideration concerns DataFrames in which the row index does not contain
any relevant data
'''

df1 = pd.DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])
pd.concat([df1, df2], ignore_index=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,a,b,c,d
0,-0.107465,0.811662,-0.475382,0.181809
1,0.073266,-0.382682,0.55745,-0.0809
2,-0.161399,-0.220922,0.647001,0.495631
3,-0.636438,-0.322627,,-0.397222
4,-0.885629,0.66826,,1.620868


In [None]:
'''


objs
List or dict of pandas objects to be concatenated; this is the only required argument

axis
Axis to concatenate along; defaults to 0 (along rows)

join
Either 'inner' or 'outer' ( 'outer' by default); whether to intersection (inner) or union
(outer) together indexes along the other axes

join_axes
Specific indexes to use for the other n–1 axes instead of performing union/intersection logic

keys
Values to associate with objects being concatenated, forming a hierarchical index along the
concatenation axis; can either be a list or array of arbitrary values, an array of tuples, or a list of
arrays (if multiple-level arrays passed in levels )

levels
Specific indexes to use as hierarchical index level or levels if keys passed

names
Names for created hierarchical levels if keys and/or levels passed

verify_integrity 
Check new axis in concatenated object for duplicates and raise exception if so; by default ( False )
allows duplicates

ignore_index
Do not preserve indexes along concatenation axis , instead producing a new
range(total_length) index
'''