In [1]:
import pandas as pd
import numpy as np

In [9]:
def make_df(cols, ind):
    """Quickly make a DataFrame"""
    data = {c: [str(c) + str(i) for i in ind]
            for c in cols}
    # print(data)
    return pd.DataFrame(data, ind)

# example DataFrame
make_df('ABC', range(3))

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [11]:
ser1 = pd.Series(['A', 'B', 'C'], index=[1, 2, 3])
ser2 = pd.Series(['D', 'E', 'F'], index=[4, 5, 6])
pd.concat([ser1, ser2])

1    A
2    B
3    C
4    D
5    E
6    F
dtype: object

In [19]:
df1 = make_df('AB', [1, 2])
df2 = make_df('AB', [3, 4])
pd.concat([df1,df2])

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4


In [15]:
df1

Unnamed: 0,A,B
1,A1,B1
2,A2,B2


In [17]:
df2

Unnamed: 0,A,B
3,A3,B3
4,A4,B4


In [21]:
df3 = make_df('AB', [0, 1])
df4 = make_df('CD', [0, 1])

In [23]:
pd.concat([df3,df4],axis=1)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1


### pandas preserves indexes even though it may have clones

In [32]:
df5=make_df('AB',[0,1])
df6=make_df('AB',[0,1])
pd.concat([df5,df6],axis=1)

Unnamed: 0,A,B,A.1,B.1
0,A0,B0,A0,B0
1,A1,B1,A1,B1


In [34]:
df5=make_df('AB',[0,1])
df6=make_df('AB',[0,1])
pd.concat([df5,df6])

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
0,A0,B0
1,A1,B1


If you'd like to simply verify that the indices in the result of pd.concat() do not overlap, you can specify the verify_integrity flag. With this set to True, the concatenation will raise an exception if there are duplicate indices. Here is an example, where for clarity we'll catch and print the error message:

In [39]:
try:
    pd.concat([df5, df6], verify_integrity=True)
except ValueError as e:
    print("ValueError:", e)

ValueError: Indexes have overlapping values: Index([0, 1], dtype='int64')


Sometimes the index itself does not matter, and you would prefer it to simply be ignored. This option can be specified using the ignore_index flag. With this set to true, the concatenation will create a new integer index for the resulting Series:

In [48]:
pd.concat([df5,df6],ignore_index=True) #this creates new indexes

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A0,B0
3,A1,B1


Another option is to use the keys option to specify a label for the data sources; the result will be a hierarchically indexed series containing the data:

In [55]:
pd.concat([df5,df6],keys=[1,2])

Unnamed: 0,Unnamed: 1,A,B
1,0,A0,B0
1,1,A1,B1
2,0,A0,B0
2,1,A1,B1


In [57]:
df5 = make_df('ABC', [1, 2])
df6 = make_df('BCD', [3, 4])

In [59]:
class display(object):
    """Display HTML representation of multiple objects"""
    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""
    def __init__(self, *args):
        self.args = args
        
    def _repr_html_(self):
        return '\n'.join(self.template.format(a, eval(a)._repr_html_())
                         for a in self.args)
    
    def __repr__(self):
        return '\n\n'.join(a + '\n' + repr(eval(a))
                           for a in self.args)
    

In [61]:
df5 = make_df('ABC', [1, 2])
df6 = make_df('BCD', [3, 4])
display('df5', 'df6', 'pd.concat([df5, df6])')

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2

Unnamed: 0,B,C,D
3,B3,C3,D3
4,B4,C4,D4

Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4


In [63]:
display('df5', 'df6',
        "pd.concat([df5, df6], join='inner')")

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2

Unnamed: 0,B,C,D
3,B3,C3,D3
4,B4,C4,D4

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
4,B4,C4


In [67]:
## append mode

In [73]:
df1.append(df6)

AttributeError: 'DataFrame' object has no attribute 'append'