In [1]:
import pandas as pd
import numpy as np

In [11]:
def make_df(cols, ind):
    """Quickly make a DataFrame"""
    data = {c: [str(c) + str(i) for i in ind] for c in cols}
    print(data)
    return pd.DataFrame(data, ind)

In [12]:
# example DataFrame
make_df('ABC', range(3))

{'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2'], 'C': ['C0', 'C1', 'C2']}


Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


## concating a series

In [15]:
ser1 = pd.Series(['A', 'B', 'C'], index=[1, 2, 3])
ser2 = pd.Series(['D', 'E', 'F'], index=[4, 5, 6])
print(ser1)
print(ser2)
pd.concat([ser1, ser2])

1    A
2    B
3    C
dtype: object
4    D
5    E
6    F
dtype: object


1    A
2    B
3    C
4    D
5    E
6    F
dtype: object

pd.concat() can be used for a simple concatenation of Series or DataFrame objects, just as np.concatenate() can be used for simple concatenations of arrays

## concating DAtaframes

In [24]:
df1 = make_df('AB', [1, 2])
df2 = make_df('AB', [3, 4])
#display('df1', 'df2', 'pd.concat([df1, df2])')
print(df1)
print(df2)
pd.concat([df1,df2],axis=0) #by default concatenation takes place row wise within the data frame (i.e axis=0)

{'A': ['A1', 'A2'], 'B': ['B1', 'B2']}
{'A': ['A3', 'A4'], 'B': ['B3', 'B4']}
    A   B
1  A1  B1
2  A2  B2
    A   B
3  A3  B3
4  A4  B4


Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4


In [23]:
pd.concat([df1,df2],axis=1)

Unnamed: 0,A,B,A.1,B.1
1,A1,B1,,
2,A2,B2,,
3,,,A3,B3
4,,,A4,B4


##  Duplicate indices

In [30]:
x = make_df('AB', [0, 1])
y = make_df('AB', [0, 1])
print(x)
print()
print(y)
print()
pd.concat([x,y])
#display('x', 'y', 'pd.concat([x, y])')

{'A': ['A0', 'A1'], 'B': ['B0', 'B1']}
{'A': ['A0', 'A1'], 'B': ['B0', 'B1']}
    A   B
0  A0  B0
1  A1  B1

    A   B
0  A0  B0
1  A1  B1



Unnamed: 0,A,B
0,A0,B0
1,A1,B1
0,A0,B0
1,A1,B1


## Catching the repeats as an error
If you'd like to simply verify that the indices in the result of pd.concat() do not overlap, you can specify the verify_integrity flag. With this set to True, the concatenation will raise an exception if there are duplicate indices. Here is an example, where for clarity we'll catch and print the error message:

In [31]:
try:
    pd.concat([x, y], verify_integrity=True)
except ValueError as e:
    print("ValueError:", e)

ValueError: Indexes have overlapping values: Int64Index([0, 1], dtype='int64')


## Ignoring the index
Sometimes the index itself does not matter, and you would prefer it to simply be ignored. This option can be specified using the ignore_index flag. With this set to true, the concatenation will create a new integer index for the resulting Series:

In [32]:
display('x', 'y', 'pd.concat([x, y], ignore_index=True)')

'x'

'y'

'pd.concat([x, y], ignore_index=True)'

In [33]:
pd.concat([x, y], ignore_index=True)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A0,B0
3,A1,B1


## Concatenation with joins
In the simple examples we just looked at, we were mainly concatenating DataFrames with shared column names. In practice, data from different sources might have different sets of column names, and pd.concat offers several options in this case. Consider the concatenation of the following two DataFrames, which have some (but not all!) columns in common:

In [34]:
df5 = make_df('ABC', [1, 2])
df6 = make_df('BCD', [3, 4])
display('df5', 'df6', 'pd.concat([df5, df6])')

{'A': ['A1', 'A2'], 'B': ['B1', 'B2'], 'C': ['C1', 'C2']}
{'B': ['B3', 'B4'], 'C': ['C3', 'C4'], 'D': ['D3', 'D4']}


'df5'

'df6'

'pd.concat([df5, df6])'

In [35]:
print(df5)
print()
print(df6)
print()

    A   B   C
1  A1  B1  C1
2  A2  B2  C2

    B   C   D
3  B3  C3  D3
4  B4  C4  D4



In [37]:
pd.concat([df5, df6],sort=False) #default behaviour is outer join ,NAn will be used to representing data

Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4


## inner join columns

In [41]:
pd.concat([df5,df6],join="inner")

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
4,B4,C4


## specifying target columns

In [42]:
print(df5)
print()
print(df6)
print()
pd.concat([df5,df6.reindex(columns = df5.columns)],axis=1,ignore_index=True)

    A   B   C
1  A1  B1  C1
2  A2  B2  C2

    B   C   D
3  B3  C3  D3
4  B4  C4  D4



Unnamed: 0,0,1,2,3,4,5
1,A1,B1,C1,,,
2,A2,B2,C2,,,
3,,,,,B3,C3
4,,,,,B4,C4


## The append() method
Because direct array concatenation is so common, Series and DataFrame objects have an append method that can accomplish the same thing in fewer keystrokes. For example, rather than calling pd.concat([df1, df2]), 

In [44]:
df1.append(df2)

  df1.append(df2)


Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4
