In [1]:
import pandas as pd

## join

In [3]:
# Two frames that both use the default integer index (0..3).
var1 = pd.DataFrame(
    data={'A': [1, 2, 3, 4], 'B': [11, 12, 13, 14]},
)
var2 = pd.DataFrame(
    data={'C': [10, 11, 13, 14], 'D': [21, 22, 23, 24]},
)

# join() lines rows up by index label (left join by default). Equal lengths are
# not a requirement; here every label of var1 also exists in var2, so all rows match.
var1.join(other=var2)

Unnamed: 0,A,B,C,D
0,1,11,10,21
1,2,12,11,22
2,3,13,13,23
3,4,14,14,24


In [5]:
# var3 carries string labels; var4 keeps the default integer labels (0, 1).
var3 = pd.DataFrame(
    data={'A': [1, 2, 3, 4], 'B': [11, 12, 13, 14]},
    index=['a', 'b', 'c', 'd'],
)
var4 = pd.DataFrame(data={'C': [11, 12], 'D': [21, 22]})

# join() matches rows by index label, not by position or length. None of var4's
# labels occur in var3, so C and D are filled with NaN for every row.
var3.join(other=var4)

Unnamed: 0,A,B,C,D
a,1,11,,
b,2,12,,
c,3,13,,
d,4,14,,


In [6]:
# Roles swapped: var2 is the calling (left) frame, so its columns C, D come first.
var2.join(other=var1)

Unnamed: 0,C,D,A,B
0,10,21,1,11
1,11,22,2,12
2,13,23,3,13
3,14,24,4,14


In [7]:
# var4 is the calling (left) frame, so only its labels (0, 1) are kept.
# var3 has no rows with those labels, hence A and B come back as NaN.
var4.join(other=var3)

Unnamed: 0,C,D,A,B
0,11,21,,
1,12,22,,


In [8]:
# how='left' keeps the caller's (var4) index labels; it is the default,
# so the result matches the plain var4.join(var3) call.
var4.join(other=var3, how='left')

Unnamed: 0,C,D,A,B
0,11,21,,
1,12,22,,


In [10]:
# how='right' keeps the index labels of the frame passed in (var3: a..d);
# var4 has no such labels, so C and D are NaN.
var4.join(other=var3, how='right')

Unnamed: 0,C,D,A,B
a,,,1,11
b,,,2,12
c,,,3,13
d,,,4,14


In [11]:
# how='outer' keeps the union of both indexes (0, 1, a, b, c, d).
# Cells without a match become NaN, which turns the integer columns into floats.
var4.join(other=var3, how='outer')

Unnamed: 0,C,D,A,B
0,11.0,21.0,,
1,12.0,22.0,,
a,,,1.0,11.0
b,,,2.0,12.0
c,,,3.0,13.0
d,,,4.0,14.0


In [14]:
# how='inner' keeps only labels present in both indexes (intersection).
# var4 (0, 1) and var3 (a..d) share none, so the result has no rows.
var4.join(other=var3, how='inner')

Unnamed: 0,C,D,A,B


In [15]:
# Inner join again, this time on frames that share every label (0..3):
# the intersection is the full index, so all four rows survive.
var2.join(other=var1, how='inner')

Unnamed: 0,C,D,A,B
0,10,21,1,11
1,11,22,2,12
2,13,23,3,13
3,14,24,4,14


In [17]:
# Both frames contain a column named 'B'. join() will not guess how to rename
# overlapping columns, so without lsuffix/rsuffix it raises ValueError. The
# cause is the shared column name, not the frames having different lengths.
var5 = pd.DataFrame({'A':[1,2,3,4],'B':[11,12,13,14]},index=['a','b','c','d'])
var6 = pd.DataFrame({'C':[10,20],'B':[11,22]},index=['a','b'])

# The failure is the point of this cell: catch it and display the message so
# that "Restart & Run All" can continue past this cell.
try:
    var6.join(var5, how='inner')
except ValueError as err:
    print("ValueError:", err)

ValueError: columns overlap but no suffix specified: Index(['B'], dtype='object')

In [18]:
# Resolve the name clash with a suffix: lsuffix is appended to the overlapping
# column of the calling (left) frame, so var6's 'B' becomes 'B_hi' while
# var5's 'B' keeps its name.
var6.join(other=var5, how='inner', lsuffix='_hi')

Unnamed: 0,C,B_hi,A,B
a,10,11,1,11
b,20,22,2,12


In [19]:
# Same suffix, but how='outer' keeps every label from both frames (a..d);
# rows c and d exist only in var5, so var6's columns are NaN there.
var6.join(other=var5, how='outer', lsuffix='_hi')

Unnamed: 0,C,B_hi,A,B
a,10.0,11.0,1,11
b,20.0,22.0,2,12
c,,,3,13
d,,,4,14


In [20]:
# rsuffix renames the overlapping column of the frame passed in instead:
# var5's 'B' becomes 'B_123' and var6's 'B' keeps its name.
var6.join(other=var5, how='outer', rsuffix='_123')

Unnamed: 0,C,B,A,B_123
a,10.0,11.0,1,11
b,20.0,22.0,2,12
c,,,3,13
d,,,4,14


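## Append
- Unlike `join`, appending stacks the rows of one DataFrame below another and aligns columns by name, so a shared column such as `B` does not raise the overlap error; columns missing from one frame are filled with NaN.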

In [21]:
var1 = pd.DataFrame({'A':[1,2,3,4],'B':[11,12,13,14]}, index=['a','b','c','d'])
var2 = pd.DataFrame({'C':[10,20], 'B':[11,22]}, index=['a','b'])

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# stacks the rows the same way. Columns are matched by name (the shared 'B' is
# simply extended), missing cells become NaN, and the original index labels are
# kept, so 'a' and 'b' appear twice.
pd.concat([var1, var2])

Unnamed: 0,A,B,C
a,1.0,11,
b,2.0,12,
c,3.0,13,
d,4.0,14,
a,,11,10.0
b,,22,20.0


In [23]:
# Same idea with the default integer index on both frames.
var1 = pd.DataFrame({'A':[1,2,3,4],'B':[11,12,13,14]})
var2 = pd.DataFrame({'C':[10,20],'B':[11,21]})

# pd.concat replaces DataFrame.append (removed in pandas 2.0). Each frame keeps
# its own labels, so the result index is 0, 1, 2, 3, 0, 1 with duplicates.
pd.concat([var1, var2])

Unnamed: 0,A,B,C
0,1.0,11,
1,2.0,12,
2,3.0,13,
3,4.0,14,
0,,11,10.0
1,,21,20.0


In [24]:
# ignore_index=True discards the original labels and renumbers the stacked rows
# 0..n-1, which removes the duplicate labels. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([var1, var2], ignore_index=True)

Unnamed: 0,A,B,C
0,1.0,11,
1,2.0,12,
2,3.0,13,
3,4.0,14,
4,,11,10.0
5,,21,20.0


In [25]:
# Frames with no column in common.
var1 = pd.DataFrame({'A':[1,2,3,4],'B':[11,12,13,14]})
var2 = pd.DataFrame({'C':[10,20],'D':[11,22]})

# pd.concat replaces DataFrame.append (removed in pandas 2.0). With disjoint
# columns the result holds all four columns, and each row is NaN in the columns
# that came from the other frame; ignore_index=True renumbers the rows 0..5.
pd.concat([var1, var2], ignore_index=True)

Unnamed: 0,A,B,C,D
0,1.0,11.0,,
1,2.0,12.0,,
2,3.0,13.0,,
3,4.0,14.0,,
4,,,10.0,11.0
5,,,20.0,22.0
