In [1]:
# Dataframe join.

import numpy as np
import pandas as pd

# Two small example frames with the same column labels ("a", "b") but a
# different number of rows: df1 has six rows, df2 has five.
df1 = pd.DataFrame({"a": [1, 2] * 3, "b": [3, 4] * 3})
df2 = pd.DataFrame({"a": [1, 3, 1, 3, 1], "b": [5, 4, 5, 4, 5]})

In [2]:
df1

Unnamed: 0,a,b
0,1,3
1,2,4
2,1,3
3,2,4
4,1,3
5,2,4


In [3]:
df2

Unnamed: 0,a,b
0,1,5
1,3,4
2,1,5
3,3,4
4,1,5


In [4]:
# "Below" joining using function concat(). 
# Each frame keeps its own index labels, so the result contains duplicate labels!!

pd.concat([df1, df2])

Unnamed: 0,a,b
0,1,3
1,2,4
2,1,3
3,2,4
4,1,3
5,2,4
0,1,5
1,3,4
2,1,5
3,3,4


In [5]:
# Add a constant column "c" to df2 only, so that df1 and df2 no longer have
# identical columns (df1 has no "c").
df2["c"] = 1
df2

Unnamed: 0,a,b,c
0,1,5,1
1,3,4,1
2,1,5,1
3,3,4,1
4,1,5,1


In [6]:
pd.concat([df1, df2])

Unnamed: 0,a,b,c
0,1,3,
1,2,4,
2,1,3,
3,2,4,
4,1,3,
5,2,4,
0,1,5,1.0
1,3,4,1.0
2,1,5,1.0
3,3,4,1.0


In [7]:
# Stacking four dataframes on top of each other:

df = pd.concat([df1, df2, df1, df2])
df

Unnamed: 0,a,b,c
0,1,3,
1,2,4,
2,1,3,
3,2,4,
4,1,3,
5,2,4,
0,1,5,1.0
1,3,4,1.0
2,1,5,1.0
3,3,4,1.0


In [8]:
# Working with the index: label 0 occurs once per concatenated frame, so .loc[0] returns several rows.

df.loc[0, :]

Unnamed: 0,a,b,c
0,1,3,
0,1,5,1.0
0,1,3,
0,1,5,1.0


In [9]:
df.iloc[0]   # A single integer position returns that one row as a Series.

a    1.0
b    3.0
c    NaN
Name: 0, dtype: float64

In [10]:
df.iloc[:2]  # A positional slice returns the selected rows as a DataFrame.

Unnamed: 0,a,b,c
0,1,3,
1,2,4,


In [11]:
# How to reset the index (by default the old index is kept as a new "index" column).

df.reset_index()

Unnamed: 0,index,a,b,c
0,0,1,3,
1,1,2,4,
2,2,1,3,
3,3,2,4,
4,4,1,3,
5,5,2,4,
6,0,1,5,1.0
7,1,3,4,1.0
8,2,1,5,1.0
9,3,3,4,1.0


In [12]:
# drop=True discards the old index instead of inserting it as an "index" column.
# The flag is a boolean, so pass True rather than the integer 1.
df.reset_index(drop=True)

Unnamed: 0,a,b,c
0,1,3,
1,2,4,
2,1,3,
3,2,4,
4,1,3,
5,2,4,
6,1,5,1.0
7,3,4,1.0
8,1,5,1.0
9,3,4,1.0


In [13]:
# The original dataframe df is still unchanged.
df

Unnamed: 0,a,b,c
0,1,3,
1,2,4,
2,1,3,
3,2,4,
4,1,3,
5,2,4,
0,1,5,1.0
1,3,4,1.0
2,1,5,1.0
3,3,4,1.0


In [14]:
# reset_index() returns a new frame, so rebind the name to keep the result.
# Reassignment is used instead of inplace=True; drop is a boolean flag, so
# pass True rather than the integer 1.
df = df.reset_index(drop=True)

In [15]:
df

Unnamed: 0,a,b,c
0,1,3,
1,2,4,
2,1,3,
3,2,4,
4,1,3,
5,2,4,
6,1,5,1.0
7,3,4,1.0
8,1,5,1.0
9,3,4,1.0


In [18]:
# "Next to each other" joining using function concat().

df = pd.concat([df1, df2], axis = 1)
df

Unnamed: 0,a,b,a.1,b.1,c
0,1,3,1.0,5.0,1.0
1,2,4,3.0,4.0,1.0
2,1,3,1.0,5.0,1.0
3,2,4,3.0,4.0,1.0
4,1,3,1.0,5.0,1.0
5,2,4,,,


In [19]:
df.loc[:, "a"]

Unnamed: 0,a,a.1
0,1,1.0
1,2,3.0
2,1,1.0
3,2,3.0
4,1,1.0
5,2,


In [20]:
# Outer joining

# Fresh frames for the join examples. Their index labels overlap only on
# 3 and 4, and their column labels overlap only on "first" and "second".
df1 = pd.DataFrame(
    {"first": [1] * 4, "second": [1] * 4, "third": [1, 2] * 2},
    index=list(range(1, 5)),
)

df2 = pd.DataFrame(
    {"first": [2] * 4, "second": [2] * 4, "fourth": [1, 3] * 2},
    index=list(range(3, 7)),
)

In [21]:
df1

Unnamed: 0,first,second,third
1,1,1,1
2,1,1,2
3,1,1,1
4,1,1,2


In [22]:
df2

Unnamed: 0,first,second,fourth
3,2,2,1
4,2,2,3
5,2,2,1
6,2,2,3


In [26]:
pd.concat([df1, df2], join = "outer", axis = 0)

Unnamed: 0,first,second,third,fourth
1,1,1,1.0,
2,1,1,2.0,
3,1,1,1.0,
4,1,1,2.0,
3,2,2,,1.0
4,2,2,,3.0
5,2,2,,1.0
6,2,2,,3.0


In [27]:
pd.concat([df1, df2], join = "outer", axis = 1)

Unnamed: 0,first,second,third,first.1,second.1,fourth
1,1.0,1.0,1.0,,,
2,1.0,1.0,2.0,,,
3,1.0,1.0,1.0,2.0,2.0,1.0
4,1.0,1.0,2.0,2.0,2.0,3.0
5,,,,2.0,2.0,1.0
6,,,,2.0,2.0,3.0


In [32]:
# Inner joining

df1

Unnamed: 0,first,second,third
1,1,1,1
2,1,1,2
3,1,1,1
4,1,1,2


In [33]:
df2

Unnamed: 0,first,second,fourth
3,2,2,1
4,2,2,3
5,2,2,1
6,2,2,3


In [34]:
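# axis = 1 places the frames side by side; rows are matched on index labels, and join = "inner" keeps only the labels present in both frames (3 and 4).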

pd.concat([df1, df2], join = "inner", axis = 1)

Unnamed: 0,first,second,third,first.1,second.1,fourth
3,1,1,1,2,2,1
4,1,1,2,2,2,3


In [35]:
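# axis = 0 stacks the frames vertically; columns are matched on column names, and join = "inner" keeps only the columns present in both frames (first and second).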

pd.concat([df1, df2], join = "inner", axis = 0)

Unnamed: 0,first,second
1,1,1
2,1,1
3,1,1
4,1,1
3,2,2
4,2,2
5,2,2
6,2,2


In [37]:
# Left join (keeps every row of df1; rows without a match get NaN in the df2 columns)

# The join key is the index. how = "left" is the default value.
df1.join(df2, rsuffix = "_second", how = "left")

Unnamed: 0,first,second,third,first_second,second_second,fourth
1,1,1,1,,,
2,1,1,2,,,
3,1,1,1,2.0,2.0,1.0
4,1,1,2,2.0,2.0,3.0


In [38]:
# Right join (keeps every row of df2; rows without a match get NaN in the df1 columns)

df1.join(df2, rsuffix = "_second", how = "right")

Unnamed: 0,first,second,third,first_second,second_second,fourth
3,1.0,1.0,1.0,2,2,1
4,1.0,1.0,2.0,2,2,3
5,,,,2,2,1
6,,,,2,2,3
