# Pandas Merge Tutorial


### Basic Merge Using a DataFrame Column

In [4]:
import pandas as pd

# Temperature reading per city, written one row per city.
# df1 is the left-hand frame of the first merge example.
df1 = pd.DataFrame(
    [
        ("new york", 21),
        ("chicago", 14),
        ("orlando", 35),
    ],
    columns=["city", "temperature"],
)
df1

Unnamed: 0,city,temperature
0,new york,21
1,chicago,14
2,orlando,35


In [5]:
# Humidity reading per city. The cities are the same three as in df1 but in a
# different row order, so the merge has to match on the value, not the position.
df2 = pd.DataFrame(
    [
        ("chicago", 65),
        ("new york", 68),
        ("orlando", 75),
    ],
    columns=["city", "humidity"],
)
df2

Unnamed: 0,city,humidity
0,chicago,65
1,new york,68
2,orlando,75


In [6]:
# Join the two frames on their shared "city" column.
# With no `how` argument the merge behaves like an inner join.
df3 = df1.merge(df2, on="city")
df3

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65
2,orlando,35,75


In [7]:
import pandas as pd  # repeated from the first cell; harmless when cells run in order

# Second humidity table: shares "chicago" and "new york" with df2,
# but has "san francisco" where df2 has "orlando".
df4 = pd.DataFrame(
    [
        ("chicago", 65),
        ("new york", 68),
        ("san francisco", 71),
    ],
    columns=["city", "humidity"],
)
df4

Unnamed: 0,city,humidity
0,chicago,65
1,new york,68
2,san francisco,71


In [10]:
# Default merge of the two humidity tables: only cities present in both survive.
# Both frames carry a "humidity" column, so the result shows them as
# humidity_x (from df2) and humidity_y (from df4).
df5 = df2.merge(df4, on="city")
df5

Unnamed: 0,city,humidity_x,humidity_y
0,chicago,65,65
1,new york,68,68


In [11]:
# Outer join: keep every city from either frame.
# A city missing on one side gets NaN in that side's humidity column.
df5 = df2.merge(
    df4,
    on="city",
    how="outer",
)
df5

Unnamed: 0,city,humidity_x,humidity_y
0,chicago,65.0,65.0
1,new york,68.0,68.0
2,orlando,75.0,
3,san francisco,,71.0


In [12]:
# Left join: keep every row of df2 (the left frame);
# cities that df4 lacks get NaN in humidity_y.
df5 = df2.merge(
    df4,
    on="city",
    how="left",
)
df5

Unnamed: 0,city,humidity_x,humidity_y
0,chicago,65,65.0
1,new york,68,68.0
2,orlando,75,


In [13]:
# Right join: keep every row of df4 (the right frame);
# cities that df2 lacks get NaN in humidity_x.
df5 = df2.merge(
    df4,
    on="city",
    how="right",
)
df5

Unnamed: 0,city,humidity_x,humidity_y
0,chicago,65.0,65
1,new york,68.0,68
2,san francisco,,71


In [14]:
# Outer join again, this time with indicator=True: the result gains a `_merge`
# column saying whether each row came from both frames, the left only, or the right only.
df5 = df2.merge(
    df4,
    on="city",
    how="outer",
    indicator=True,
)
df5

Unnamed: 0,city,humidity_x,humidity_y,_merge
0,chicago,65.0,65.0,both
1,new york,68.0,68.0,both
2,orlando,75.0,,left_only
3,san francisco,,71.0,right_only


## Suffixes for Overlapping Column Names

In [15]:
# Rebuild df1 with both a temperature and a humidity column, so that it
# overlaps with df2 on more than just the join key.
df1 = pd.DataFrame(
    [
        ("new york", 21, 65),
        ("chicago", 14, 68),
        ("orlando", 35, 71),
        ("baltimore", 38, 75),
    ],
    columns=["city", "temperature", "humidity"],
)
df1

Unnamed: 0,city,temperature,humidity
0,new york,21,65
1,chicago,14,68
2,orlando,35,71
3,baltimore,38,75


In [16]:
# Rebuild df2 with the same three column names as df1, so every non-key
# column collides when the two frames are merged.
df2 = pd.DataFrame(
    [
        ("chicago", 21, 65),
        ("new york", 14, 68),
        ("san diego", 35, 71),
    ],
    columns=["city", "temperature", "humidity"],
)
df2


Unnamed: 0,city,temperature,humidity
0,chicago,21,65
1,new york,14,68
2,san diego,35,71


In [17]:
# Merge on "city" with the default settings: the clashing temperature/humidity
# columns come back with _x (left frame) and _y (right frame) appended.
df3 = df1.merge(df2, on="city")
df3

Unnamed: 0,city,temperature_x,humidity_x,temperature_y,humidity_y
0,new york,21,65,14,68
1,chicago,14,68,21,65


In [18]:
# Outer join with custom suffixes: overlapping columns from df1 get "_first"
# and those from df2 get "_second" in place of the default _x/_y.
df3 = df1.merge(
    df2,
    on="city",
    how="outer",
    suffixes=('_first', '_second'),
)
df3

Unnamed: 0,city,temperature_first,humidity_first,temperature_second,humidity_second
0,new york,21.0,65.0,14.0,68.0
1,chicago,14.0,68.0,21.0,65.0
2,orlando,35.0,71.0,,
3,baltimore,38.0,75.0,,
4,san diego,,,35.0,71.0
