# Merge DataFrames

In [2]:
import pandas as pd
# Left-hand sample frame: one temperature value per city.
df1 = pd.DataFrame(
    {
        "cities": ["newYork", "chichago", "Sandiego"],
        "temperature": [21, 9, 26],
    }
)
df1

Unnamed: 0,cities,temperature
0,newYork,21
1,chichago,9
2,Sandiego,26


In [3]:
# Right-hand sample frame: one humidity value per city (same cities as df1).
df2 = pd.DataFrame(
    {
        "cities": ["newYork", "chichago", "Sandiego"],
        "humidity": [55, 49, 70],
    }
)
df2

Unnamed: 0,cities,humidity
0,newYork,55
1,chichago,49
2,Sandiego,70


In [4]:
# Combine the two frames row-by-row, matching on the shared 'cities' column.
df3 = pd.merge(df1, df2, on="cities")
df3

Unnamed: 0,cities,temperature,humidity
0,newYork,21,55
1,chichago,9,49
2,Sandiego,26,70


In [5]:
# pd.merge works like a database (SQL) join.
# By default it performs an inner join: it keeps only the key values that appear in both DataFrames (an intersection).
# With the matching data above this is not visible; with some messy data below we will see the difference.

In [7]:
# Temperature frame again, now with an extra city ('baltimore') that the
# humidity frame will not contain.
df1 = pd.DataFrame(
    {
        "cities": ["newYork", "chichago", "Sandiego", "baltimore"],
        "temperature": [21, 9, 26, 32],
    }
)
df1

Unnamed: 0,cities,temperature
0,newYork,21
1,chichago,9
2,Sandiego,26
3,baltimore,32


In [8]:
# Humidity frame whose third city ('San fransciso') has no counterpart among
# the temperature rows.
df2 = pd.DataFrame(
    {
        "cities": ["newYork", "chichago", "San fransciso"],
        "humidity": [55, 49, 71],
    }
)
df2

Unnamed: 0,cities,humidity
0,newYork,55
1,chichago,49
2,San fransciso,71


In [9]:
# Default (inner) merge on 'cities': rows whose city is missing from either
# frame are dropped from the result.
df3 = pd.merge(df1, df2, on="cities")
df3

Unnamed: 0,cities,temperature,humidity
0,newYork,21,55
1,chichago,9,49


In [10]:
# See the difference: merge only joined the cities that are present in both DataFrames, i.e. it took their intersection.

# We can also take the union of these two DataFrames.

In [12]:
# how="outer" keeps the rows of both frames (a union of the keys).
# No `on=` is passed, so pandas joins on the columns the two frames have in
# common.
df3 = pd.merge(df1, df2, how="outer")
df3

Unnamed: 0,cities,temperature,humidity
0,San fransciso,,71.0
1,Sandiego,26.0,
2,baltimore,32.0,
3,chichago,9.0,49.0
4,newYork,21.0,55.0


In [13]:
# Now every city from both DataFrames is kept, even when it has no match; the value missing on the other side is filled with NaN.

In [14]:
# how="inner" keeps only the keys found in both frames (an intersection).
# No `on=` is passed, so pandas joins on the columns the two frames have in
# common.
df3 = pd.merge(df1, df2, how="inner")
df3

Unnamed: 0,cities,temperature,humidity
0,newYork,21,55
1,chichago,9,49


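# how='inner' is merge's default, so this result matches the earlier merge that passed only on='cities': both keep just the cities present in both DataFrames (`on` picks the key column, `how` picks the join type)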

In [18]:
# Left join on 'cities': every row of the first frame (df1) is kept; columns
# coming from df2 are NaN where df2 has no matching city.
df3 = pd.merge(df1, df2, on="cities", how="left")
df3

Unnamed: 0,cities,temperature,humidity
0,newYork,21,55.0
1,chichago,9,49.0
2,Sandiego,26,
3,baltimore,32,


With how='left', merge keeps every row of the left DataFrame (the first argument, df1) and adds the columns of df2 where the city matches; cities that are missing from df2 get NaN.

In [19]:
# Right join on 'cities': every row of the second frame (df2) is kept; columns
# coming from df1 are NaN where df1 has no matching city.
df3 = pd.merge(df1, df2, on="cities", how="right")
df3

Unnamed: 0,cities,temperature,humidity
0,newYork,21.0,55
1,chichago,9.0,49
2,San fransciso,,71


In [21]:
# To see which DataFrame each row came from, pass indicator=True: merge then
# adds a `_merge` column whose value is 'left_only', 'right_only' or 'both'.
df3 = pd.merge(df1, df2, on="cities", how="outer", indicator=True)
df3

Unnamed: 0,cities,temperature,humidity,_merge
0,San fransciso,,71.0,right_only
1,Sandiego,26.0,,left_only
2,baltimore,32.0,,left_only
3,chichago,9.0,49.0,both
4,newYork,21.0,55.0,both


# suffixes

In [22]:
# Left frame for the suffixes example: it carries both a 'temperature' and a
# 'humidity' column alongside the 'cities' key.
df1 = pd.DataFrame(
    {
        "cities": ["newYork", "chichago", "orlamb", "baltimore"],
        "temperature": [21, 9, 26, 32],
        "humidity": [65, 68, 71, 78],
    }
)
df1

Unnamed: 0,cities,temperature,humidity
0,newYork,21,65
1,chichago,9,68
2,orlamb,26,71
3,baltimore,32,78


In [25]:
# Right frame for the suffixes example: it carries both a 'temperature' and a
# 'humidity' column alongside the 'cities' key.
df2 = pd.DataFrame(
    {
        "cities": ["newYork", "chichago", "sandiego"],
        "temperature": [20, 10, 30],
        "humidity": [55, 58, 60],
    }
)
df2

Unnamed: 0,cities,temperature,humidity
0,newYork,20,55
1,chichago,10,58
2,sandiego,30,60


In [28]:
 df3=pd.merge(df1,df2,on='cities',suffixes=('_left','_right'))
df3

Unnamed: 0,cities,temperature_left,humidity_left,temperature_right,humidity_right
0,newYork,21,65,20,55
1,chichago,9,68,10,58
