In [1]:
import pandas as pd

In [2]:
# Load the 2004 table from a CSV file in the working directory.
# After merging, its columns appear as Athlete and Medals_2004.
men2004 = pd.read_csv(filepath_or_buffer='men2004.csv')

In [3]:
# Load the 2008 table from a CSV file in the working directory.
# After merging, its columns appear as Athlete and Medals_2008.
men2008 = pd.read_csv(filepath_or_buffer='men2008.csv')

In [4]:
# Total number of rows across both editions (an athlete present in both files is counted twice).
sum(map(len, (men2004, men2008)))

121

# Merge method in pandas
## To merge two dataframes, we need a column (key) that exists in both dataframes.
## For example, we can merge the two dataframes on the athlete name (athletes who won medals in 2004 and/or 2008).
Parameters:
### on = The column name on which we want to merge the dataframes. (means the column name which is common in both dataframes)
### how = The type of merge we want to perform. (inner, outer, left, right)
- inner: Keep only the rows whose key appears in both dataframes (the intersection).
- outer: Keep all rows from both dataframes; values missing on one side are filled with NaN.
- left: Keep all rows from the left dataframe, plus the matching data from the right dataframe.
- right: Keep all rows from the right dataframe, plus the matching data from the left dataframe.
### suffixes = The suffixes appended to columns that have the same name in both dataframes (other than the merge key), e.g. Medals -> Medals_2004 / Medals_2008.
### indicator = If True, adds a `_merge` column to the merged dataframe showing where each row comes from (left_only, right_only, or both).


# - Outer Join
## It will merge the dataframes on the basis of all the data in both dataframes.
## Include all the data from both dataframes: rows found only in the left, only in the right, and in both.

In [5]:
# Outer join on the athlete name: keeps every athlete from either edition.
# Overlapping non-key columns get the _2004 / _2008 suffixes, and
# indicator=True adds a `_merge` column (left_only / right_only / both).
men0408 = men2004.merge(
    men2008,
    how='outer',
    on='Athlete',
    suffixes=('_2004', '_2008'),
    indicator=True,
)

In [6]:
# Show the outer-join result; NaN marks an edition in which the athlete has no entry.
men0408

Unnamed: 0,Athlete,Medals_2004,Medals_2008,_merge
0,"ADRIAN, Nathan",,1.0,right_only
1,"BERENS, Ricky",,1.0,right_only
2,"BERNARD, Alain",,3.0,right_only
3,"BOUSQUET, Frederick",,1.0,right_only
4,"BOVELL, George",1.0,,left_only
...,...,...,...,...
100,"WOODWARD, Gabe",1.0,,left_only
101,"YAMAMOTO, Takashi",2.0,,left_only
102,"ZASTROW, Mitja",1.0,,left_only
103,"ZHANG, Lin",,1.0,right_only


#### The indicator column is showing the source of the data.
#### 46 rows are coming from right dataframe
#### 43 rows are coming from left dataframe
#### 16 rows are coming from both dataframes

In [7]:
# Count how many rows come from the right frame only, the left frame only, or both.
men0408['_merge'].value_counts()

_merge
right_only    46
left_only     43
both          16
Name: count, dtype: int64

# - Outer Join without intersection
## It will keep all athletes who were successful in only one edition !!! NOT IN BOTH !!!
## Exclude the intersection of the dataframes.

In [25]:
# Rebuild the same outer join as in the "Outer Join" section so the filters
# below start from a known state of `men0408`.
men0408 = men2004.merge(
    men2008,
    how='outer',
    on='Athlete',
    suffixes=('_2004', '_2008'),
    indicator=True,
)

- Filter the data to obtain the athletes who were successful in only one edition (outer join without intersection): exclude 'both', i.e. the intersection.

In [15]:
# Drop the intersection: keep rows flagged left_only or right_only.
men0408.loc[men0408['_merge'] != 'both']

Unnamed: 0,Athlete,Medals_2004,Medals_2008,_merge
0,"ADRIAN, Nathan",,1.0,right_only
1,"BERENS, Ricky",,1.0,right_only
2,"BERNARD, Alain",,3.0,right_only
3,"BOUSQUET, Frederick",,1.0,right_only
4,"BOVELL, George",1.0,,left_only
...,...,...,...,...
100,"WOODWARD, Gabe",1.0,,left_only
101,"YAMAMOTO, Takashi",2.0,,left_only
102,"ZASTROW, Mitja",1.0,,left_only
103,"ZHANG, Lin",,1.0,right_only


# - Inner Join
## It will merge the dataframes on the basis of the common data in both dataframes.
## Include the intersection of the dataframes.

In [9]:
# Inner join on the athlete name: only athletes present in both editions,
# so every row is flagged `both` in the `_merge` column.
men2004.merge(
    men2008,
    how='inner',
    on='Athlete',
    suffixes=('_2004', '_2008'),
    indicator=True,
)

Unnamed: 0,Athlete,Medals_2004,Medals_2008,_merge
0,"PHELPS, Michael",8,8,both
1,"PEIRSOL, Aaron",3,3,both
2,"CROCKER, Ian",3,1,both
3,"KITAJIMA, Kosuke",3,3,both
4,"HANSEN, Brendan",3,1,both
5,"HACKETT, Grant",3,2,both
6,"LEZAK, Jason",2,3,both
7,"KELLER, Klete",2,1,both
8,"LOCHTE, Ryan",2,4,both
9,"VENDT, Erik",1,1,both


# - Left Join
## It will keep all the rows of the left dataframe, including its intersection with the right dataframe.
## All athletes who were successful in the 2004 edition (incl. their 2008 medals if they were successful in both editions)

In [29]:
# Left join on the athlete name: every row of men2004 is kept, and the 2008
# medal count is attached where the athlete also appears in men2008.
men_left = men2004.merge(
    men2008,
    how='left',
    on='Athlete',
    suffixes=('_2004', '_2008'),
    indicator=True,
)

In [30]:
# Show the left-join result; Medals_2008 is NaN for athletes flagged left_only.
men_left

Unnamed: 0,Athlete,Medals_2004,Medals_2008,_merge
0,"PHELPS, Michael",8,8.0,both
1,"THORPE, Ian",4,,left_only
2,"SCHOEMAN, Roland",3,,left_only
3,"PEIRSOL, Aaron",3,3.0,both
4,"CROCKER, Ian",3,1.0,both
5,"KITAJIMA, Kosuke",3,3.0,both
6,"HANSEN, Brendan",3,1.0,both
7,"VAN DEN HOOGENBAND, Pieter",3,,left_only
8,"HACKETT, Grant",3,2.0,both
9,"MORITA, Tomomi",2,,left_only


# - Left Join without intersection
## It will merge the dataframes on the basis of the data in the left dataframe.
## All Athlete, who were successful only in Edition 2004
### Exclude the intersection and right side data

- Filter the data to obtain the athletes who were successful only in the 2004 edition (left join without the intersection and without the right side).

In [20]:
# Keep only the athletes found in the 2004 frame and not in the 2008 frame.
# NOTE(review): the saved output under this cell has the columns
# Athlete/Medals/_merge and lists athletes (e.g. "CROCKER, Ian") that the
# inner-join cell reports as `both`; it looks stale -- re-run this cell after
# the outer merge that defines `men0408`.
men0408.loc[men0408['_merge'] == 'left_only']

Unnamed: 0,Athlete,Medals,_merge
1,"THORPE, Ian",4,left_only
2,"SCHOEMAN, Roland",3,left_only
4,"CROCKER, Ian",3,left_only
6,"HANSEN, Brendan",3,left_only
7,"VAN DEN HOOGENBAND, Pieter",3,left_only
8,"HACKETT, Grant",3,left_only
9,"MORITA, Tomomi",2,left_only
10,"LEZAK, Jason",2,left_only
11,"ROGAN, Markus",2,left_only
12,"KELLER, Klete",2,left_only


# - Right Join
## It will keep all the rows of the right dataframe, including its intersection with the left dataframe.
## All athletes who were successful in the 2008 edition (incl. their 2004 medals if they were successful in both editions)

In [31]:
# Right join on the athlete name: every row of men2008 is kept, and the 2004
# medal count is attached where the athlete also appears in men2004.
men_right = men2004.merge(
    men2008,
    how='right',
    on='Athlete',
    suffixes=('_2004', '_2008'),
    indicator=True,
)

In [33]:
# Show the right-join result; Medals_2004 is NaN for athletes flagged right_only.
men_right

Unnamed: 0,Athlete,Medals_2004,Medals_2008,_merge
0,"PHELPS, Michael",8.0,8,both
1,"LOCHTE, Ryan",2.0,4,both
2,"BERNARD, Alain",,3,right_only
3,"SULLIVAN, Eamon",,3,right_only
4,"LAUTERSTEIN, Andrew",,3,right_only
...,...,...,...,...
57,"LAGUNOV, Evgeniy",,1,right_only
58,"BERENS, Ricky",,1,right_only
59,"LURZ, Thomas",,1,right_only
60,"MALLET, Gregory",,1,right_only


# - Right Join without intersection
## It will merge the dataframes on the basis of the data in the right dataframe.
## All Athlete, who were successful only in Edition 2008

In [27]:
# Keep only the athletes found in the 2008 frame and not in the 2004 frame.
men0408.loc[men0408['_merge'] == 'right_only']

Unnamed: 0,Athlete,Medals_2004,Medals_2008,_merge
0,"ADRIAN, Nathan",,1.0,right_only
1,"BERENS, Ricky",,1.0,right_only
2,"BERNARD, Alain",,3.0,right_only
3,"BOUSQUET, Frederick",,1.0,right_only
6,"BRITS, Grant",,1.0,right_only
7,"BRODIE, Leith",,2.0,right_only
8,"CALLUS, Ashley",,1.0,right_only
10,"CAVIC, Milorad",,1.0,right_only
12,"CIELO FILHO, Cesar",,2.0,right_only
13,"COCHRANE, Ryan",,1.0,right_only
