#### Concatenating, merging, and joining DataFrames are common tasks when related data is spread across several tables. Each of these functions creates a new DataFrame.

In [151]:
import pandas as pd
import numpy as np

#### Concatenation

In [152]:
# First group of students: one row per student with their name and GPA.
dic1 = dict(
    Names=["John", "Sam", "Raj", "Dev"],
    GPA=[67, 78, 89, 90],
)
Students1 = pd.DataFrame(dic1)
Students1

Unnamed: 0,Names,GPA
0,John,67
1,Sam,78
2,Raj,89
3,Dev,90


In [153]:
# Second group of students with the same two columns as the first group.
dic2 = dict(
    Names=["Sree", "Radha", "Mohan", "Latha"],
    GPA=[78, 67, 67, 84],
)
Students2 = pd.DataFrame(dic2)
Students2

Unnamed: 0,Names,GPA
0,Sree,78
1,Radha,67
2,Mohan,67
3,Latha,84


In [154]:
# Stack the two frames on top of each other so that every student's name and
# GPA ends up in a single DataFrame. Each frame keeps its own index labels.
pd.concat(objs=[Students1, Students2])

Unnamed: 0,Names,GPA
0,John,67
1,Sam,78
2,Raj,89
3,Dev,90
0,Sree,78
1,Radha,67
2,Mohan,67
3,Latha,84


In [155]:
# axis="index" (the same as axis=0, the default) appends the rows of
# Students2 below those of Students1.
RowsAdded = pd.concat(objs=[Students1, Students2], axis="index")
RowsAdded

Unnamed: 0,Names,GPA
0,John,67
1,Sam,78
2,Raj,89
3,Dev,90
0,Sree,78
1,Radha,67
2,Mohan,67
3,Latha,84


In [156]:
# After stacking, the index repeats 0-3 once per source frame. Replace it with
# a fresh sequential index; drop=True discards the old labels instead of
# keeping them as an extra column.
# The result is assigned back to the name rather than using inplace=True, so
# the step reads as an ordinary expression and can be chained.
RowsAdded = RowsAdded.reset_index(drop=True)
RowsAdded

Unnamed: 0,Names,GPA
0,John,67
1,Sam,78
2,Raj,89
3,Dev,90
4,Sree,78
5,Radha,67
6,Mohan,67
7,Latha,84


In [157]:
# ignore_index=True discards the incoming index labels and numbers the result
# sequentially, giving the same table as concat followed by
# reset_index(drop=True).
# The flag must be a real boolean: any non-empty string (even 'False') is
# truthy and would silently enable the option.
pd.concat([Students1, Students2], ignore_index=True)

Unnamed: 0,Names,GPA
0,John,67
1,Sam,78
2,Raj,89
3,Dev,90
4,Sree,78
5,Radha,67
6,Mohan,67
7,Latha,84


#### Concatenation side by side

In [158]:
# Display Students1 again as a reminder of the left-hand frame.
Students1

Unnamed: 0,Names,GPA
0,John,67
1,Sam,78
2,Raj,89
3,Dev,90


In [159]:
# Ages for the same four students as Students1; the Names column is repeated
# so the two frames share a key.
dic3 = dict(
    AGE=[32, 33, 30, 29],
    Names=["John", "Sam", "Raj", "Dev"],
)
Students3 = pd.DataFrame(dic3)
Students3

Unnamed: 0,AGE,Names
0,32,John
1,33,Sam
2,30,Raj
3,29,Dev


In [160]:
# axis="columns" (the same as axis=1) places the frames next to each other,
# matching rows by index label. Both frames have a Names column, so it
# appears twice in the result.
pd.concat(objs=[Students1, Students3], axis="columns")

Unnamed: 0,Names,GPA,AGE,Names.1
0,John,67,32,John
1,Sam,78,33,Sam
2,Raj,89,30,Raj
3,Dev,90,29,Dev


<p>In the output above we can see that concatenation simply adds the extra columns side by side, even when that duplicates a column.
 This is where merge comes into use.</p>

In [161]:
# Concatenating only the AGE column avoids the duplicated Names column and
# still yields a single DataFrame.
pd.concat(objs=[Students1, Students3["AGE"]], axis="columns")

Unnamed: 0,Names,GPA,AGE
0,John,67,32
1,Sam,78,33
2,Raj,89,30
3,Dev,90,29


#### Concatenating dissimilar DataFrames

In [162]:
# Display Students1 (columns Names and GPA) before stacking it with a frame
# that has different columns.
Students1

Unnamed: 0,Names,GPA
0,John,67
1,Sam,78
2,Raj,89
3,Dev,90


In [163]:
# Display Students3 (columns AGE and Names); it shares only Names with Students1.
Students3

Unnamed: 0,AGE,Names
0,32,John
1,33,Sam
2,30,Raj
3,29,Dev


In [164]:
# Stacking frames whose columns differ produces the union of the columns.
# NaN is introduced wherever a frame has no data for a column.
pd.concat(objs=[Students1, Students3], axis="index").reset_index(drop=True)

Unnamed: 0,Names,GPA,AGE
0,John,67.0,
1,Sam,78.0,
2,Raj,89.0,
3,Dev,90.0,
4,John,,32.0
5,Sam,,33.0
6,Raj,,30.0
7,Dev,,29.0


### Merging Dataframes

In [165]:

# Display Students1 again; it is the left-hand frame of the merge below.
Students1

Unnamed: 0,Names,GPA
0,John,67
1,Sam,78
2,Raj,89
3,Dev,90


In [166]:
# Outer merge on the shared Names column: rows are matched by student name,
# and names present in either frame are kept.
Students1.merge(Students3, how="outer", on="Names")

Unnamed: 0,Names,GPA,AGE
0,John,67,32
1,Sam,78,33
2,Raj,89,30
3,Dev,90,29


In [167]:
# For these two frames the outer merge on Names gives the same table as
# concatenating the AGE column along axis=1. This holds only because both
# frames list the students in the same order with the same index: concat
# matches rows by index label, whereas merge matches them by Names.
pd.concat(objs=[Students1, Students3["AGE"]], axis="columns")

Unnamed: 0,Names,GPA,AGE
0,John,67,32
1,Sam,78,33
2,Raj,89,30
3,Dev,90,29


### join 

In [173]:
# Display Students1 again; it is the calling (left) frame of the join below.
Students1

Unnamed: 0,Names,GPA
0,John,67
1,Sam,78
2,Raj,89
3,Dev,90


In [172]:
# join matches rows on the index. Both frames have a Names column, so the
# suffixes are needed to tell the left and right copies apart.
Students1.join(
    other=Students3,
    how="left",
    lsuffix="_left",
    rsuffix="_right",
)

Unnamed: 0,Names_left,GPA,AGE,Names_right
0,John,67,32,John
1,Sam,78,33,Sam
2,Raj,89,30,Raj
3,Dev,90,29,Dev
