## Joining Data in Pandas

In [3]:
import pandas as pd
import numpy as np
import matplotlib as sls

In [12]:
# Two small demo frames that share the default 0..1 row index but have no
# column names in common.
df1 = pd.DataFrame(dict(A=[1, 2], B=[3, 4]))
df2 = pd.DataFrame(dict(C=[5, 6], D=[7, 8]))
# A single print call with a newline separator emits the same text as
# printing each frame on its own.
print(df1, df2, sep="\n")

   A  B
0  1  3
1  2  4
   C  D
0  5  7
1  6  8


In [19]:
# Put the two frames side by side: rows are aligned on their index labels
# and the columns of df2 are appended after those of df1.
df3 = pd.concat((df1, df2), axis="columns")
df3

Unnamed: 0,A,B,C,D
0,1,3,5,7
1,2,4,6,8


In [35]:
# Stack the two frames on top of each other. They have no columns in common,
# so each row is NaN in the columns that came from the other frame, and the
# original index labels (0, 1) repeat.
df4 = pd.concat((df1, df2), axis="index")
df4

Unnamed: 0,A,B,C,D
0,1.0,3.0,,
1,2.0,4.0,,
0,,,5.0,7.0
1,,,6.0,8.0


In [36]:
# Count the missing values in each column of the stacked frame.
df4.isna().sum(axis="index")

A    2
B    2
C    2
D    2
dtype: int64

In [37]:
# Replace every missing value with 0. The result is bound back to df4
# instead of using inplace=True, so the frame is not mutated in place and
# the cell gives the same result however many times it is run.
df4 = df4.fillna(0)
df4

Unnamed: 0,A,B,C,D
0,1.0,3.0,0.0,0.0
1,2.0,4.0,0.0,0.0
0,0.0,0.0,5.0,7.0
1,0.0,0.0,6.0,8.0


## Using Merge

In [64]:
# Left table for the merge examples: one (ID, Name) record per row.
df5 = pd.DataFrame(
    [(1, "Efosa"), (2, "Daniel"), (3, "Nate")],
    columns=["ID", "Name"],
)
df5

Unnamed: 0,ID,Name
0,1,Efosa
1,2,Daniel
2,3,Nate


In [65]:
# Right table for the merge examples: one (Student_ID, Score) record per row.
# Its key column is deliberately named differently from df5's "ID".
df6 = pd.DataFrame(
    [(2, 80), (3, 90), (4, 95)],
    columns=["Student_ID", "Score"],
)
df6

Unnamed: 0,Student_ID,Score
0,2,80
1,3,90
2,4,95


## Inner Join
Returns only the records whose key is present in both tables (the intersection of the two tables on the common column).

In [42]:
# df6 keeps its key in "Student_ID", so merging with on='ID' directly raises
# KeyError. Rename the key on a temporary copy (df6 itself is not modified)
# so both sides share "ID" and the result has a single key column.
# how="inner" keeps only the IDs found in both tables.
pd.merge(df5, df6.rename(columns={"Student_ID": "ID"}), on="ID", how="inner")

Unnamed: 0,ID,Name,Score
0,2,Daniel,80
1,3,Nate,90


## Left Join
Returns all records from the left table, together with the matching records from the right table; left rows without a match get NaN in the right table's columns.

In [43]:
# df6 keeps its key in "Student_ID", so merging with on="ID" directly raises
# KeyError. Rename the key on a temporary copy (df6 itself is not modified)
# so both sides share "ID" and the result has a single key column.
# how="left" keeps every row of df5; IDs with no score get NaN.
pd.merge(df5, df6.rename(columns={"Student_ID": "ID"}), on="ID", how="left")

Unnamed: 0,ID,Name,Score
0,1,Efosa,
1,2,Daniel,80.0
2,3,Nate,90.0


In [45]:
# Display the left table again for reference.
df5

Unnamed: 0,ID,Name
0,1,Efosa
1,2,Daniel
2,3,Nate


In [46]:
# Display the right table again for reference.
# NOTE(review): the recorded output for this cell shows an "ID" column, but
# df6 is created with a "Student_ID" column - the output looks like it came
# from an earlier kernel state; re-run to confirm.
df6

Unnamed: 0,ID,Score
0,2,80
1,3,90
2,4,95


## Right Join
Returns all records from the right table, together with the matching records from the left table; right rows without a match get NaN in the left table's columns.

In [44]:
# df6 keeps its key in "Student_ID", so merging with on="ID" directly raises
# KeyError. Rename the key on a temporary copy (df6 itself is not modified)
# so both sides share "ID" and the result has a single key column.
# how="right" keeps every row of df6; IDs with no name get NaN.
pd.merge(df5, df6.rename(columns={"Student_ID": "ID"}), on="ID", how="right")

Unnamed: 0,ID,Name,Score
0,2,Daniel,80
1,3,Nate,90
2,4,,95


## Outer Join
Returns all records from both tables, matched where the keys agree; rows that exist in only one table get NaN in the other table's columns.

In [58]:
# The key columns are named differently in the two tables, so each side's
# key is given explicitly. Both key columns are kept in the result.
df9 = df5.merge(df6, how="outer", left_on="ID", right_on="Student_ID")
df9

Unnamed: 0,ID,Name,Student_ID,Score
0,1.0,Efosa,,
1,2.0,Daniel,2.0,80.0
2,3.0,Nate,3.0,90.0
3,,,4.0,95.0


In [59]:
# Remove the duplicate key column. The result is bound back to df9 and
# errors="ignore" is passed so that re-running this cell is harmless; the
# in-place drop raised KeyError on a second run because the column was
# already gone.
# Note: the row that exists only in df6 keeps NaN in "ID", because its key
# was held in "Student_ID".
df9 = df9.drop(columns="Student_ID", errors="ignore")
df9

Unnamed: 0,ID,Name,Score
0,1.0,Efosa,
1,2.0,Daniel,80.0
2,3.0,Nate,90.0
3,,,95.0


## Joining on Index

In [66]:
# Move each table's key column into its index so the following cells can
# join on the index. The guards make the cell safe to re-run: once the key
# has become the index it is no longer a column, and an unguarded
# set_index call raises KeyError.
if "ID" in df5.columns:
    df5 = df5.set_index("ID")
if "Student_ID" in df6.columns:
    df6 = df6.set_index("Student_ID")


In [70]:
# Outer join that matches rows on the index of both tables instead of on a
# column; every index label from either table appears in the result.
df5.merge(df6, how="outer", left_index=True, right_index=True)

Unnamed: 0,Name,Score
1,Efosa,
2,Daniel,80.0
3,Nate,90.0
4,,95.0


## Using Joins
`DataFrame.join` combines tables on their index and performs a left join by default.

In [73]:
# This works because both tables already have their ID set as the index.
# how="left" is join's default, spelled out here: every row of df5 is kept.
df5.join(df6, how="left")

Unnamed: 0_level_0,Name,Score
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Efosa,
2,Daniel,80.0
3,Nate,90.0


In [74]:
# The same index-based left join with the roles swapped: every row of df6 is
# kept, and names are filled in where the index label also exists in df5.
df6.join(df5, how="left")

Unnamed: 0_level_0,Score,Name
Student_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
2,80,Daniel
3,90,Nate
4,95,
