# Merging DataFrames

In [2]:
import pandas as pd

In [5]:
# Build a small purchase table from one dict per row.
# The index labels are given explicitly and the label 1 is repeated on
# purpose: index labels do not have to be unique.
df = pd.DataFrame(
    [
        {'Name': 'Chris', 'Item Purchased': 'Sponge', 'Cost': 22.50},
        {'Name': 'Kevyn', 'Item Purchased': 'Litter', 'Cost': 2.50},
        {'Name': 'Filip', 'Item Purchased': 'Spoon', 'Cost': 5.50},
    ],
    index=[1, 1, 2],
)
df

Unnamed: 0,Cost,Item Purchased,Name
1,22.5,Sponge,Chris
1,2.5,Litter,Kevyn
2,5.5,Spoon,Filip


In [6]:
# Add a 'Date' column from a plain list: the values are assigned to the
# rows in order, one value per row.
df['Date'] = ['dec 1', 'jan 2', 'may 20']

In [9]:
# Assigning a single scalar fills the whole column with that value,
# so every row ends up with Delivered == True.
df['Delivered'] = True
df

Unnamed: 0,Cost,Item Purchased,Name,Date,Delivered
1,22.5,Sponge,Chris,dec 1,True
1,2.5,Litter,Kevyn,jan 2,True
2,5.5,Spoon,Filip,may 20,True


#### A list assigned as a new column must contain exactly as many values as the DataFrame has rows.

In [14]:
# A list with two values cannot fill three rows, so this assignment is
# rejected (length mismatch) and is left commented out:
# df['Rating'] = [1,2] # does not work
# One value per row is accepted.
df['Rating'] = [1,2,3] # works


#### A Series assigned as a column is aligned on index labels; rows whose label is missing from the Series are filled with NaN (not None).

In [18]:
# Move the old index into an ordinary 'index' column so the rows are
# labelled 0, 1, 2.
df2 = df.reset_index()
# Overwrite 'Date' with a Series that only has entries for labels 0 and 2.
# Assignment matches on index label, so the row labelled 1 gets no value
# and shows up as missing in the result.
df2['Date'] = pd.Series(['YES', 'NO'], index=[0, 2])
df2

Unnamed: 0,index,Cost,Item Purchased,Name,Date,Delivered,Rating
0,1,22.5,Sponge,Chris,YES,True,1
1,1,2.5,Litter,Kevyn,,True,2
2,2,5.5,Spoon,Filip,NO,True,3


In [23]:
# Two small tables that share a 'Name' column; only 'Kevyn' appears in both.
staffs = pd.DataFrame(
    [
        {'Name': 'Chris', 'Role': 'Director'},
        {'Name': 'Kevyn', 'Role': 'HR'},
        {'Name': 'Filip', 'Role': 'Researcher'},
    ]
)
students = pd.DataFrame(
    [
        {'Name': 'John', 'School': 'Law'},
        {'Name': 'Kevyn', 'School': 'Art'},
        {'Name': 'Tory', 'School': 'Math'},
    ]
)

# Index both tables by 'Name' so they can later be merged on their indexes.
staff_df = staffs.set_index('Name')
student_df = students.set_index('Name')

# Show both frames separated by one blank line.
print(staff_df, student_df, sep='\n\n')

             Role
Name             
Chris    Director
Kevyn          HR
Filip  Researcher

      School
Name        
John     Law
Kevyn    Art
Tory    Math


In [28]:
# Outer join on the 'Name' indexes: keep every name found in either table;
# a column is left missing where the other table has no matching row.
staff_df.merge(student_df, how='outer', left_index=True, right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Chris,Director,
Filip,Researcher,
John,,Law
Kevyn,HR,Art
Tory,,Math


In [30]:
# Inner join on the 'Name' indexes: keep only the names present in both
# tables.
staff_df.merge(student_df, how='inner', left_index=True, right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Kevyn,HR,Art


In [32]:
# Left join on the 'Name' indexes: keep every staff member and attach the
# school where a student of the same name exists.
staff_df.merge(student_df, how='left', left_index=True, right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Chris,Director,
Kevyn,HR,Art
Filip,Researcher,


In [34]:
# Right join on the 'Name' indexes: keep every student and attach the role
# where a staff member of the same name exists.
staff_df.merge(student_df, how='right', left_index=True, right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
John,,Law
Kevyn,HR,Art
Tory,,Math


In [46]:
# Product catalogue, keyed by 'Product ID'.
products = pd.DataFrame(
    [
        {'Product ID': 4109, 'Price': 5.0, 'Product': 'Sushi Roll'},
        {'Product ID': 1412, 'Price': 0.5, 'Product': 'Egg'},
        {'Product ID': 8931, 'Price': 1.5, 'Product': 'Bagel'},
    ]
).set_index('Product ID')

# Invoice lines; 'Product ID' stays an ordinary column here, and product
# 4109 is ordered twice.
invoices = pd.DataFrame(
    [
        {'Customer': 'Ali', 'Product ID': 4109, 'Quantity': 1},
        {'Customer': 'Eric', 'Product ID': 1412, 'Quantity': 12},
        {'Customer': 'Ande', 'Product ID': 8931, 'Quantity': 6},
        {'Customer': 'Sam', 'Product ID': 4109, 'Quantity': 2},
    ]
)

print(products)
print('-------------------------------------')
print(invoices)

# Match the index of `products` against the 'Product ID' column of
# `invoices`, so each invoice line picks up its price and product name.
merged = products.merge(
    invoices,
    how='inner',
    left_index=True,
    right_on='Product ID',
)
# Show the merged table with a per-line total; `merged` itself is unchanged
# because assign() returns a new frame.
merged.assign(total=merged['Price'] * merged['Quantity'])

            Price     Product
Product ID                   
4109          5.0  Sushi Roll
1412          0.5         Egg
8931          1.5       Bagel
-------------------------------------
  Customer  Product ID  Quantity
0      Ali        4109         1
1     Eric        1412        12
2     Ande        8931         6
3      Sam        4109         2


Unnamed: 0,Price,Product,Customer,Product ID,Quantity,total
0,5.0,Sushi Roll,Ali,4109,1,5.0
3,5.0,Sushi Roll,Sam,4109,2,10.0
1,0.5,Egg,Eric,1412,12,6.0
2,1.5,Bagel,Ande,8931,6,9.0


In [49]:
# Staff and student tables whose name columns are labelled differently
# ('First'/'Last' versus 'First Name'/'Last Name').
staffs = pd.DataFrame(
    [
        {'First': 'Chris', 'Last': 'Reinhardt', 'Role': 'Director'},
        {'First': 'Johnny', 'Last': 'Colt', 'Role': 'Assistant'},
        {'First': 'Trevor', 'Last': 'Noah', 'Role': 'VJ'},
    ]
)
students = pd.DataFrame(
    [
        {'First Name': 'Chris', 'Last Name': 'Reinhardt', 'School': 'Music'},
        {'First Name': 'Connor', 'Last Name': 'Alvis', 'School': 'Art'},
        {'First Name': 'Kory', 'Last Name': 'Noah', 'School': 'Law'},
    ]
)

# Join on the (first name, last name) pair. Both keys must match, so
# 'Trevor Noah' and 'Kory Noah' are not paired even though they share a
# last name.
staffs.merge(
    students,
    how='inner',
    left_on=['First', 'Last'],
    right_on=['First Name', 'Last Name'],
)

Unnamed: 0,First,Last,Role,First Name,Last Name,School
0,Chris,Reinhardt,Director,Chris,Reinhardt,Music
