In [13]:
import pandas as pd

# Employee CSV, resolved relative to the notebook's working directory.
EMPS_DF_CSV = 'ex03_emps_df.csv'

# Load the employee records, then relabel the columns positionally. The five
# names below replace whatever header the file carries, so the CSV must have
# exactly five columns.
df = pd.read_csv(EMPS_DF_CSV)
df.columns = ['EmpNo', 'Name', 'Email', 'Phone', 'Location']

# Rich display instead of print(), matching the other cells of this notebook
# and avoiding the wrapped plain-text rendering of wide frames.
display(df)

   EmpNo          Name                     Email               Phone  \
0   9001  Jeff Russell   jeff.russel@company.com  +0 (982) 3454-8290   
1   9002  Jane Boorman  jane.boorman@company.com  +0 (982) 1167-9388   
2   9003    Tom Heints    tom.heints@company.com  +0 (982) 4544-8822   

          Location  
0        Amsterdam  
1         Budapest  
2  Washington D.C.  


In [14]:
# Salary records as [EmpNo, Salary] rows.
data = [
    [9001, 3000],
    [9002, 2800],
    [9003, 2500]
]

# No `salaries.name = ...` here: a DataFrame has no `name` semantics (that is a
# Series concept), the attribute was never read, and merge() ignores it.
salaries = pd.DataFrame(data, columns = ['EmpNo', 'Salary'])

# Left join on `EmpNo`: every employee row is kept and gains a `Salary` column.
# `validate='many_to_one'` makes pandas raise a MergeError if `salaries` ever
# contains a duplicated `EmpNo`, instead of silently duplicating employee rows.
df = df.merge(salaries, on='EmpNo', how='left', validate='many_to_one')

display(df)

Unnamed: 0,EmpNo,Name,Email,Phone,Location,Salary
0,9001,Jeff Russell,jeff.russel@company.com,+0 (982) 3454-8290,Amsterdam,3000
1,9002,Jane Boorman,jane.boorman@company.com,+0 (982) 1167-9388,Budapest,2800
2,9003,Tom Heints,tom.heints@company.com,+0 (982) 4544-8822,Washington D.C.,2500


In [15]:
# Append a new employee, `Tom Hardy`, who has no salary record. The record is
# built as a Series keyed by column name, wrapped in a one-row DataFrame and
# concatenated onto `df` with a freshly numbered index. The new row carries no
# `Salary` key, so that cell ends up empty (NaN) in the result.
tom_hardy = pd.Series({
    'EmpNo': 9004,
    'Name': 'Tom Hardy',
    'Email': 'tom.hardy@company.com',
    'Phone': '+0 (111) 4544-8822',
    'Location': 'Valencia',
})

tom_hardy_row = pd.DataFrame([tom_hardy])
df = pd.concat([df, tom_hardy_row], ignore_index=True)

display(df)

Unnamed: 0,EmpNo,Name,Email,Phone,Location,Salary
0,9001,Jeff Russell,jeff.russel@company.com,+0 (982) 3454-8290,Amsterdam,3000.0
1,9002,Jane Boorman,jane.boorman@company.com,+0 (982) 1167-9388,Budapest,2800.0
2,9003,Tom Heints,tom.heints@company.com,+0 (982) 4544-8822,Washington D.C.,2500.0
3,9004,Tom Hardy,tom.hardy@company.com,+0 (111) 4544-8822,Valencia,


In [16]:
# Orders table: one [Pono, EmpNo, Total] row per order. Only EmpNo 9001 and
# 9002 appear here; the rows are kept as a plain list of lists in `data`.
order_columns = ['Pono', 'EmpNo', 'Total']
data = [
    [2608, 9001, 35],
    [2617, 9001, 35],
    [2620, 9001, 139],
    [2612, 9002, 95],
    [2626, 9002, 218],
]

orders = pd.DataFrame(data=data, columns=order_columns)

display(orders)

Unnamed: 0,Pono,EmpNo,Total
0,2608,9001,35
1,2617,9001,35
2,2620,9001,139
3,2612,9002,95
4,2626,9002,218


In [17]:
# One `Department` label per `Email`, covering all four employee emails.
department_columns = ['Email', 'Department']
data = [
    ['jeff.russel@company.com', 'Head of Sales'],
    ['jane.boorman@company.com', 'Designer'],
    ['tom.heints@company.com', 'Developer'],
    ['tom.hardy@company.com', 'Product Manager'],
]

departments = pd.DataFrame(data=data, columns=department_columns)

display(departments)

Unnamed: 0,Email,Department
0,jeff.russel@company.com,Head of Sales
1,jane.boorman@company.com,Designer
2,tom.heints@company.com,Developer
3,tom.hardy@company.com,Product Manager


In [18]:
# Right join on `Email`: every row of `departments` is kept, and the matching
# employee columns from `df` are attached to it.
df = pd.merge(df, departments, how='right', on='Email')

display(df)

Unnamed: 0,EmpNo,Name,Email,Phone,Location,Salary,Department
0,9001,Jeff Russell,jeff.russel@company.com,+0 (982) 3454-8290,Amsterdam,3000.0,Head of Sales
1,9002,Jane Boorman,jane.boorman@company.com,+0 (982) 1167-9388,Budapest,2800.0,Designer
2,9003,Tom Heints,tom.heints@company.com,+0 (982) 4544-8822,Washington D.C.,2500.0,Developer
3,9004,Tom Hardy,tom.hardy@company.com,+0 (111) 4544-8822,Valencia,,Product Manager


In [19]:
# Work modality per `Email`. There is deliberately no row for
# jeff.russel@company.com, which the join cells that follow rely on.
modality_columns = ['Email', 'Modality']
data = [
    ['jane.boorman@company.com', 'Remote'],
    ['tom.heints@company.com', 'Remote'],
    ['tom.hardy@company.com', 'Hybrid'],
]

modality = pd.DataFrame(data=data, columns=modality_columns)

display(modality)

Unnamed: 0,Email,Modality
0,jane.boorman@company.com,Remote
1,tom.heints@company.com,Remote
2,tom.hardy@company.com,Hybrid


The `inner` join below drops `Jeff Russell`: he has no row in the `modality` DataFrame, and an inner join keeps only
the emails that are present in both DataFrames.

In [20]:
# Inner join on `Email`: only emails present in both `df` and `modality`
# survive, and each surviving row gains a `Modality` column.
df_inner = pd.merge(df, modality, how='inner', on='Email')

display(df_inner)

Unnamed: 0,EmpNo,Name,Email,Phone,Location,Salary,Department,Modality
0,9002,Jane Boorman,jane.boorman@company.com,+0 (982) 1167-9388,Budapest,2800.0,Designer,Remote
1,9003,Tom Heints,tom.heints@company.com,+0 (982) 4544-8822,Washington D.C.,2500.0,Developer,Remote
2,9004,Tom Hardy,tom.hardy@company.com,+0 (111) 4544-8822,Valencia,,Product Manager,Hybrid


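The `outer` join below keeps `Jeff Russell` even though he has no row in the `modality` DataFrame: an outer join keeps
the emails from both DataFrames and leaves his `Modality` value empty (NaN). An outer join also sorts the result by the join key, which is why the row order changes.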

In [22]:
# Outer join on `Email`: the union of emails from `df` and `modality` is kept;
# a row with no match on one side gets empty (NaN) values for that side's columns.
df_outer = pd.merge(df, modality, how='outer', on='Email')

display(df_outer)

Unnamed: 0,EmpNo,Name,Email,Phone,Location,Salary,Department,Modality
0,9002,Jane Boorman,jane.boorman@company.com,+0 (982) 1167-9388,Budapest,2800.0,Designer,Remote
1,9001,Jeff Russell,jeff.russel@company.com,+0 (982) 3454-8290,Amsterdam,3000.0,Head of Sales,
2,9004,Tom Hardy,tom.hardy@company.com,+0 (111) 4544-8822,Valencia,,Product Manager,Hybrid
3,9003,Tom Heints,tom.heints@company.com,+0 (982) 4544-8822,Washington D.C.,2500.0,Developer,Remote
