# Combining Dataframes in Pandas

In [1]:
import pandas as pd

In [2]:
employee_df = pd.DataFrame({
    
    'emp_id': [101, 102, 103],
    'emp_name': ['Rahul', 'John', 'Joe'],
    'dept_id': ['D101', 'D123', 'D231']
})

employee_df

Unnamed: 0,emp_id,emp_name,dept_id
0,101,Rahul,D101
1,102,John,D123
2,103,Joe,D231


In [3]:
new_hire_df = pd.DataFrame({
    'emp_id': [104,105],
    'emp_name': ['Peter','Nassir'],
    'dept_id': ['D101','D213']
})

new_hire_df

Unnamed: 0,emp_id,emp_name,dept_id
0,104,Peter,D101
1,105,Nassir,D213


### Concat can do more than Append

In [4]:
all_emp_df = pd.concat([employee_df, new_hire_df], ignore_index=True) # To keep index sequence, we add ignore_index=True

all_emp_df

Unnamed: 0,emp_id,emp_name,dept_id
0,101,Rahul,D101
1,102,John,D123
2,103,Joe,D231
3,104,Peter,D101
4,105,Nassir,D213


In [5]:
emp_contact_df = pd.DataFrame({
    'emp_id': [101,102,103, 104, 105],
    'mobile_no': ['798749274','834868506', '923586392', '908758374','6782645928']
})

emp_contact_df

Unnamed: 0,emp_id,mobile_no
0,101,798749274
1,102,834868506
2,103,923586392
3,104,908758374
4,105,6782645928


In [6]:
all_emp_df = pd.concat([all_emp_df, emp_contact_df], axis=1) # axis=1 means columnwise

all_emp_df

Unnamed: 0,emp_id,emp_name,dept_id,emp_id.1,mobile_no
0,101,Rahul,D101,101,798749274
1,102,John,D123,102,834868506
2,103,Joe,D231,103,923586392
3,104,Peter,D101,104,908758374
4,105,Nassir,D213,105,6782645928


### Merge can do more than Join

In [7]:
dept_df = pd.DataFrame({
    'dept_id': ['D101','D102','D123','D213','D312'],
    'dept_name': ['Software Engg.', 'HR', 'Marketing','Product','Strategy']
})

dept_df

Unnamed: 0,dept_id,dept_name
0,D101,Software Engg.
1,D102,HR
2,D123,Marketing
3,D213,Product
4,D312,Strategy


In [8]:
all_emp_df.merge( dept_df,on='dept_id') #By default Inner Join


Unnamed: 0,emp_id,emp_name,dept_id,emp_id.1,mobile_no,dept_name
0,101,Rahul,D101,101,798749274,Software Engg.
1,104,Peter,D101,104,908758374,Software Engg.
2,102,John,D123,102,834868506,Marketing
3,105,Nassir,D213,105,6782645928,Product


In [9]:
all_emp_df.merge( dept_df,on='dept_id', how='outer') #Other how= values are 'left', 'right' etc.

Unnamed: 0,emp_id,emp_name,dept_id,emp_id.1,mobile_no,dept_name
0,101.0,Rahul,D101,101.0,798749274.0,Software Engg.
1,104.0,Peter,D101,104.0,908758374.0,Software Engg.
2,102.0,John,D123,102.0,834868506.0,Marketing
3,103.0,Joe,D231,103.0,923586392.0,
4,105.0,Nassir,D213,105.0,6782645928.0,Product
5,,,D102,,,HR
6,,,D312,,,Strategy
