In [1]:
import numpy as np
import pandas as pd

Merging two dataframes

In [2]:

# Employee master table: one row per employee, identified by emp_id.
employees = pd.DataFrame(dict(
    emp_id=[101, 102, 103, 104, 105],
    name=['Alice', 'Bob', 'Charlie', 'Diana', 'Ethan'],
    department=['HR', 'IT', 'Finance', 'IT', 'Marketing'],
))

employees

Unnamed: 0,emp_id,name,department
0,101,Alice,HR
1,102,Bob,IT
2,103,Charlie,Finance
3,104,Diana,IT
4,105,Ethan,Marketing


In [3]:
# Pay table keyed by emp_id. Note that 103 and 105 have no row here,
# while 106 has pay data but no matching employee record.
compensation = pd.DataFrame(dict(
    emp_id=[101, 102, 104, 106],
    salary=[60000, 75000, 80000, 72000],
    bonus=[5000, 7000, 8000, 6000],
))

compensation


Unnamed: 0,emp_id,salary,bonus
0,101,60000,5000
1,102,75000,7000
2,104,80000,8000
3,106,72000,6000


In [4]:
# Default merge: with no `on=` given, pandas joins on the columns the two
# frames share (here only emp_id) and uses an inner join.
employees.merge(compensation)

Unnamed: 0,emp_id,name,department,salary,bonus
0,101,Alice,HR,60000,5000
1,102,Bob,IT,75000,7000
2,104,Diana,IT,80000,8000


In [5]:
# Inner join on emp_id: only ids present in BOTH frames survive,
# so employees without pay data (and pay rows without an employee) are dropped.
pd.merge(left=employees, right=compensation, how='inner', on='emp_id')

Unnamed: 0,emp_id,name,department,salary,bonus
0,101,Alice,HR,60000,5000
1,102,Bob,IT,75000,7000
2,104,Diana,IT,80000,8000


In [6]:
# Outer join on emp_id: every id from either frame appears once.
# Cells with no counterpart on the other side are filled with NaN.
employees.merge(compensation, how='outer', on='emp_id')

Unnamed: 0,emp_id,name,department,salary,bonus
0,101,Alice,HR,60000.0,5000.0
1,102,Bob,IT,75000.0,7000.0
2,103,Charlie,Finance,,
3,104,Diana,IT,80000.0,8000.0
4,105,Ethan,Marketing,,
5,106,,,72000.0,6000.0


In [7]:
# Left join on emp_id: every employee row is kept; salary/bonus are attached
# where a matching emp_id exists in `compensation` and left as NaN otherwise.
pd.merge(left=employees, right=compensation, how='left', on='emp_id')


Unnamed: 0,emp_id,name,department,salary,bonus
0,101,Alice,HR,60000.0,5000.0
1,102,Bob,IT,75000.0,7000.0
2,103,Charlie,Finance,,
3,104,Diana,IT,80000.0,8000.0
4,105,Ethan,Marketing,,


In [8]:
# Right join on emp_id: every compensation row is kept; name/department are
# attached where a matching emp_id exists in `employees`.
employees.merge(compensation, how='right', on='emp_id')


Unnamed: 0,emp_id,name,department,salary,bonus
0,101,Alice,HR,60000,5000
1,102,Bob,IT,75000,7000
2,104,Diana,IT,80000,8000
3,106,,,72000,6000


Concatenating two dataframes

In [9]:
# Two monthly order tables with identical columns, used below to
# demonstrate pd.concat along rows and along columns.

# January orders
jan_sales = pd.DataFrame(dict(
    order_id=[1001, 1002, 1003],
    product=['Laptop', 'Phone', 'Tablet'],
    amount=[1200, 800, 600],
))

# February orders
feb_sales = pd.DataFrame(dict(
    order_id=[1004, 1005, 1006],
    product=['Laptop', 'Phone', 'Headphones'],
    amount=[1300, 850, 200],
))


In [10]:
# Display the January orders table.
jan_sales

Unnamed: 0,order_id,product,amount
0,1001,Laptop,1200
1,1002,Phone,800
2,1003,Tablet,600


In [11]:
# Display the February orders table.
feb_sales

Unnamed: 0,order_id,product,amount
0,1004,Laptop,1300
1,1005,Phone,850
2,1006,Headphones,200


In [12]:
# Concatenate row-wise: February's rows are stacked beneath January's.
# axis=0 is the default. Each frame keeps its own row labels, so the
# resulting index repeats 0-2.
pd.concat([jan_sales, feb_sales], axis=0)

Unnamed: 0,order_id,product,amount
0,1001,Laptop,1200
1,1002,Phone,800
2,1003,Tablet,600
0,1004,Laptop,1300
1,1005,Phone,850
2,1006,Headphones,200


In [13]:
# Concatenate column-wise: the two months are placed side by side, aligned on
# row label. Both frames use the same column names, so each name appears twice.
pd.concat([jan_sales, feb_sales], axis='columns')

Unnamed: 0,order_id,product,amount,order_id.1,product.1,amount.1
0,1001,Laptop,1200,1004,Laptop,1300
1,1002,Phone,800,1005,Phone,850
2,1003,Tablet,600,1006,Headphones,200


**Joining dataframes**

In [14]:
# NOTE: this rebinds `employees`, replacing the frame defined at the top of the
# notebook. Re-running the earlier merge cells after this one will use these rows.
employees = pd.DataFrame(dict(
    emp_id=[1, 2, 3, 4, 5],
    name=['Amit', 'Sara', 'John', 'Neha', 'Raj'],
    department=['HR', 'IT', 'Finance', 'IT', 'Marketing'],
))

# Salary table keyed by emp_id. Ids 1 and 5 have no salary row, and id 6 has a
# salary but no matching employee.
salaries = pd.DataFrame(dict(
    emp_id=[2, 3, 4, 6],
    salary=[50000, 60000, 55000, 52000],
))

In [15]:
# Inner join: keep only the emp_ids that appear in both `employees` and `salaries`.
employees.merge(salaries, how='inner', on='emp_id')


Unnamed: 0,emp_id,name,department,salary
0,2,Sara,IT,50000
1,3,John,Finance,60000
2,4,Neha,IT,55000


In [16]:
# Outer join: keep all records from both tables, even if emp_id does not match.
# Employees without a salary get NaN in `salary`; a salary row without an
# employee gets NaN in `name` and `department`.
pd.merge(employees, salaries, on='emp_id', how='outer')


Unnamed: 0,emp_id,name,department,salary
0,1,Amit,HR,
1,2,Sara,IT,50000.0
2,3,John,Finance,60000.0
3,4,Neha,IT,55000.0
4,5,Raj,Marketing,
5,6,,,52000.0


In [17]:
# Left join: keep all employees and add salary where emp_id matches.
# Employees with no salary row get NaN; salary rows whose emp_id is not in
# `employees` are dropped.
pd.merge(employees, salaries, on='emp_id', how='left')


Unnamed: 0,emp_id,name,department,salary
0,1,Amit,HR,
1,2,Sara,IT,50000.0
2,3,John,Finance,60000.0
3,4,Neha,IT,55000.0
4,5,Raj,Marketing,


In [18]:
# Right join: every salary row is kept, and employee details are attached
# wherever the emp_id also exists in `employees`.
pd.merge(left=employees, right=salaries, how='right', on='emp_id')


Unnamed: 0,emp_id,name,department,salary
0,2,Sara,IT,50000
1,3,John,Finance,60000
2,4,Neha,IT,55000
3,6,,,52000
