In [1]:
import numpy as np
import pandas as pd

In [2]:
# Employee roster, written one record per row: (employee_id, name, department).
# IDs 3 and 5 have no matching row in `salaries`, which is what makes the
# join examples further down interesting.
employees = pd.DataFrame(
    [
        (1, 'Alice', 'HR'),
        (2, 'Bob', 'IT'),
        (3, 'Charlie', 'Finance'),
        (4, 'David', 'IT'),
        (5, 'Eve', 'Marketing'),
    ],
    columns=['employee_id', 'name', 'department'],
)

employees

Unnamed: 0,employee_id,name,department
0,1,Alice,HR
1,2,Bob,IT
2,3,Charlie,Finance
3,4,David,IT
4,5,Eve,Marketing


In [3]:
# Pay table, one record per row: (employee_id, salary, bonus).
# IDs 6 and 7 are not present in `employees`; IDs 1, 2 and 4 are.
salaries = pd.DataFrame(
    [
        (1, 50000, 5000),
        (2, 60000, 6000),
        (4, 55000, 5500),
        (6, 70000, 7000),
        (7, 65000, 6500),
    ],
    columns=['employee_id', 'salary', 'bonus'],
)

salaries

Unnamed: 0,employee_id,salary,bonus
0,1,50000,5000
1,2,60000,6000
2,4,55000,5500
3,6,70000,7000
4,7,65000,6500


### Merging of 2 DataFrames
----

In [4]:
# Inner join on employee_id: only IDs found in BOTH frames are kept.
# 'inner' is already the default for `how`; it is spelled out here for clarity.
employees.merge(salaries, how='inner', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Alice,HR,50000,5000
1,2,Bob,IT,60000,6000
2,4,David,IT,55000,5500


In [5]:
# Outer join on employee_id: every ID from either frame is kept, and the
# columns coming from the side that has no match are filled with NaN.
employees.merge(salaries, how='outer', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Alice,HR,50000.0,5000.0
1,2,Bob,IT,60000.0,6000.0
2,3,Charlie,Finance,,
3,4,David,IT,55000.0,5500.0
4,5,Eve,Marketing,,
5,6,,,70000.0,7000.0
6,7,,,65000.0,6500.0


In [6]:
# Left join on employee_id: every row of `employees` is kept; salary/bonus
# are NaN for employees that have no row in `salaries`.
employees.merge(salaries, how='left', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Alice,HR,50000.0,5000.0
1,2,Bob,IT,60000.0,6000.0
2,3,Charlie,Finance,,
3,4,David,IT,55000.0,5500.0
4,5,Eve,Marketing,,


In [7]:
# Right join on employee_id: every row of `salaries` is kept; name/department
# are NaN for salary rows whose ID is missing from `employees`.
employees.merge(salaries, how='right', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,Alice,HR,50000,5000
1,2,Bob,IT,60000,6000
2,4,David,IT,55000,5500
3,6,,,70000,7000
4,7,,,65000,6500


### Concatenation of 2 DataFrames
----

In [8]:
# Columns A-D with rows 0-3; each cell is its column letter followed by the
# row number (A0, A1, ..., D3).
df1 = pd.DataFrame({
    col: [f'{col}{row}' for row in range(4)]
    for col in 'ABCD'
})

df1

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [10]:
# Same columns A-D as df1, but the cell labels continue from 4 to 7
# (A4, A5, ..., D7). The row index still starts at 0.
df2 = pd.DataFrame({
    col: [f'{col}{num}' for num in range(4, 8)]
    for col in 'ABCD'
})

df2

Unnamed: 0,A,B,C,D
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


In [12]:
# Stack df2 underneath df1 (row-wise is the default axis). Each frame keeps
# its own index labels, so 0-3 appears twice in the result.
pd.concat([df1, df2], axis='index')

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


In [11]:
# Stack df2 underneath df1 and discard the original index labels, giving the
# result a fresh 0..7 index.
pd.concat([df1, df2], axis='index', ignore_index=True)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [14]:
# Place df2 to the right of df1 (column-wise). Rows are lined up by their
# index labels, which are kept as-is. Both inputs use the column labels A-D,
# so those labels appear twice in the result.
pd.concat([df1, df2], axis='columns')

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
3,A3,B3,C3,D3,A7,B7,C7,D7


### Joins
----

In [15]:
# Single-column frame of names, explicitly indexed 0-3.
df3 = pd.Series(
    ['Alice', 'Bob', 'Eve', 'Charlie'],
    index=[0, 1, 2, 3],
    name='name',
).to_frame()

df3

Unnamed: 0,name
0,Alice
1,Bob
2,Eve
3,Charlie


In [21]:
# Single-column frame of ages, indexed 1-4: it overlaps df3's index on 1-3,
# has no label 0, and adds label 4.
df4 = pd.Series(
    [25, 30, 35, 28],
    index=[1, 2, 3, 4],
    name='age',
).to_frame()

df4

Unnamed: 0,age
1,25
2,30
3,35
4,28


In [23]:
# Index-based join of df4 onto df3. 'left' is the default for `how`: every
# index label of df3 is kept, and age is NaN where df4 has no such label.
df3.join(other=df4, how='left')

Unnamed: 0,name,age
0,Alice,
1,Bob,25.0
2,Eve,30.0
3,Charlie,35.0


In [24]:
# Index-based outer join: index labels from either frame are kept, with NaN
# wherever one side has no value for that label.
df3.join(other=df4, how='outer')

Unnamed: 0,name,age
0,Alice,
1,Bob,25.0
2,Eve,30.0
3,Charlie,35.0
4,,28.0


In [25]:
# Index-based inner join: only index labels present in both df3 and df4
# survive.
df3.join(other=df4, how='inner')

Unnamed: 0,name,age
1,Bob,25
2,Eve,30
3,Charlie,35


In [26]:
# Index-based right join: every index label of df4 is kept, and name is NaN
# where df3 has no such label.
df3.join(other=df4, how='right')

Unnamed: 0,name,age
1,Bob,25
2,Eve,30
3,Charlie,35
4,,28
