In [2]:
# Step 1: Build the sample tables (realistic scenario)
# Passengers table: one row per passenger, keyed by passengerid.

import pandas as pd

passengers = pd.DataFrame(
    [
        (1, 'A', 1),
        (2, 'B', 3),
        (3, 'C', 2),
        (4, 'D', 3),
    ],
    columns=['passengerid', 'name', 'pclass'],
)


In [4]:
# Tickets table: one fare per passengerid.
# Only ids 1, 2 and 4 appear here, so the joins below have an unmatched passenger.

tickets = pd.DataFrame(
    [
        (1, 100),
        (2, 20),
        (4, 15),
    ],
    columns=['passengerid', 'fare'],
)


In [6]:
# Step 2: merge() (the most important combining tool)
# Inner join: keep only the rows whose passengerid is present in both tables.

inner_join = passengers.merge(tickets, how='inner', on='passengerid')
inner_join

Unnamed: 0,passengerid,name,pclass,fare
0,1,A,1,100
1,2,B,3,20
2,4,D,3,15


In [8]:
# Left join (the most commonly used): keep every passenger row;
# passengers without a ticket get a missing fare.

left_join = passengers.merge(tickets, how='left', on='passengerid')
left_join

Unnamed: 0,passengerid,name,pclass,fare
0,1,A,1,100.0
1,2,B,3,20.0
2,3,C,2,
3,4,D,3,15.0


In [10]:
# Right join: keep every ticket row and attach the matching passenger details.

right_join = passengers.merge(tickets, how='right', on='passengerid')
right_join

Unnamed: 0,passengerid,name,pclass,fare
0,1,A,1,100
1,2,B,3,20
2,4,D,3,15


In [12]:
# Outer join: keep the union of passengerid values from both tables,
# leaving gaps where one side has no matching row.

outer_merge = passengers.merge(tickets, how='outer', on='passengerid')
outer_merge

Unnamed: 0,passengerid,name,pclass,fare
0,1,A,1,100.0
1,2,B,3,20.0
2,3,C,2,
3,4,D,3,15.0


In [14]:
# Step 3: Merge when the key column is named differently on each side
# NOTE: the rename below mutates `tickets` in place. After it runs, `tickets`
# has a 'pid' column instead of 'passengerid', so any cell that merges
# `tickets` on 'passengerid' will fail if it is re-run afterwards.

tickets.rename(columns={'passengerid': 'pid'}, inplace=True)

# left_on / right_on name the key column of each frame separately.
passengers.merge(tickets, how='left', left_on='passengerid', right_on='pid')

Unnamed: 0,passengerid,name,pclass,pid,fare
0,1,A,1,1.0,100.0
1,2,B,3,2.0,20.0
2,3,C,2,,
3,4,D,3,4.0,15.0


In [23]:
# Step 4: join() (index-based)
# join() matches a column of the calling frame (`on=`) against the *index* of
# the other frame, so the ticket key has to become the index first.
#
# The key is normalised to 'pid' here with a non-mutating rename, so this cell
# works whether or not `tickets` has already had 'passengerid' renamed to 'pid'
# by an earlier cell. rename() ignores labels that are absent by default, so it
# is a no-op when the column is already called 'pid'.

tickets_indexed = (
    tickets
    .rename(columns={'passengerid': 'pid'})
    .set_index('pid')
)

passengers.join(tickets_indexed, on='passengerid')

Unnamed: 0,passengerid,name,pclass,fare
0,1,A,1,100.0
1,2,B,3,20.0
2,3,C,2,
3,4,D,3,15.0


In [33]:
# Step 5: concat() (stacking data)
# Row-wise concat: cut passengers into two positional slices, then stack them
# back on top of each other.

df1 = passengers.head(2)
df2 = passengers.iloc[len(df1):]

pd.concat([df1, df2], axis='index')


Unnamed: 0,passengerid,name,pclass
0,1,A,1
1,2,B,3
2,3,C,2
3,4,D,3


In [37]:
# First slice: the first two passenger rows.
df1 = passengers.head(2)
df1

Unnamed: 0,passengerid,name,pclass
0,1,A,1
1,2,B,3


In [39]:
# Second slice: every passenger row from position 2 onwards (original index labels are kept).
df2 = passengers.iloc[2:, :]
df2

Unnamed: 0,passengerid,name,pclass
2,3,C,2
3,4,D,3


In [41]:
# Column-wise concat: place an extra column next to the existing ones.
# Frames are aligned on their index labels; both use the default 0..3 index here.

extra_info = pd.Series([22, 35, 28, 40], name='age').to_frame()

pd.concat([passengers, extra_info], axis='columns')


Unnamed: 0,passengerid,name,pclass,age
0,1,A,1,22
1,2,B,3,35
2,3,C,2,28
3,4,D,3,40
