In [None]:
# ==========================
# 📌 Merging and Joining Data
# ==========================

# --- Using pd.merge() ---

# Merge on a common column (default inner join)
# pd.merge(df1, df2, on='key')

# Merge on different column names
# pd.merge(df1, df2, left_on='key1', right_on='key2')

# Inner join (only matching rows)
# pd.merge(df1, df2, on='key', how='inner')

# Left join (all rows from df1, matching from df2)
# pd.merge(df1, df2, on='key', how='left')

# Right join (all rows from df2, matching from df1)
# pd.merge(df1, df2, on='key', how='right')

# Outer join (all rows from both, with NaN where no match)
# pd.merge(df1, df2, on='key', how='outer')

# Cross join (cartesian product of two datasets; no key may be passed with how='cross')
# pd.merge(df1, df2, how='cross')

# --- Using DataFrame.join() ---

# Join on index (requires setting index beforehand)
# df1.set_index('key').join(df2.set_index('key'))

# --- Concatenation ---

# Concatenate DataFrames along rows (axis=0)
# pd.concat([df1, df2])

# Concatenate DataFrames along columns (axis=1)
# pd.concat([df1, df2], axis=1)

# Concatenate and renumber the result's index 0..n-1 (discard the original index labels)
# pd.concat([df1, df2], ignore_index=True)


In [None]:
import pandas as pd

# Read the transactions workbook (path is relative to the working directory);
# sheet_name=0 selects the first sheet, which is also the pandas default.
df1 = pd.read_excel("Coffee.xlsx", sheet_name=0)

In [4]:
# Preview the first five rows to sanity-check the load and see the available columns
df1.head(n=5)

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
0,1,2023-01-01,07:06:11,2,5,Lower Manhattan,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg
1,2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
2,3,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.5,Drinking Chocolate,Hot chocolate,Dark chocolate Lg
3,4,2023-01-01,07:20:24,1,5,Lower Manhattan,22,2.0,Coffee,Drip coffee,Our Old Time Diner Blend Sm
4,5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg


In [5]:
# Small product lookup table, written one product per row.
# The last row (id 99) is a placeholder entry named 'Unknown'.
df2 = pd.DataFrame(
    data=[
        (32, 'Ethiopia Rg', 'AromaCo'),
        (57, 'Spicy Chai Lg', 'TeaFarm'),
        (59, 'Dark Chocolate Lg', 'ChocoMelt'),
        (99, 'Unknown', 'Unknown'),
    ],
    columns=['product_id', 'product_name', 'supplier'],
)


In [7]:
# Basic merge: inner join on the shared 'product_id' column, so only
# transactions whose product appears in df2 are kept.
merged_df = df1.merge(df2, how='inner', on='product_id')
merged_df

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail,product_name,supplier
0,1,2023-01-01,07:06:11,2,5,Lower Manhattan,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,Ethiopia Rg,AromaCo
1,2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,Spicy Chai Lg,TeaFarm
2,3,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.5,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,Dark Chocolate Lg,ChocoMelt
3,5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,Spicy Chai Lg,TeaFarm
4,14,2023-01-01,07:48:19,1,5,Lower Manhattan,57,3.1,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,Spicy Chai Lg,TeaFarm
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8841,149401,2023-06-30,18:44:49,1,3,Astoria,59,4.5,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,Dark Chocolate Lg,ChocoMelt
8842,149403,2023-06-30,18:49:29,2,3,Astoria,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,Ethiopia Rg,AromaCo
8843,149408,2023-06-30,18:52:33,1,3,Astoria,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,Ethiopia Rg,AromaCo
8844,149413,2023-06-30,19:04:35,1,8,Hell's Kitchen,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,Ethiopia Rg,AromaCo


In [8]:
# Left join: every transaction in df1 is kept; product_name / supplier are
# NaN for products that have no entry in df2.
left_joined = df1.merge(df2, how='left', on='product_id')
left_joined

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail,product_name,supplier
0,1,2023-01-01,07:06:11,2,5,Lower Manhattan,32,3.00,Coffee,Gourmet brewed coffee,Ethiopia Rg,Ethiopia Rg,AromaCo
1,2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,Spicy Chai Lg,TeaFarm
2,3,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.50,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,Dark Chocolate Lg,ChocoMelt
3,4,2023-01-01,07:20:24,1,5,Lower Manhattan,22,2.00,Coffee,Drip coffee,Our Old Time Diner Blend Sm,,
4,5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,Spicy Chai Lg,TeaFarm
...,...,...,...,...,...,...,...,...,...,...,...,...,...
149111,149452,2023-06-30,20:18:41,2,8,Hell's Kitchen,44,2.50,Tea,Brewed herbal tea,Peppermint Rg,,
149112,149453,2023-06-30,20:25:10,2,8,Hell's Kitchen,49,3.00,Tea,Brewed Black tea,English Breakfast Lg,,
149113,149454,2023-06-30,20:31:34,1,8,Hell's Kitchen,45,3.00,Tea,Brewed herbal tea,Peppermint Lg,,
149114,149455,2023-06-30,20:57:19,1,8,Hell's Kitchen,40,3.75,Coffee,Barista Espresso,Cappuccino,,


In [9]:
# Right join: every product in df2 is kept, including any with no transaction
# in df1 (their transaction columns are filled with NaN).
right_joined = df1.merge(df2, how='right', on='product_id')
right_joined

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail,product_name,supplier
0,1.0,2023-01-01,07:06:11,2.0,5.0,Lower Manhattan,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,Ethiopia Rg,AromaCo
1,51.0,2023-01-01,09:14:41,1.0,8.0,Hell's Kitchen,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,Ethiopia Rg,AromaCo
2,80.0,2023-01-01,10:00:39,2.0,8.0,Hell's Kitchen,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,Ethiopia Rg,AromaCo
3,84.0,2023-01-01,10:16:30,2.0,5.0,Lower Manhattan,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,Ethiopia Rg,AromaCo
4,199.0,2023-01-01,12:50:28,2.0,3.0,Astoria,32,3.0,Coffee,Gourmet brewed coffee,Ethiopia Rg,Ethiopia Rg,AromaCo
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8842,149320.0,2023-06-30,17:22:44,1.0,8.0,Hell's Kitchen,59,4.5,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,Dark Chocolate Lg,ChocoMelt
8843,149387.0,2023-06-30,18:30:02,1.0,5.0,Lower Manhattan,59,4.5,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,Dark Chocolate Lg,ChocoMelt
8844,149401.0,2023-06-30,18:44:49,1.0,3.0,Astoria,59,4.5,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,Dark Chocolate Lg,ChocoMelt
8845,149416.0,2023-06-30,19:10:03,2.0,3.0,Astoria,59,4.5,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,Dark Chocolate Lg,ChocoMelt


In [11]:
# Outer join: union of both tables. indicator=True appends a '_merge' column
# recording each row's origin: 'left_only', 'right_only' or 'both'.
outer_joined = df1.merge(df2, how='outer', on='product_id', indicator=True)
outer_joined

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail,product_name,supplier,_merge
0,3481.0,2023-01-07,09:44:57,1.0,8.0,Hell's Kitchen,1,18.0,Coffee beans,Organic Beans,Brazilian - Organic,,,left_only
1,3866.0,2023-01-08,07:30:17,1.0,5.0,Lower Manhattan,1,18.0,Coffee beans,Organic Beans,Brazilian - Organic,,,left_only
2,3971.0,2023-01-08,09:13:06,2.0,5.0,Lower Manhattan,1,18.0,Coffee beans,Organic Beans,Brazilian - Organic,,,left_only
3,4329.0,2023-01-08,17:48:46,1.0,3.0,Astoria,1,18.0,Coffee beans,Organic Beans,Brazilian - Organic,,,left_only
4,4573.0,2023-01-09,08:57:44,1.0,5.0,Lower Manhattan,1,18.0,Coffee beans,Organic Beans,Brazilian - Organic,,,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149112,149040.0,2023-06-30,11:18:31,1.0,8.0,Hell's Kitchen,87,2.1,Coffee,Barista Espresso,Ouro Brasileiro shot,,,left_only
149113,149297.0,2023-06-30,16:33:19,1.0,5.0,Lower Manhattan,87,3.0,Coffee,Barista Espresso,Ouro Brasileiro shot,,,left_only
149114,149319.0,2023-06-30,17:20:32,2.0,8.0,Hell's Kitchen,87,3.0,Coffee,Barista Espresso,Ouro Brasileiro shot,,,left_only
149115,149370.0,2023-06-30,18:10:40,1.0,8.0,Hell's Kitchen,87,3.0,Coffee,Barista Espresso,Ouro Brasileiro shot,,,left_only


In [13]:
# Index-based join: DataFrame.join matches rows on index labels, so
# 'product_id' is first moved from a column into the index of both frames.
df1_indexed = df1.set_index(keys="product_id")
df2_indexed = df2.set_index(keys="product_id")

# Keep every transaction row; products absent from df2 get NaN lookup columns.
joined_df = df1_indexed.join(other=df2_indexed, how="left")
joined_df

Unnamed: 0_level_0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,unit_price,product_category,product_type,product_detail,product_name,supplier
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
32,1,2023-01-01,07:06:11,2,5,Lower Manhattan,3.00,Coffee,Gourmet brewed coffee,Ethiopia Rg,Ethiopia Rg,AromaCo
57,2,2023-01-01,07:08:56,2,5,Lower Manhattan,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,Spicy Chai Lg,TeaFarm
59,3,2023-01-01,07:14:04,2,5,Lower Manhattan,4.50,Drinking Chocolate,Hot chocolate,Dark chocolate Lg,Dark Chocolate Lg,ChocoMelt
22,4,2023-01-01,07:20:24,1,5,Lower Manhattan,2.00,Coffee,Drip coffee,Our Old Time Diner Blend Sm,,
57,5,2023-01-01,07:22:41,2,5,Lower Manhattan,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg,Spicy Chai Lg,TeaFarm
...,...,...,...,...,...,...,...,...,...,...,...,...
44,149452,2023-06-30,20:18:41,2,8,Hell's Kitchen,2.50,Tea,Brewed herbal tea,Peppermint Rg,,
49,149453,2023-06-30,20:25:10,2,8,Hell's Kitchen,3.00,Tea,Brewed Black tea,English Breakfast Lg,,
45,149454,2023-06-30,20:31:34,1,8,Hell's Kitchen,3.00,Tea,Brewed herbal tea,Peppermint Lg,,
40,149455,2023-06-30,20:57:19,1,8,Hell's Kitchen,3.75,Coffee,Barista Espresso,Cappuccino,,


In [14]:
# Concatenation
# A notebook cell only renders its final expression, so each result is bound
# to a name; otherwise the first two frames would be built and thrown away.

# Row-wise (axis=0): df1 stacked on itself. The original index labels are
# kept, so every label appears twice in the result.
stacked = pd.concat([df1, df1])

# Column-wise (axis=1): frames are aligned on their row index, NOT on
# 'product_id'. df2 has only 4 rows, so its columns are NaN beyond index 3,
# and the result contains two 'product_id' columns.
side_by_side = pd.concat([df1, df2], axis=1)

# Row-wise with ignore_index=True: same rows as `stacked`, but the index is
# renumbered 0..n-1 so labels are unique.
stacked_reindexed = pd.concat([df1, df1], ignore_index=True)
stacked_reindexed

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
0,1,2023-01-01,07:06:11,2,5,Lower Manhattan,32,3.00,Coffee,Gourmet brewed coffee,Ethiopia Rg
1,2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
2,3,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.50,Drinking Chocolate,Hot chocolate,Dark chocolate Lg
3,4,2023-01-01,07:20:24,1,5,Lower Manhattan,22,2.00,Coffee,Drip coffee,Our Old Time Diner Blend Sm
4,5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
...,...,...,...,...,...,...,...,...,...,...,...
298227,149452,2023-06-30,20:18:41,2,8,Hell's Kitchen,44,2.50,Tea,Brewed herbal tea,Peppermint Rg
298228,149453,2023-06-30,20:25:10,2,8,Hell's Kitchen,49,3.00,Tea,Brewed Black tea,English Breakfast Lg
298229,149454,2023-06-30,20:31:34,1,8,Hell's Kitchen,45,3.00,Tea,Brewed herbal tea,Peppermint Lg
298230,149455,2023-06-30,20:57:19,1,8,Hell's Kitchen,40,3.75,Coffee,Barista Espresso,Cappuccino
