Sorting and Filtering Data

Filtering Data
Definition: Selecting specific rows that match a condition.
Examples:
Get all rows where price > 100
Get all rows where category is "Electronics"
Get rows where age is between 18 and 25


Sorting Data
Definition: Rearranging the rows based on the values in one or more columns.
Examples:
Sort products by price (low to high)
Sort students by marks in descending order
Sort sales by region and date


In [None]:
# ============================== Sorting & Filtering Data in Pandas ==============================

# --- Sorting Data ---
# Sort by values in ascending order
# df.sort_values(by='column')

# Sort by values in descending order
# df.sort_values(by='column', ascending=False)

# Sort by multiple columns
# df.sort_values(by=['col1', 'col2'])

# Sort by index (ascending)
# df.sort_index()

# Sort by index (descending)
# df.sort_index(ascending=False)

# --- Filtering Data (Row Selection) ---
# Filter rows where column is greater than 100
# df[df['column'] > 100]

# Filter where column is equal to a specific value
# df[df['column'] == 'value']

# Filter where column value is in a list
# df[df['column'].isin(['A', 'B'])]

# Filter rows where column contains specific text
# df[df['column'].str.contains('text')]

# --- Filtering with Multiple Conditions ---
# AND condition
# df[(df['col1'] > 50) & (df['col2'] < 100)]

# OR condition
# df[(df['col1'] == 'A') | (df['col2'] == 'B')]

# --- Using the Query Method ---
# Query with a single condition
# df.query("column > 100")

# Query with complex condition
# df.query("col1 == 'A' and col2 < 50")

# --- Using loc & iloc ---
# Label-based filtering with loc
# df.loc[df['column'] == 'value']

# Index-based filtering with iloc (first 5 rows)
# df.iloc[0:5]

# --- Drop Duplicates for Unique Rows ---
# Drop all duplicate rows
# df.drop_duplicates()

# Drop duplicates based on specific column
# df.drop_duplicates(subset='column')

# --- Filtering with Between, Not, and Negation ---
# Filter values between 10 and 20 (inclusive)
# df[df['column'].between(10, 20)]

# Filter rows where column NOT in list
# df[~df['column'].isin(['A', 'B'])]


In [1]:
import pandas as pd

In [3]:
df = pd.read_excel("Coffee.xlsx")
df

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
0,1,2023-01-01,07:06:11,2,5,Lower Manhattan,32,3.00,Coffee,Gourmet brewed coffee,Ethiopia Rg
1,2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
2,3,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.50,Drinking Chocolate,Hot chocolate,Dark chocolate Lg
3,4,2023-01-01,07:20:24,1,5,Lower Manhattan,22,2.00,Coffee,Drip coffee,Our Old Time Diner Blend Sm
4,5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
...,...,...,...,...,...,...,...,...,...,...,...
149111,149452,2023-06-30,20:18:41,2,8,Hell's Kitchen,44,2.50,Tea,Brewed herbal tea,Peppermint Rg
149112,149453,2023-06-30,20:25:10,2,8,Hell's Kitchen,49,3.00,Tea,Brewed Black tea,English Breakfast Lg
149113,149454,2023-06-30,20:31:34,1,8,Hell's Kitchen,45,3.00,Tea,Brewed herbal tea,Peppermint Lg
149114,149455,2023-06-30,20:57:19,1,8,Hell's Kitchen,40,3.75,Coffee,Barista Espresso,Cappuccino


In [4]:
df.sort_values(by='unit_price')                  
# Sort by values in ascending order

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
50890,50997,2023-03-25,16:44:01,2,5,Lower Manhattan,65,0.8,Flavours,Sugar free syrup,Sugar Free Vanilla syrup
50919,51026,2023-03-25,17:17:09,1,5,Lower Manhattan,65,0.8,Flavours,Sugar free syrup,Sugar Free Vanilla syrup
50957,51064,2023-03-25,18:15:23,1,5,Lower Manhattan,65,0.8,Flavours,Sugar free syrup,Sugar Free Vanilla syrup
107713,107967,2023-05-26,07:32:42,1,5,Lower Manhattan,64,0.8,Flavours,Regular syrup,Hazelnut syrup
107721,107975,2023-05-26,07:39:02,1,5,Lower Manhattan,64,0.8,Flavours,Regular syrup,Hazelnut syrup
...,...,...,...,...,...,...,...,...,...,...,...
23697,23754,2023-02-12,09:02:14,1,8,Hell's Kitchen,8,45.0,Coffee beans,Premium Beans,Civet Cat
64674,64844,2023-04-12,15:16:12,1,5,Lower Manhattan,8,45.0,Coffee beans,Premium Beans,Civet Cat
103464,103718,2023-05-22,08:20:52,1,5,Lower Manhattan,8,45.0,Coffee beans,Premium Beans,Civet Cat
48232,48339,2023-03-21,20:52:55,1,8,Hell's Kitchen,8,45.0,Coffee beans,Premium Beans,Civet Cat


In [5]:
df.sort_values(by='unit_price', ascending=False) 
# Sort by values in descending order

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
103464,103718,2023-05-22,08:20:52,1,5,Lower Manhattan,8,45.0,Coffee beans,Premium Beans,Civet Cat
13832,13862,2023-01-25,08:49:41,1,8,Hell's Kitchen,8,45.0,Coffee beans,Premium Beans,Civet Cat
73990,74160,2023-04-23,11:23:14,1,8,Hell's Kitchen,8,45.0,Coffee beans,Premium Beans,Civet Cat
148573,148914,2023-06-30,10:25:26,1,8,Hell's Kitchen,8,45.0,Coffee beans,Premium Beans,Civet Cat
103238,103492,2023-05-21,20:52:55,1,8,Hell's Kitchen,8,45.0,Coffee beans,Premium Beans,Civet Cat
...,...,...,...,...,...,...,...,...,...,...,...
127611,127948,2023-06-12,19:11:38,1,8,Hell's Kitchen,63,0.8,Flavours,Regular syrup,Carmel syrup
53675,53784,2023-03-30,06:42:35,1,5,Lower Manhattan,63,0.8,Flavours,Regular syrup,Carmel syrup
53691,53800,2023-03-30,07:07:32,2,8,Hell's Kitchen,63,0.8,Flavours,Regular syrup,Carmel syrup
98348,98602,2023-05-17,12:50:44,1,5,Lower Manhattan,63,0.8,Flavours,Regular syrup,Carmel syrup


In [6]:
df.sort_values(by=['unit_price', 'transaction_id'])   
# Sort by multiple columns

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
3254,3284,2023-01-07,06:27:23,2,5,Lower Manhattan,64,0.8,Flavours,Regular syrup,Hazelnut syrup
3291,3321,2023-01-07,07:44:17,1,5,Lower Manhattan,63,0.8,Flavours,Regular syrup,Carmel syrup
3318,3348,2023-01-07,08:09:37,1,8,Hell's Kitchen,65,0.8,Flavours,Sugar free syrup,Sugar Free Vanilla syrup
3325,3355,2023-01-07,08:18:07,1,5,Lower Manhattan,65,0.8,Flavours,Sugar free syrup,Sugar Free Vanilla syrup
3331,3361,2023-01-07,08:20:30,1,5,Lower Manhattan,64,0.8,Flavours,Regular syrup,Hazelnut syrup
...,...,...,...,...,...,...,...,...,...,...,...
144743,145080,2023-06-27,07:49:34,1,5,Lower Manhattan,8,45.0,Coffee beans,Premium Beans,Civet Cat
145411,145748,2023-06-27,12:12:41,1,5,Lower Manhattan,8,45.0,Coffee beans,Premium Beans,Civet Cat
148573,148914,2023-06-30,10:25:26,1,8,Hell's Kitchen,8,45.0,Coffee beans,Premium Beans,Civet Cat
148702,149043,2023-06-30,11:18:31,8,8,Hell's Kitchen,8,45.0,Coffee beans,Premium Beans,Civet Cat


In [7]:
df.sort_index()                              
# Sort by index (ascending)

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
0,1,2023-01-01,07:06:11,2,5,Lower Manhattan,32,3.00,Coffee,Gourmet brewed coffee,Ethiopia Rg
1,2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
2,3,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.50,Drinking Chocolate,Hot chocolate,Dark chocolate Lg
3,4,2023-01-01,07:20:24,1,5,Lower Manhattan,22,2.00,Coffee,Drip coffee,Our Old Time Diner Blend Sm
4,5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
...,...,...,...,...,...,...,...,...,...,...,...
149111,149452,2023-06-30,20:18:41,2,8,Hell's Kitchen,44,2.50,Tea,Brewed herbal tea,Peppermint Rg
149112,149453,2023-06-30,20:25:10,2,8,Hell's Kitchen,49,3.00,Tea,Brewed Black tea,English Breakfast Lg
149113,149454,2023-06-30,20:31:34,1,8,Hell's Kitchen,45,3.00,Tea,Brewed herbal tea,Peppermint Lg
149114,149455,2023-06-30,20:57:19,1,8,Hell's Kitchen,40,3.75,Coffee,Barista Espresso,Cappuccino


In [None]:
df.sort_index(ascending=False)               
# Sort by index (descending)

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
149115,149456,2023-06-30,20:57:19,2,8,Hell's Kitchen,64,0.80,Flavours,Regular syrup,Hazelnut syrup
149114,149455,2023-06-30,20:57:19,1,8,Hell's Kitchen,40,3.75,Coffee,Barista Espresso,Cappuccino
149113,149454,2023-06-30,20:31:34,1,8,Hell's Kitchen,45,3.00,Tea,Brewed herbal tea,Peppermint Lg
149112,149453,2023-06-30,20:25:10,2,8,Hell's Kitchen,49,3.00,Tea,Brewed Black tea,English Breakfast Lg
149111,149452,2023-06-30,20:18:41,2,8,Hell's Kitchen,44,2.50,Tea,Brewed herbal tea,Peppermint Rg
...,...,...,...,...,...,...,...,...,...,...,...
4,5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
3,4,2023-01-01,07:20:24,1,5,Lower Manhattan,22,2.00,Coffee,Drip coffee,Our Old Time Diner Blend Sm
2,3,2023-01-01,07:14:04,2,5,Lower Manhattan,59,4.50,Drinking Chocolate,Hot chocolate,Dark chocolate Lg
1,2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg


In [9]:
df[df['unit_price'] > 10]                       
# Filter rows based on condition

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
3293,3323,2023-01-07,07:44:17,1,5,Lower Manhattan,6,21.00,Coffee beans,Gourmet Beans,Ethiopia
3296,3326,2023-01-07,07:45:15,1,5,Lower Manhattan,9,28.00,Coffee beans,Organic Beans,Organic Decaf Blend
3302,3332,2023-01-07,07:50:42,1,5,Lower Manhattan,9,28.00,Coffee beans,Organic Beans,Organic Decaf Blend
3408,3438,2023-01-07,09:05:19,1,5,Lower Manhattan,9,28.00,Coffee beans,Organic Beans,Organic Decaf Blend
3447,3477,2023-01-07,09:42:52,1,5,Lower Manhattan,7,19.75,Coffee beans,Premium Beans,Jamacian Coffee River
...,...,...,...,...,...,...,...,...,...,...,...
148739,149080,2023-06-30,11:42:44,1,5,Lower Manhattan,2,18.00,Coffee beans,House blend Beans,Our Old Time Diner Blend
148784,149125,2023-06-30,13:03:56,1,5,Lower Manhattan,8,45.00,Coffee beans,Premium Beans,Civet Cat
148927,149268,2023-06-30,15:59:13,1,3,Astoria,7,19.75,Coffee beans,Premium Beans,Jamacian Coffee River
149064,149405,2023-06-30,18:49:29,1,3,Astoria,3,14.75,Coffee beans,Espresso Beans,Espresso Roast


In [13]:

df['product_category'].unique

<bound method Series.unique of 0                     Coffee
1                        Tea
2         Drinking Chocolate
3                     Coffee
4                        Tea
                 ...        
149111                   Tea
149112                   Tea
149113                   Tea
149114                Coffee
149115              Flavours
Name: product_category, Length: 149116, dtype: object>

In [None]:
df[df['product_type'] == 'Brewed Chai tea']                  
# Filter where column equals a value

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
1,2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
4,5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
10,11,2023-01-01,07:43:05,1,5,Lower Manhattan,56,2.55,Tea,Brewed Chai tea,Spicy Eye Opener Chai Rg
13,14,2023-01-01,07:48:19,1,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
21,22,2023-01-01,08:24:26,2,5,Lower Manhattan,56,2.55,Tea,Brewed Chai tea,Spicy Eye Opener Chai Rg
...,...,...,...,...,...,...,...,...,...,...,...
149054,149395,2023-06-30,18:38:35,2,3,Astoria,54,2.50,Tea,Brewed Chai tea,Morning Sunrise Chai Rg
149078,149419,2023-06-30,19:15:30,1,3,Astoria,53,3.00,Tea,Brewed Chai tea,Traditional Blend Chai Lg
149099,149440,2023-06-30,19:46:02,2,8,Hell's Kitchen,55,4.00,Tea,Brewed Chai tea,Morning Sunrise Chai Lg
149107,149448,2023-06-30,19:57:49,2,3,Astoria,54,2.50,Tea,Brewed Chai tea,Morning Sunrise Chai Rg


In [14]:
df[df['product_category'].isin(['Tea', 'Flavours'])]            
# Filter where column is in a list

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
1,2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
4,5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
10,11,2023-01-01,07:43:05,1,5,Lower Manhattan,56,2.55,Tea,Brewed Chai tea,Spicy Eye Opener Chai Rg
12,13,2023-01-01,07:45:51,1,5,Lower Manhattan,51,3.00,Tea,Brewed Black tea,Earl Grey Lg
13,14,2023-01-01,07:48:19,1,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
...,...,...,...,...,...,...,...,...,...,...,...
149110,149451,2023-06-30,20:16:31,2,5,Lower Manhattan,42,2.50,Tea,Brewed herbal tea,Lemon Grass Rg
149111,149452,2023-06-30,20:18:41,2,8,Hell's Kitchen,44,2.50,Tea,Brewed herbal tea,Peppermint Rg
149112,149453,2023-06-30,20:25:10,2,8,Hell's Kitchen,49,3.00,Tea,Brewed Black tea,English Breakfast Lg
149113,149454,2023-06-30,20:31:34,1,8,Hell's Kitchen,45,3.00,Tea,Brewed herbal tea,Peppermint Lg


In [17]:
df[df['product_type'].str.contains('Brewed Chai tea')]        
# Filter rows containing text

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
1,2,2023-01-01,07:08:56,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
4,5,2023-01-01,07:22:41,2,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
10,11,2023-01-01,07:43:05,1,5,Lower Manhattan,56,2.55,Tea,Brewed Chai tea,Spicy Eye Opener Chai Rg
13,14,2023-01-01,07:48:19,1,5,Lower Manhattan,57,3.10,Tea,Brewed Chai tea,Spicy Eye Opener Chai Lg
21,22,2023-01-01,08:24:26,2,5,Lower Manhattan,56,2.55,Tea,Brewed Chai tea,Spicy Eye Opener Chai Rg
...,...,...,...,...,...,...,...,...,...,...,...
149054,149395,2023-06-30,18:38:35,2,3,Astoria,54,2.50,Tea,Brewed Chai tea,Morning Sunrise Chai Rg
149078,149419,2023-06-30,19:15:30,1,3,Astoria,53,3.00,Tea,Brewed Chai tea,Traditional Blend Chai Lg
149099,149440,2023-06-30,19:46:02,2,8,Hell's Kitchen,55,4.00,Tea,Brewed Chai tea,Morning Sunrise Chai Lg
149107,149448,2023-06-30,19:57:49,2,3,Astoria,54,2.50,Tea,Brewed Chai tea,Morning Sunrise Chai Rg


In [19]:
df[(df['transaction_qty'] > 2) & (df['unit_price'] < 10)]
# AND condition

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
3842,3872,2023-01-08,07:37:12,3,5,Lower Manhattan,63,0.80,Flavours,Regular syrup,Carmel syrup
3977,4007,2023-01-08,09:34:26,3,5,Lower Manhattan,63,0.80,Flavours,Regular syrup,Carmel syrup
4088,4118,2023-01-08,10:59:55,3,5,Lower Manhattan,65,0.80,Flavours,Sugar free syrup,Sugar Free Vanilla syrup
4163,4193,2023-01-08,12:58:32,4,5,Lower Manhattan,65,0.80,Flavours,Sugar free syrup,Sugar Free Vanilla syrup
4352,4382,2023-01-08,19:32:15,4,5,Lower Manhattan,64,0.80,Flavours,Regular syrup,Hazelnut syrup
...,...,...,...,...,...,...,...,...,...,...,...
148904,149245,2023-06-30,15:32:47,3,5,Lower Manhattan,58,3.50,Drinking Chocolate,Hot chocolate,Dark chocolate Rg
148943,149284,2023-06-30,16:22:45,3,5,Lower Manhattan,23,2.50,Coffee,Drip coffee,Our Old Time Diner Blend Rg
148944,149285,2023-06-30,16:23:49,3,5,Lower Manhattan,25,2.20,Coffee,Organic brewed coffee,Brazilian Sm
148988,149329,2023-06-30,17:31:23,3,5,Lower Manhattan,44,2.50,Tea,Brewed herbal tea,Peppermint Rg


In [44]:
df[(df['transaction_qty'] >6) | (df['unit_price'] > 15)]# OR condition

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
3293,3323,2023-01-07,07:44:17,1,5,Lower Manhattan,6,21.00,Coffee beans,Gourmet Beans,Ethiopia
3296,3326,2023-01-07,07:45:15,1,5,Lower Manhattan,9,28.00,Coffee beans,Organic Beans,Organic Decaf Blend
3302,3332,2023-01-07,07:50:42,1,5,Lower Manhattan,9,28.00,Coffee beans,Organic Beans,Organic Decaf Blend
3408,3438,2023-01-07,09:05:19,1,5,Lower Manhattan,9,28.00,Coffee beans,Organic Beans,Organic Decaf Blend
3447,3477,2023-01-07,09:42:52,1,5,Lower Manhattan,7,19.75,Coffee beans,Premium Beans,Jamacian Coffee River
...,...,...,...,...,...,...,...,...,...,...,...
148702,149043,2023-06-30,11:18:31,8,8,Hell's Kitchen,8,45.00,Coffee beans,Premium Beans,Civet Cat
148739,149080,2023-06-30,11:42:44,1,5,Lower Manhattan,2,18.00,Coffee beans,House blend Beans,Our Old Time Diner Blend
148784,149125,2023-06-30,13:03:56,1,5,Lower Manhattan,8,45.00,Coffee beans,Premium Beans,Civet Cat
148927,149268,2023-06-30,15:59:13,1,3,Astoria,7,19.75,Coffee beans,Premium Beans,Jamacian Coffee River


In [45]:
df[df['unit_price'].between(25, 40)]             
# Values between 10 and 20

Unnamed: 0,transaction_id,transaction_date,transaction_time,transaction_qty,store_id,store_location,product_id,unit_price,product_category,product_type,product_detail
3296,3326,2023-01-07,07:45:15,1,5,Lower Manhattan,9,28.0,Coffee beans,Organic Beans,Organic Decaf Blend
3302,3332,2023-01-07,07:50:42,1,5,Lower Manhattan,9,28.0,Coffee beans,Organic Beans,Organic Decaf Blend
3408,3438,2023-01-07,09:05:19,1,5,Lower Manhattan,9,28.0,Coffee beans,Organic Beans,Organic Decaf Blend
4598,4628,2023-01-09,09:42:16,1,5,Lower Manhattan,81,28.0,Branded,Clothing,I Need My Bean! T-shirt
5596,5626,2023-01-11,07:20:48,1,8,Hell's Kitchen,81,28.0,Branded,Clothing,I Need My Bean! T-shirt
...,...,...,...,...,...,...,...,...,...,...,...
143861,144198,2023-06-26,09:25:39,1,5,Lower Manhattan,81,28.0,Branded,Clothing,I Need My Bean! T-shirt
144254,144591,2023-06-26,13:44:30,1,5,Lower Manhattan,81,28.0,Branded,Clothing,I Need My Bean! T-shirt
144519,144856,2023-06-26,17:33:41,1,5,Lower Manhattan,81,28.0,Branded,Clothing,I Need My Bean! T-shirt
144680,145017,2023-06-27,07:09:41,1,5,Lower Manhattan,81,28.0,Branded,Clothing,I Need My Bean! T-shirt
