In [11]:
import pandas as pd
from datetime import datetime

In [12]:
# Sample DataFrame with nulls.
# Every column except 'region' holds exactly one missing entry (None), so the
# cells below show how pandas renders and filters missing values (NaN / None / NaT).
data = {
    'customer_id': [1, 2, 3, 4, 5, 1, None],
    'product': ['A', 'B', 'C', 'A', None, 'A', 'C'],
    'quantity': [2, 1, 5, 3, 2, None, 1],
    'price': [10.5, 20.0, 7.5, 10.5, 20.0, 10.5, None],
    # All dates fall in April 2025, so only the day of the month is spelled out;
    # a None day stays None and becomes NaT once loaded into the DataFrame.
    'purchase_date': [
        None if day is None else datetime(2025, 4, day)
        for day in (1, 2, 2, 3, 4, None, 5)
    ],
    'ship_date': [
        None if day is None else datetime(2025, 4, day)
        for day in (2, 3, 3, 4, 7, 6, None)
    ],
    'region': ['North', 'South', 'East', 'North', 'South', 'North', 'East'],
}

# One row per purchase, indexed 0..6 by the default integer index.
df = pd.DataFrame(data)


# 1. Select specific columns

In [13]:
# Indexing with a list of labels yields a DataFrame holding only those columns,
# in the order they are listed.
selected_columns = ['customer_id', 'product']
df[selected_columns]

Unnamed: 0,customer_id,product
0,1.0,A
1,2.0,B
2,3.0,C
3,4.0,A
4,5.0,
5,1.0,A
6,,C


# 2. Filter rows using a condition

In [14]:
# Boolean mask: True where price exceeds 10. A missing price compares as False,
# so the row whose price is NaN is filtered out along with the cheaper rows.
price_above_10 = df['price'] > 10
df[price_above_10]

Unnamed: 0,customer_id,product,quantity,price,purchase_date,ship_date,region
0,1.0,A,2.0,10.5,2025-04-01,2025-04-02,North
1,2.0,B,1.0,20.0,2025-04-02,2025-04-03,South
3,4.0,A,3.0,10.5,2025-04-03,2025-04-04,North
4,5.0,,2.0,20.0,2025-04-04,2025-04-07,South
5,1.0,A,,10.5,NaT,2025-04-06,North


# 3. Filter rows using `.between()`

In [15]:
# .between() is inclusive on both ends by default, so rows priced exactly 20.0
# are kept; a missing price never falls inside the range.
price_in_range = df['price'].between(10, 20)
df[price_in_range]

Unnamed: 0,customer_id,product,quantity,price,purchase_date,ship_date,region
0,1.0,A,2.0,10.5,2025-04-01,2025-04-02,North
1,2.0,B,1.0,20.0,2025-04-02,2025-04-03,South
3,4.0,A,3.0,10.5,2025-04-03,2025-04-04,North
4,5.0,,2.0,20.0,2025-04-04,2025-04-07,South
5,1.0,A,,10.5,NaT,2025-04-06,North


# 4. Rename columns

In [16]:
# rename() returns a new DataFrame; df itself keeps its 'product' column because
# the result is only displayed, not assigned back.
column_renames = {'product': 'product_name'}
df.rename(columns=column_renames)

Unnamed: 0,customer_id,product_name,quantity,price,purchase_date,ship_date,region
0,1.0,A,2.0,10.5,2025-04-01,2025-04-02,North
1,2.0,B,1.0,20.0,2025-04-02,2025-04-03,South
2,3.0,C,5.0,7.5,2025-04-02,2025-04-03,East
3,4.0,A,3.0,10.5,2025-04-03,2025-04-04,North
4,5.0,,2.0,20.0,2025-04-04,2025-04-07,South
5,1.0,A,,10.5,NaT,2025-04-06,North
6,,C,1.0,,2025-04-05,NaT,East


# 5. Sort values

In [17]:
# Highest customer_id first. Rows with a missing customer_id go to the bottom
# (na_position='last' is the default, spelled out here for clarity).
df.sort_values('customer_id', ascending=False, na_position='last')

Unnamed: 0,customer_id,product,quantity,price,purchase_date,ship_date,region
4,5.0,,2.0,20.0,2025-04-04,2025-04-07,South
3,4.0,A,3.0,10.5,2025-04-03,2025-04-04,North
2,3.0,C,5.0,7.5,2025-04-02,2025-04-03,East
1,2.0,B,1.0,20.0,2025-04-02,2025-04-03,South
0,1.0,A,2.0,10.5,2025-04-01,2025-04-02,North
5,1.0,A,,10.5,NaT,2025-04-06,North
6,,C,1.0,,2025-04-05,NaT,East


In [18]:
# Lowest customer_id first. Rows with a missing customer_id still go to the
# bottom (na_position='last' is the default, spelled out here for clarity).
df.sort_values('customer_id', ascending=True, na_position='last')

Unnamed: 0,customer_id,product,quantity,price,purchase_date,ship_date,region
0,1.0,A,2.0,10.5,2025-04-01,2025-04-02,North
5,1.0,A,,10.5,NaT,2025-04-06,North
1,2.0,B,1.0,20.0,2025-04-02,2025-04-03,South
2,3.0,C,5.0,7.5,2025-04-02,2025-04-03,East
3,4.0,A,3.0,10.5,2025-04-03,2025-04-04,North
4,5.0,,2.0,20.0,2025-04-04,2025-04-07,South
6,,C,1.0,,2025-04-05,NaT,East


# 6. Select rows using `.loc` (label-based)

In [20]:
# .loc looks a row up by its index *label*; the result is a Series whose
# entries are the row's column values.
row_label = 4
df.loc[row_label]

customer_id                      5.0
product                         None
quantity                         2.0
price                           20.0
purchase_date    2025-04-04 00:00:00
ship_date        2025-04-07 00:00:00
region                         South
Name: 4, dtype: object

In [21]:
# .loc takes a row selector and a column selector together: keep the rows whose
# price exceeds 10, and only the customer and date columns.
price_above_10 = df['price'] > 10
shipping_columns = ['customer_id', 'purchase_date', 'ship_date']
df.loc[price_above_10, shipping_columns]

Unnamed: 0,customer_id,purchase_date,ship_date
0,1.0,2025-04-01,2025-04-02
1,2.0,2025-04-02,2025-04-03
3,4.0,2025-04-03,2025-04-04
4,5.0,2025-04-04,2025-04-07
5,1.0,NaT,2025-04-06


# 7. Select rows using `.iloc` (position-based)

In [22]:
# .iloc selects by 0-based integer *position*, so position 5 is the sixth row;
# the result is a Series of that row's column values.
row_position = 5
df.iloc[row_position]

customer_id                      1.0
product                            A
quantity                         NaN
price                           10.5
purchase_date                    NaT
ship_date        2025-04-06 00:00:00
region                         North
Name: 5, dtype: object