In [2]:
# Step 1: Create a Sample Dataset (Realistic)

import pandas as pd

data = {
    'order_id': [1, 2, 3, 4, 5],
    'order_date': [
        '2024-01-05', '2024-01-20',
        '2024-02-10', '2024-02-25',
        '2024-03-05'
    ],
    'revenue': [200, 450, 300, 500, 700]
}

df = pd.DataFrame(data)

In [4]:
# Step 2: Convert to Datetime (MOST IMPORTANT)

df.head()

Unnamed: 0,order_id,order_date,revenue
0,1,2024-01-05,200
1,2,2024-01-20,450
2,3,2024-02-10,300
3,4,2024-02-25,500
4,5,2024-03-05,700


In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   order_id    5 non-null      int64 
 1   order_date  5 non-null      object
 2   revenue     5 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 248.0+ bytes


In [20]:
df['order_date'] = pd.to_datetime(df['order_date'])
df.dtypes

order_id               int64
order_date    datetime64[ns]
revenue                int64
dtype: object

In [22]:
# Step 3: Extract Date Features

df['year'] = df['order_date'].dt.year
df['month'] = df['order_date'].dt.month
df['month_name'] = df['order_date'].dt.month_name()
df['day'] = df['order_date'].dt.day
df['day_name'] = df['order_date'].dt.day_name()

In [24]:
df.head(3)

Unnamed: 0,order_id,order_date,revenue,year,month,month_name,day,day_name
0,1,2024-01-05,200,2024,1,January,5,Friday
1,2,2024-01-20,450,2024,1,January,20,Saturday
2,3,2024-02-10,300,2024,2,February,10,Saturday


In [28]:
# Step 4: Monthly Revenue Trend

monthly_revenue = (
    df.groupby(['year','month'])['revenue']
    .sum()
    .reset_index()
)
monthly_revenue

Unnamed: 0,year,month,revenue
0,2024,1,650
1,2024,2,800
2,2024,3,700


In [30]:
# Step 5: Use Datetime as Index (Pro Move)

df.set_index('order_date', inplace = True)

In [32]:
df

Unnamed: 0_level_0,order_id,revenue,year,month,month_name,day,day_name
order_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-01-05,1,200,2024,1,January,5,Friday
2024-01-20,2,450,2024,1,January,20,Saturday
2024-02-10,3,300,2024,2,February,10,Saturday
2024-02-25,4,500,2024,2,February,25,Sunday
2024-03-05,5,700,2024,3,March,5,Tuesday


In [42]:
# Step 6: Time-Based Resampling

monthly_rev_resample = df['revenue'].resample('M').sum()
monthly_rev_resample

order_date
2024-01-31    650
2024-02-29    800
2024-03-31    700
Freq: M, Name: revenue, dtype: int64

In [56]:
# Step 7: Rolling Metrics (Advanced)

df['rolling_average'] = df['revenue'].rolling(window = 3).sum()

In [58]:
df

Unnamed: 0_level_0,order_id,revenue,year,month,month_name,day,day_name,rolling_average
order_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-01-05,1,200,2024,1,January,5,Friday,
2024-01-20,2,450,2024,1,January,20,Saturday,
2024-02-10,3,300,2024,2,February,10,Saturday,950.0
2024-02-25,4,500,2024,2,February,25,Sunday,1250.0
2024-03-05,5,700,2024,3,March,5,Tuesday,1500.0


In [54]:
# Mini Tasks

# Calculate weekly revenue

# Extract quarter

# Create a 3-period rolling average