In [8]:
import pandas as pd
import numpy as np

# Toy order book: six sales rows across two regions, dated January-February 2023.
data = dict(
    Region=['East', 'East', 'West', 'West', 'East', 'West'],
    OrderDate=pd.to_datetime([
        '2023-01-15', '2023-01-20', '2023-01-15',
        '2023-02-01', '2023-02-10', '2023-02-20',
    ]),
    Sales=[100, 150, 200, 300, 250, 400],
)

# Build the frame and promote Region/OrderDate to a two-level row index in a
# single chain, so `df` is only ever bound to the indexed frame.
df = pd.DataFrame(data).set_index(['Region', 'OrderDate'])
print(df)


                   Sales
Region OrderDate        
East   2023-01-15    100
       2023-01-20    150
West   2023-01-15    200
       2023-02-01    300
East   2023-02-10    250
West   2023-02-20    400


In [9]:
# Total sales per calendar month, binning on the 'OrderDate' level of the
# MultiIndex (the 'Region' level is ignored by this grouping).
# 'ME' is the month-end alias; the older spelling 'M' is deprecated for offsets
# (the recorded output of this cell already reports `Freq: ME`).
monthly_sales = df.groupby(
    pd.Grouper(level='OrderDate', freq='ME')
)['Sales'].sum()

print(monthly_sales)


OrderDate
2023-01-31    450
2023-02-28    950
Freq: ME, Name: Sales, dtype: int64


  pd.Grouper(level='OrderDate', freq='M')


In [10]:
df2 = pd.DataFrame({
    'A': [10, 20, 30],
    'B': [1, 2, 3],
    'C': [5, 6, 7]
})

# Group columns by their first letter (A, B, C).
# `groupby(..., axis=1)` is deprecated, so transpose, group the (former) column
# labels as row labels, aggregate, and transpose back to the original layout.
grouped = df2.T.groupby(lambda label: label[0]).sum().T
print(grouped)


    A  B  C
0  10  1  5
1  20  2  6
2  30  3  7


  grouped = df2.groupby(lambda x: x[0], axis=1).sum()


In [11]:
df3 = pd.DataFrame({
    'Category': ['B', 'A', 'C', 'B', 'A'],
    'Value': [10, 20, 30, 40, 50],
})

# sort=False: groups come out in order of first appearance in the data.
grouped_unsorted = df3['Value'].groupby(df3['Category'], sort=False).sum()
print("Unsorted groups:", grouped_unsorted, sep="\n")

# sort=True (the default): groups come out ordered by key.
grouped_sorted = df3['Value'].groupby(df3['Category'], sort=True).sum()
print("\nSorted groups:", grouped_sorted, sep="\n")


Unsorted groups:
Category
B    50
A    70
C    30
Name: Value, dtype: int64

Sorted groups:
Category
A    70
B    50
C    30
Name: Value, dtype: int64


In [12]:
# Eight consecutive days starting on 2023-01-01, so the data straddles one
# weekly bin edge.
dates = pd.date_range('2023-01-01', periods=8, freq='D')
df4 = pd.DataFrame({
    'Date': dates,
    'Sales': [5, 10, 15, 20, 25, 30, 35, 40]
}).set_index('Date')

# closed='left', label='left': each bin includes its left edge and is labelled
# by it. NOTE: this is NOT the default for freq='W'; weekly bins default to
# closed='right', label='right'.
weekly_left = df4.groupby(pd.Grouper(freq='W', closed='left', label='left')).sum()
print(weekly_left)

# closed='right', label='right': each bin includes its right edge and is
# labelled by it. This matches what freq='W' does when neither is passed.
weekly_right = df4.groupby(pd.Grouper(freq='W', closed='right', label='right')).sum()
print(weekly_right)


            Sales
Date             
2023-01-01    140
2023-01-08     40
            Sales
Date             
2023-01-01      5
2023-01-08    175


In [13]:
# Three monthly periods (2023-01 .. 2023-03) used directly as the row index.
periods = pd.period_range(start='2023-01', periods=3, freq='M')
df5 = pd.DataFrame({'Sales': [100, 200, 300]}, index=periods)

# Same monthly grouping under each `convention`; in the recorded output both
# tables are identical.
# NOTE(review): the recorded run left warning residue pointing at these Grouper
# calls -- confirm whether PeriodIndex grouping / `convention` is deprecated in
# the installed pandas before relying on this pattern.
for convention in ('start', 'end'):
    monthly_totals = df5.groupby(pd.Grouper(freq='M', convention=convention)).sum()
    print(monthly_totals)


         Sales
2023-01    100
2023-02    200
2023-03    300
         Sales
2023-01    100
2023-02    200
2023-03    300


  print(df5.groupby(pd.Grouper(freq='M', convention='start')).sum())
  print(df5.groupby(pd.Grouper(freq='M', convention='start')).sum())
  print(df5.groupby(pd.Grouper(freq='M', convention='end')).sum())
  print(df5.groupby(pd.Grouper(freq='M', convention='end')).sum())


In [14]:
# Five timestamps 12 hours apart, starting at 06:00 on 2023-01-01.
# The hour alias is spelled lowercase ('12h'); the uppercase 'H' is deprecated.
dates2 = pd.date_range('2023-01-01 06:00', periods=5, freq='12h')
df6 = pd.DataFrame({'Sales': [10, 20, 30, 40, 50]}, index=dates2)

# Daily bins anchored two different ways. In the recorded output both calls
# print the same table for this data.
daily_start_day = df6.groupby(pd.Grouper(freq='D', origin='start_day')).sum()  # Default
daily_epoch = df6.groupby(pd.Grouper(freq='D', origin='epoch')).sum()          # Epoch start (1970-01-01)

print(daily_start_day)
print(daily_epoch)


            Sales
2023-01-01     30
2023-01-02     70
2023-01-03     50
            Sales
2023-01-01     30
2023-01-02     70
2023-01-03     50


  dates2 = pd.date_range('2023-01-01 06:00', periods=5, freq='12H')
