In [22]:
import pandas as pd
import numpy as np

# Create a date range for the first half of September 2025
dates = pd.date_range(start='2025-09-01', periods=15, freq='D') # 'D' for daily frequency

# Create a DataFrame with this DatetimeIndex
data = {
    'sales': np.random.randint(100, 200, size=15),
    'customers': np.random.randint(10, 25, size=15)
}
df = pd.DataFrame(data, index=dates)
df
# print("--- DataFrame with DatetimeIndex ---")
# print(df.head())

# print("\n--- Index Type ---")
# print(type(df.index))

# Key takeaway: Using pd.date_range() is a quick way to generate a sequence of dates. 
# Setting this as the DataFrame's index unlocks powerful time-based capabilities.

Unnamed: 0,sales,customers
2025-09-01,147,21
2025-09-02,141,17
2025-09-03,102,21
2025-09-04,152,21
2025-09-05,153,20
2025-09-06,151,14
2025-09-07,125,15
2025-09-08,140,24
2025-09-09,153,13
2025-09-10,157,24


In [8]:
# --- Slicing Examples ---

# 1. Get data for a specific day
print("\n--- Data for September 5th, 2025 ---")
print(df.loc['2025-09-05'])

# 2. Get data for an entire month (partial string matching)
print("\n--- Data for September 2025 ---")
print(df.loc['2025-09'])

# 3. Get data for a specific date range
print("\n--- Data from Sep 3rd to Sep 7th ---")
print(df.loc['2025-09-03':'2025-09-07'])

# Key takeaway: Pandas understands date formats in strings,
allowing you to slice your data without complex filtering.


--- Data for September 5th, 2025 ---
sales        143
customers     23
Name: 2025-09-05 00:00:00, dtype: int32

--- Data for September 2025 ---
            sales  customers
2025-09-01    124         16
2025-09-02    106         22
2025-09-03    121         12
2025-09-04    162         14
2025-09-05    143         23
2025-09-06    121         13
2025-09-07    151         14
2025-09-08    173         23
2025-09-09    185         21
2025-09-10    177         22
2025-09-11    110         22
2025-09-12    103         17
2025-09-13    168         18
2025-09-14    171         18
2025-09-15    192         19

--- Data from Sep 3rd to Sep 7th ---
            sales  customers
2025-09-03    121         12
2025-09-04    162         14
2025-09-05    143         23
2025-09-06    121         13
2025-09-07    151         14


In [15]:
## Combining Frequencies
import pandas as pd

# Generate dates for every 10 days
every_10_days = pd.date_range(start='2025-09-01', 
                              periods=4, freq='10D')
print(f"Every 10 days:\n{every_10_days}\n")

# Generate dates for every 2nd month start
every_2_months = pd.date_range(start='2025-09-01', 
                               periods=4, freq='2MS')
print(f"Every 2nd month start:\n{every_2_months}")

# Generate dates for every weekly month start with friday
every_2_months = pd.date_range(start='2025-09-01',
                               periods=4, freq='W-FRI')
print(f"Every 2nd month start:\n{every_2_months}")



Every 10 days:
DatetimeIndex(['2025-09-01', '2025-09-11', '2025-09-21', '2025-10-01'], dtype='datetime64[ns]', freq='10D')

Every 2nd month start:
DatetimeIndex(['2025-09-05', '2025-09-12', '2025-09-19', '2025-09-26'], dtype='datetime64[ns]', freq='W-FRI')


In [19]:
import pandas as pd

# Generate dates for every Friday
fridays = pd.date_range(start='2025-09-01', 
                        periods=4, freq='W-FRI')
print(f"Every Friday:\n{fridays}\n")

# Generate dates for the end of the financial year ending in March
fiscal_year_ends = pd.date_range(start='2025-01-01', 
                                 periods=3, freq='YE-MAR')
print(f"Fiscal year ends (March):\n{fiscal_year_ends}")

Every Friday:
DatetimeIndex(['2025-09-05', '2025-09-12', '2025-09-19', '2025-09-26'], dtype='datetime64[ns]', freq='W-FRI')

Fiscal year ends (March):
DatetimeIndex(['2025-03-31', '2026-03-31', '2027-03-31'], dtype='datetime64[ns]', freq='YE-MAR')


In [20]:
import pandas as pd

# We specify the start and the number of periods
date_list = pd.date_range(start='2025-09-01',
                          periods=15, freq='D')

print(f"Number of dates generated: {len(date_list)}")
print(f"First date: {date_list[0]}")
print(f"Last date: {date_list[-1]}")

Number of dates generated: 15
First date: 2025-09-01 00:00:00
Last date: 2025-09-15 00:00:00


In [24]:
import pandas as pd

# We specify the start and end dates
date_list = pd.date_range(start='2025-09-01',
                          end='2025-09-15', freq='D')

print(f"Number of periods calculated: {len(date_list)}")
print(f"Number : {date_list}")


Number of periods calculated: 15
Number : DatetimeIndex(['2025-09-01', '2025-09-02', '2025-09-03', '2025-09-04',
               '2025-09-05', '2025-09-06', '2025-09-07', '2025-09-08',
               '2025-09-09', '2025-09-10', '2025-09-11', '2025-09-12',
               '2025-09-13', '2025-09-14', '2025-09-15'],
              dtype='datetime64[ns]', freq='D')


In [29]:
# 3. Resampling: Changing Frequency
# Resampling is the process of changing the time frequency of your data.
# Downsampling: Aggregating data to a coarser, lower frequency (e.g., daily to weekly).
# Upsampling: Converting data to a finer, higher frequency (e.g., daily to hourly).

# --- Downsampling ---
# Aggregate daily sales data into weekly sums
weekly_sales = df['sales'].resample('W').sum() # 'W' for weekly frequency
print("\n--- Weekly Total Sales ---")
print(weekly_sales)

# --- Upsampling ---
# Convert daily data to 8-hour frequency and fill missing values
hourly_data = df.resample('8h').asfreq()
# Use ffill (forward fill) to propagate the last valid observation
hourly_data_filled = df.resample('8h').ffill()

print("\n--- Upsampled to 8-hour periods (with NaNs) ---")
print(hourly_data.head())
print("\n--- Upsampled and Forward-Filled ---")
print(hourly_data_filled.head())


--- Weekly Total Sales ---
2025-09-07     971
2025-09-14    1037
2025-09-21     140
Freq: W-SUN, Name: sales, dtype: int32

--- Upsampled to 8-hour periods (with NaNs) ---
                     sales  customers
2025-09-01 00:00:00  147.0       21.0
2025-09-01 08:00:00    NaN        NaN
2025-09-01 16:00:00    NaN        NaN
2025-09-02 00:00:00  141.0       17.0
2025-09-02 08:00:00    NaN        NaN

--- Upsampled and Forward-Filled ---
                     sales  customers
2025-09-01 00:00:00    147         21
2025-09-01 08:00:00    147         21
2025-09-01 16:00:00    147         21
2025-09-02 00:00:00    141         17
2025-09-02 08:00:00    141         17


In [33]:
# 4. Rolling Windows and Shifting 📈
# These techniques are essential for calculations like moving averages or period-over-period changes.
# .rolling(): Creates a window of a specified size that "slides" over the data, 
allowing you to compute statistics for that window.
# .shift(): Moves the data backward or forward by a specified number of periods.

# --- Rolling Window Example ---
# Calculate the 3-day rolling (moving) average of sales
df['3D_rolling_avg'] = df['sales'].rolling(window=3).mean()

print("\n--- Sales with 3-Day Rolling Average ---")
print(df)


# --- Shifting Example ---
# Create a new column with the previous day's sales
df['previous_day_sales'] = df['sales'].shift(1)

print("\n--- Sales with Previous Day's Sales ---")
print(df)

# Key takeaway: Rolling windows are perfect for smoothing out short-term
fluctuations to see longer-term trends.
# Shifting is fundamental for comparing current values to past values.


--- Sales with 3-Day Rolling Average ---
            sales  customers  3D_rolling_avg  previous_day_sales
2025-09-01    147         21             NaN                 NaN
2025-09-02    141         17             NaN               147.0
2025-09-03    102         21      130.000000               141.0
2025-09-04    152         21      131.666667               102.0
2025-09-05    153         20      135.666667               152.0
2025-09-06    151         14      152.000000               153.0
2025-09-07    125         15      143.000000               151.0
2025-09-08    140         24      138.666667               125.0
2025-09-09    153         13      139.333333               140.0
2025-09-10    157         24      150.000000               153.0
2025-09-11    141         21      150.333333               157.0
2025-09-12    146         15      148.000000               141.0
2025-09-13    137         17      141.333333               146.0
2025-09-14    163         10      148.666667    