- Pandas provides datetime functionality for handling date and time data.
- The `Timedelta` class allows manipulation of time intervals.
- This combination is useful for time-based analysis and working with temporal data in a DataFrame.

In [1]:
import pandas as pd

# The date_range function is used to generate a sequence of dates within a range
# Can be used to create time indicies or date columns in a dataFrame
# The start and end parameters define the range, freq determines the frequence, like daily (D) or monthly (M)

#generate a date range
date_range = pd.date_range(start = '2025-05-12', end='2026-05-12', freq='M')
print(date_range)

DatetimeIndex(['2025-05-31', '2025-06-30', '2025-07-31', '2025-08-31',
               '2025-09-30', '2025-10-31', '2025-11-30', '2025-12-31',
               '2026-01-31', '2026-02-28', '2026-03-31', '2026-04-30'],
              dtype='datetime64[ns]', freq='ME')


  date_range = pd.date_range(start = '2025-05-12', end='2026-05-12', freq='M')


In [None]:
# The .dt accessor exposes the individual components (day, month, year, ...)
# of a datetime column, which is handy whenever an analysis depends on a
# specific date attribute.

# Build a small example frame whose 'Date' column starts out as strings,
# then convert that column to real datetimes.
data = {'Date': ['2025-01-01', '2025-02-15', '2025-03-20']}
df = pd.DataFrame(data)
df['Date'] = pd.to_datetime(df['Date'])

# Derive one integer column per date component.
df = df.assign(
    Day=df['Date'].dt.day,
    Month=df['Date'].dt.month,
    Year=df['Date'].dt.year,
)

# Show the original dates next to the extracted components.
print(df[['Date', 'Day', 'Month', 'Year']])

# Weekday / weekend flags for five consecutive days starting 2025-01-01.
df = pd.DataFrame({'Date': pd.date_range(start='2025-01-01', periods=5)})
df['Weekday'] = df['Date'].dt.weekday
# .dt.weekday numbers Monday as 0, so Saturday and Sunday are 5 and 6.
df['IsWeekend'] = df['Weekday'] >= 5
print(df[['Date', 'Weekday', 'IsWeekend']])

# Shift every date one day backward and one day forward.
df['Date'] = pd.to_datetime(df['Date'])
one_day = pd.Timedelta(days=1)
df['PreviousDate'] = df['Date'] - one_day
df['NextDate'] = df['Date'] + one_day
print(df[['Date', 'PreviousDate', 'NextDate']])

In [None]:
# The Timedelta class in pandas represents a duration, i.e. the difference
# between two dates or times. It is created by specifying the desired
# duration in units such as days, hours, or minutes.

# Ten hourly timestamps with two integer value columns.
# 'h' is used instead of the legacy 'H' alias, which is deprecated since
# pandas 2.2 and emits a FutureWarning.
data = {
    'Date': pd.date_range(start='2025-01-01', periods=10, freq='h'),
    'Value1': range(10),
    'Value2': range(10, 20),
}
df = pd.DataFrame(data)

# Creating a timedelta of 3 days
delta = pd.Timedelta(days=3)

In [None]:
# Timedelta objects take part in ordinary date arithmetic: adding one to a
# date yields a new date, which is how future or past dates are computed
# from a given time interval.

# Make sure 'Date' is datetime-typed before doing arithmetic on it.
df['Date'] = pd.to_datetime(df['Date'])

# Offset every timestamp by 2 weeks, 3 days and 12 hours.
offset = pd.Timedelta(weeks=2, days=3, hours=12)
df['FutureDate'] = df['Date'] + offset
print(df[['Date', 'FutureDate']])

In [None]:
# Time series data often comes with irregular time intervals.
# Resampling changes the frequency of the series, either by upsampling
# (increasing frequency) or by downsampling (decreasing frequency).

# Resampling needs a datetime index, so 'Date' becomes the index of df.
df = df.set_index('Date')

# Downsample to daily buckets, summing only the numeric columns.
df_resampled = (
    df.select_dtypes(include='number')
    .resample('D')
    .sum()
)
print(df_resampled)

In [None]:
# pandas offers the Categorical class for building categorical variables.
# They suit data that falls into a fixed set of distinct, non-numeric
# categories.

# Build an ordered categorical: Low < Medium < High.
categories = ['Low', 'Medium', 'High']
values = ['Low', 'Medium', 'High', 'Low', 'High']
cat_variable = pd.Categorical(
    values,
    dtype=pd.CategoricalDtype(categories=categories, ordered=True),
)
print(cat_variable)

# value_counts() tallies how often each distinct value occurs in a column.
df = pd.DataFrame({'Category': list('ABACBA')})

# Count the occurrences of each category.
category_column = df['Category']
category_counts = category_column.value_counts()
print(category_counts)

In [None]:
# Machine learning models and statistical analyses usually need categorical
# data in an encoded form. get_dummies produces one indicator column per
# category, named '<prefix>_<category>'.

df = pd.DataFrame({'Category': list('ABACBA')})

# One indicator column per distinct value of 'Category'.
dummy_variables = pd.get_dummies(df, columns=['Category'], prefix='Category')
print(dummy_variables)


# Label encoding is another way to handle categorical data:
# each category is assigned a unique integer label.
df = pd.DataFrame({'Category': list('ABACBA')})

# Convert to a categorical and keep its integer codes as the labels.
df['Category_LabelEncoded'] = pd.Categorical(df['Category']).codes
print(df[['Category', 'Category_LabelEncoded']])
