# Chapter 22 - Working with Date Columns

### Revisiting Python dates

In [3]:
from datetime import date, time, datetime, timedelta

In [4]:
today = date(2020, 9, 30)
now = datetime(2020, 9, 30, 12, 33)
birthday = date(1989, 3, 21)

days_of_holiday = timedelta(days=14)
minutes_of_nap = timedelta(minutes=30)

In [5]:
today + days_of_holiday

datetime.date(2020, 10, 14)

In [6]:
now + minutes_of_nap

datetime.datetime(2020, 9, 30, 13, 3)

In [7]:
today > birthday

True

In [8]:
today - birthday

datetime.timedelta(days=11516)

In [9]:
birthday.strftime('%d/%m/%y')

'21/03/89'

In [10]:
birthday.strftime('%A, %d %B, %Y')

'Tuesday, 21 March, 1989'

In [11]:
my_date = '30/09/20'

datetime.strptime(my_date, '%d/%m/%y')

datetime.datetime(2020, 9, 30, 0, 0)

### Date columns

In [13]:
import pandas as pd

# Read Dataframe 'Q1Sales.csv'
url = ("https://raw.githubusercontent.com/pythonforaccounting/workspace/refs/heads/main/P2%20-%20Working%20with%20tables/Q1Sales.csv")
ledger_df = pd.read_csv(url)

ledger_df.head()

Unnamed: 0,InvoiceNo,Channel,Product Name,ProductID,Account,AccountNo,Date,Deadline,Currency,Unit Price,Quantity,Total
0,1532,Shoppe.com,Cannon Water Bomb Balloons 100 Pack,T&G/CAN-97509,Sales,5004,2020-01-01,11/23/19,USD,20.11,14,281.54
1,1533,Walcart,LEGO Ninja Turtles Stealth Shell in Pursuit 79102,T&G/LEG-37777,Sales,5004,2020-01-01,06/15/20,USD,6.7,1,6.7
2,1534,Bullseye,,T&G/PET-14209,Sales,5004,2020-01-01,05/07/20,USD,11.67,5,58.35
3,1535,Bullseye,Transformers Age of Extinction Generations Del...,T&G/TRA-20170,Sales,5004,2020-01-01,12/22/19,USD,13.46,6,80.76
4,1535,Bullseye,Transformers Age of Extinction Generations Del...,T&G/TRA-20170,Sales,5004,2020-01-01,12/22/19,USD,13.46,6,80.76


In [14]:
ledger_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37708 entries, 0 to 37707
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   InvoiceNo     37708 non-null  int64  
 1   Channel       37708 non-null  object 
 2   Product Name  33142 non-null  object 
 3   ProductID     37708 non-null  object 
 4   Account       37708 non-null  object 
 5   AccountNo     37708 non-null  int64  
 6   Date          37708 non-null  object 
 7   Deadline      37708 non-null  object 
 8   Currency      37708 non-null  object 
 9   Unit Price    37708 non-null  float64
 10  Quantity      37708 non-null  int64  
 11  Total         37708 non-null  float64
dtypes: float64(2), int64(3), object(7)
memory usage: 3.5+ MB


In [15]:
ledger_df['Date'].iloc[0]

'2020-01-01'

In [16]:
timestamp_date = pd.Timestamp(2020, 1, 1)

datetime_date = datetime(2020, 1, 1)

timestamp_date == datetime_date

True

In [17]:
ledger_df[['Date', 'Deadline']]

Unnamed: 0,Date,Deadline
0,2020-01-01,11/23/19
1,2020-01-01,06/15/20
2,2020-01-01,05/07/20
3,2020-01-01,12/22/19
4,2020-01-01,12/22/19
...,...,...
37703,2020-03-31,Thu Sep 17 00:00:00 2020
37704,2020-03-31,5-08-20
37705,2020-03-31,04/11/20
37706,2020-03-31,3-20-20


### Converting strings to dates

In [19]:
# Check for unique date formats
ledger_df['Deadline'].head(20)

0                     11/23/19
1                     06/15/20
2                     05/07/20
3                     12/22/19
4                     12/22/19
5     Mon Jan 20 00:00:00 2020
6     Thu Oct 31 00:00:00 2019
7                      6-28-20
8     Thu Jan  2 00:00:00 2020
9     Sat Nov 30 00:00:00 2019
10                    05/25/20
11                    05/06/20
12    Sun Apr 19 00:00:00 2020
13                    01/31/20
14    Mon Mar 30 00:00:00 2020
15               March 14 2020
16            November 29 2019
17                    11/09/19
18                     2-23-20
19                    10/16/19
Name: Deadline, dtype: object

### Standardize 'Deadline' strings to '%m/%d/%y'

In [21]:
# Function to handle different date formats and standardize them to '%m/%d/%y'
def convert_to_standard_format(date_str, output_format='%m/%d/%y'):
    """Parse a date written in one of several layouts and re-render it as text.

    The explicit layouts are tried in order and the first one that matches
    wins. A value that matches none of them (for example 'March 14 2020' or
    'Thu Jan  2 00:00:00 2020') is handed to pandas' format-inferring parser.

    Parameters
    ----------
    date_str : str
        Raw date text, e.g. '11/23/19', '6-28-20' or 'Mar 14 2020'.
    output_format : str, default '%m/%d/%y'
        strftime pattern used to render the parsed date.

    Returns
    -------
    str or NaT
        The formatted date, or ``pd.NaT`` when the value is missing or
        cannot be parsed.
    """
    # Explicit layouts, most common first. '%b' is the *abbreviated* month
    # name ('Mar'); fully spelled months are left to the fallback parser.
    explicit_formats = (
        '%m/%d/%y',   # 11/23/19
        '%m-%d-%y',   # 6-28-20
        '%b %d %Y',   # Mar 14 2020
        '%d-%b-%Y',   # 14-Mar-2020
    )
    # Errors that mean "this layout did not match", not a programming bug.
    parse_errors = (ValueError, TypeError, OverflowError)

    for input_format in explicit_formats:
        try:
            parsed = pd.to_datetime(date_str, format=input_format, errors='raise')
        except parse_errors:
            continue  # layout mismatch: try the next one
        if parsed is None or parsed is pd.NaT:
            # Missing input (None/NaN) does not raise; no other layout can help.
            return pd.NaT
        return parsed.strftime(output_format)

    # Last resort: let pandas infer the layout; unparseable text becomes NaT.
    try:
        parsed = pd.to_datetime(date_str, errors='coerce')
    except parse_errors:
        return pd.NaT
    if parsed is None or parsed is pd.NaT:
        return pd.NaT
    return parsed.strftime(output_format)

# Apply the conversion function to the 'Deadline' column
ledger_df['Deadline'] = ledger_df['Deadline'].apply(convert_to_standard_format)

# Verify the results
print(ledger_df['Deadline'].head(20))

0     11/23/19
1     06/15/20
2     05/07/20
3     12/22/19
4     12/22/19
5     01/20/20
6     10/31/19
7     06/28/20
8     01/02/20
9     11/30/19
10    05/25/20
11    05/06/20
12    04/19/20
13    01/31/20
14    03/30/20
15    03/14/20
16    11/29/19
17    11/09/19
18    02/23/20
19    10/16/19
Name: Deadline, dtype: object


### Standardize 'Deadline' strings to '%m-%d-%y'

In [23]:
# Function to handle different date formats and standardize them to '%m-%d-%y'
def convert_to_standard_format(date_str, output_format='%m-%d-%y'):
    """Parse a date written in one of several layouts and re-render it as text.

    The explicit layouts are tried in order and the first one that matches
    wins. A value that matches none of them (for example 'March 14 2020') is
    handed to pandas' format-inferring parser.

    Parameters
    ----------
    date_str : str
        Raw date text, e.g. '6-28-20', '11/23/19' or 'Mar 14 2020'.
    output_format : str, default '%m-%d-%y'
        strftime pattern used to render the parsed date.

    Returns
    -------
    str or NaT
        The formatted date, or ``pd.NaT`` when the value is missing or
        cannot be parsed.
    """
    # Explicit layouts, tried in order. The second entry must be the
    # slash-separated layout: repeating '%m-%d-%y' here would leave
    # 'mm/dd/yy' input to be parsed only by format inference.
    explicit_formats = (
        '%m-%d-%y',   # 6-28-20
        '%m/%d/%y',   # 11/23/19
        '%b %d %Y',   # Mar 14 2020 ('%b' is the abbreviated month name)
        '%d-%b-%Y',   # 14-Mar-2020
    )
    # Errors that mean "this layout did not match", not a programming bug.
    parse_errors = (ValueError, TypeError, OverflowError)

    for input_format in explicit_formats:
        try:
            parsed = pd.to_datetime(date_str, format=input_format, errors='raise')
        except parse_errors:
            continue  # layout mismatch: try the next one
        if parsed is None or parsed is pd.NaT:
            # Missing input (None/NaN) does not raise; no other layout can help.
            return pd.NaT
        return parsed.strftime(output_format)

    # Last resort: let pandas infer the layout; unparseable text becomes NaT.
    try:
        parsed = pd.to_datetime(date_str, errors='coerce')
    except parse_errors:
        return pd.NaT
    if parsed is None or parsed is pd.NaT:
        return pd.NaT
    return parsed.strftime(output_format)

# Apply the conversion function to the 'Deadline' column
ledger_df['Deadline'] = ledger_df['Deadline'].apply(convert_to_standard_format)

# Verify the results
print(ledger_df['Deadline'].head(20))

0     11-23-19
1     06-15-20
2     05-07-20
3     12-22-19
4     12-22-19
5     01-20-20
6     10-31-19
7     06-28-20
8     01-02-20
9     11-30-19
10    05-25-20
11    05-06-20
12    04-19-20
13    01-31-20
14    03-30-20
15    03-14-20
16    11-29-19
17    11-09-19
18    02-23-20
19    10-16-19
Name: Deadline, dtype: object


### Standardize 'Deadline' strings to 'YYYY-MM-DD'

In [25]:
# Function to handle different date formats and standardize them to 'YYYY-MM-DD'
def convert_to_standard_format(date_str, output_format='%Y-%m-%d'):
    """Parse a date written in one of several layouts and re-render it as text.

    The explicit layouts are tried in order and the first one that matches
    wins. A value that matches none of them (for example 'March 14 2020') is
    handed to pandas' format-inferring parser.

    Parameters
    ----------
    date_str : str
        Raw date text, e.g. '11/23/19', '6-28-20' or 'Mar 14 2020'.
    output_format : str, default '%Y-%m-%d'
        strftime pattern used to render the parsed date.

    Returns
    -------
    str or NaT
        The formatted date, or ``pd.NaT`` when the value is missing or
        cannot be parsed.
    """
    # Explicit layouts, most common first. '%b' is the *abbreviated* month
    # name ('Mar'); fully spelled months are left to the fallback parser.
    explicit_formats = (
        '%m/%d/%y',   # 11/23/19
        '%m-%d-%y',   # 6-28-20
        '%b %d %Y',   # Mar 14 2020
        '%d-%b-%Y',   # 14-Mar-2020
    )
    # Errors that mean "this layout did not match", not a programming bug.
    parse_errors = (ValueError, TypeError, OverflowError)

    for input_format in explicit_formats:
        try:
            parsed = pd.to_datetime(date_str, format=input_format, errors='raise')
        except parse_errors:
            continue  # layout mismatch: try the next one
        if parsed is None or parsed is pd.NaT:
            # Missing input (None/NaN) does not raise; no other layout can help.
            return pd.NaT
        return parsed.strftime(output_format)

    # Last resort: let pandas infer the layout; unparseable text becomes NaT.
    try:
        parsed = pd.to_datetime(date_str, errors='coerce')
    except parse_errors:
        return pd.NaT
    if parsed is None or parsed is pd.NaT:
        return pd.NaT
    return parsed.strftime(output_format)

# Apply the conversion function to the 'Deadline' column
ledger_df['Deadline'] = ledger_df['Deadline'].apply(convert_to_standard_format)

# Verify the results
print(ledger_df['Deadline'].head(20))

0     2019-11-23
1     2020-06-15
2     2020-05-07
3     2019-12-22
4     2019-12-22
5     2020-01-20
6     2019-10-31
7     2020-06-28
8     2020-01-02
9     2019-11-30
10    2020-05-25
11    2020-05-06
12    2020-04-19
13    2020-01-31
14    2020-03-30
15    2020-03-14
16    2019-11-29
17    2019-11-09
18    2020-02-23
19    2019-10-16
Name: Deadline, dtype: object


In [26]:
# Example date series
dates = pd.Series(['05/07/20', '05/08/20', '05/09/20'])

# Convert dates to datetime using the specified format
dates_parsed = pd.to_datetime(dates, format='%m/%d/%y')

dates_parsed

0   2020-05-07
1   2020-05-08
2   2020-05-09
dtype: datetime64[ns]

In [27]:
pd.to_datetime(dates, format='%d/%m/%y')

0   2020-07-05
1   2020-08-05
2   2020-09-05
dtype: datetime64[ns]

In [28]:
pd.to_datetime(ledger_df['Deadline']).describe()

count                            37708
mean     2020-03-25 01:33:33.662883072
min                2019-10-04 00:00:00
25%                2020-01-18 00:00:00
50%                2020-03-25 00:00:00
75%                2020-06-01 00:00:00
max                2020-09-25 00:00:00
Name: Deadline, dtype: object

In [29]:
ledger_df['Deadline']

0        2019-11-23
1        2020-06-15
2        2020-05-07
3        2019-12-22
4        2019-12-22
            ...    
37703    2020-09-17
37704    2020-05-08
37705    2020-04-11
37706    2020-03-20
37707    2020-04-02
Name: Deadline, Length: 37708, dtype: object

In [30]:
ledger_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37708 entries, 0 to 37707
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   InvoiceNo     37708 non-null  int64  
 1   Channel       37708 non-null  object 
 2   Product Name  33142 non-null  object 
 3   ProductID     37708 non-null  object 
 4   Account       37708 non-null  object 
 5   AccountNo     37708 non-null  int64  
 6   Date          37708 non-null  object 
 7   Deadline      37708 non-null  object 
 8   Currency      37708 non-null  object 
 9   Unit Price    37708 non-null  float64
 10  Quantity      37708 non-null  int64  
 11  Total         37708 non-null  float64
dtypes: float64(2), int64(3), object(7)
memory usage: 3.5+ MB


In [31]:
ledger_df[['Date', 'Deadline']]   # object, object

Unnamed: 0,Date,Deadline
0,2020-01-01,2019-11-23
1,2020-01-01,2020-06-15
2,2020-01-01,2020-05-07
3,2020-01-01,2019-12-22
4,2020-01-01,2019-12-22
...,...,...
37703,2020-03-31,2020-09-17
37704,2020-03-31,2020-05-08
37705,2020-03-31,2020-04-11
37706,2020-03-31,2020-03-20


### Convert column 'Date' & 'Deadline' from object to datetime64[ns]

In [33]:
# Convert column 'Date' to datetime64[ns]
ledger_df['Date'] = pd.to_datetime(ledger_df['Date'], errors='coerce')

# Convert column 'Deadline' to datetime64[ns]
ledger_df['Deadline'] = pd.to_datetime(ledger_df['Deadline'], errors='coerce')

ledger_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37708 entries, 0 to 37707
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   InvoiceNo     37708 non-null  int64         
 1   Channel       37708 non-null  object        
 2   Product Name  33142 non-null  object        
 3   ProductID     37708 non-null  object        
 4   Account       37708 non-null  object        
 5   AccountNo     37708 non-null  int64         
 6   Date          37708 non-null  datetime64[ns]
 7   Deadline      37708 non-null  datetime64[ns]
 8   Currency      37708 non-null  object        
 9   Unit Price    37708 non-null  float64       
 10  Quantity      37708 non-null  int64         
 11  Total         37708 non-null  float64       
dtypes: datetime64[ns](2), float64(2), int64(3), object(5)
memory usage: 3.5+ MB


### Pandas date methods

In [35]:
ledger_df['Deadline'].dt.year

0        2019
1        2020
2        2020
3        2019
4        2019
         ... 
37703    2020
37704    2020
37705    2020
37706    2020
37707    2020
Name: Deadline, Length: 37708, dtype: int32

In [36]:
ledger_df['Deadline'].dt.day_name()

0        Saturday
1          Monday
2        Thursday
3          Sunday
4          Sunday
           ...   
37703    Thursday
37704      Friday
37705    Saturday
37706      Friday
37707    Thursday
Name: Deadline, Length: 37708, dtype: object

In [37]:
ledger_df['Deadline'].dt.day_name('de_DE')  # In German

0           Samstag
1            Montag
2        Donnerstag
3           Sonntag
4           Sonntag
            ...    
37703    Donnerstag
37704       Freitag
37705       Samstag
37706       Freitag
37707    Donnerstag
Name: Deadline, Length: 37708, dtype: object

### Filtering date columns

In [39]:
ledger_df[ledger_df['Date'] > '15/01/2020']

Unnamed: 0,InvoiceNo,Channel,Product Name,ProductID,Account,AccountNo,Date,Deadline,Currency,Unit Price,Quantity,Total
7224,8756,Bullseye,"Miele Type U AirClean Bags & Filters, For S700...",H&K/MIE-51069,Sales,5004,2020-01-16,2020-02-26,USD,11.71,23,269.33
7225,8757,Shoppe.com,Verizon LG G2 Chevron Peach Pink Green Red Pat...,CP&A/VER-66222,Sales,5004,2020-01-16,2020-04-26,USD,22.21,5,111.05
7226,8758,Understock.com,Coleman 5620B718G 5GAL BLU Water Carrier,S&O/COL-60210,Sales,5004,2020-01-16,2020-02-07,USD,9.67,3,29.01
7227,8759,Understock.com,"12-Inch & 9-Inch Stainless Steel Tongs Set, Se...",K&D/12--81602,Sales,5004,2020-01-16,2020-04-09,USD,25.85,1,25.85
7228,8760,iBay.com,Coaster Oriental Style 4-Panel Room Screen Div...,H&K/COA-60709,Sales,5004,2020-01-16,2020-05-10,USD,2.40,2,4.80
...,...,...,...,...,...,...,...,...,...,...,...,...
37703,39235,iBay.com,"Nature's Bounty Garlic, 2000mg, Odor-Free, 120...",H&PC/NAT-15470,Sales,5004,2020-03-31,2020-09-17,USD,5.55,2,11.10
37704,39216,Shoppe.com,Funko Wonder Woman POP Heroes,T&G/FUN-03366,Sales,5004,2020-03-31,2020-05-08,USD,28.56,1,28.56
37705,39219,Shoppe.com,MONO GS1 GS1-BTY-BLK-L Betty Long Guitar Strap...,MI/MON-86723,Sales,5004,2020-03-31,2020-04-11,USD,3.33,1,3.33
37706,39238,Shoppe.com,,T&G/MAG-68412,Sales,5004,2020-03-31,2020-03-20,USD,34.76,10,347.60


In [40]:
ledger_df[ledger_df['Date'] > '15th of January, 2020']
#ledger_df[ledger_df['Date'] > '15/1/2020']
#ledger_df[ledger_df['Date'] > '2020, Jan 15']
#ledger_df[ledger_df['Date'] > '1-15-20']
#ledger_df[ledger_df['Date'] > '2020/1/15']

Unnamed: 0,InvoiceNo,Channel,Product Name,ProductID,Account,AccountNo,Date,Deadline,Currency,Unit Price,Quantity,Total
7224,8756,Bullseye,"Miele Type U AirClean Bags & Filters, For S700...",H&K/MIE-51069,Sales,5004,2020-01-16,2020-02-26,USD,11.71,23,269.33
7225,8757,Shoppe.com,Verizon LG G2 Chevron Peach Pink Green Red Pat...,CP&A/VER-66222,Sales,5004,2020-01-16,2020-04-26,USD,22.21,5,111.05
7226,8758,Understock.com,Coleman 5620B718G 5GAL BLU Water Carrier,S&O/COL-60210,Sales,5004,2020-01-16,2020-02-07,USD,9.67,3,29.01
7227,8759,Understock.com,"12-Inch & 9-Inch Stainless Steel Tongs Set, Se...",K&D/12--81602,Sales,5004,2020-01-16,2020-04-09,USD,25.85,1,25.85
7228,8760,iBay.com,Coaster Oriental Style 4-Panel Room Screen Div...,H&K/COA-60709,Sales,5004,2020-01-16,2020-05-10,USD,2.40,2,4.80
...,...,...,...,...,...,...,...,...,...,...,...,...
37703,39235,iBay.com,"Nature's Bounty Garlic, 2000mg, Odor-Free, 120...",H&PC/NAT-15470,Sales,5004,2020-03-31,2020-09-17,USD,5.55,2,11.10
37704,39216,Shoppe.com,Funko Wonder Woman POP Heroes,T&G/FUN-03366,Sales,5004,2020-03-31,2020-05-08,USD,28.56,1,28.56
37705,39219,Shoppe.com,MONO GS1 GS1-BTY-BLK-L Betty Long Guitar Strap...,MI/MON-86723,Sales,5004,2020-03-31,2020-04-11,USD,3.33,1,3.33
37706,39238,Shoppe.com,,T&G/MAG-68412,Sales,5004,2020-03-31,2020-03-20,USD,34.76,10,347.60


In [41]:
import datetime as dt

start_date = dt.datetime(2020, 1, 15)
end_date = dt.datetime(2020, 1, 20)
shift = dt.timedelta(days=1)

# Filter rows in 'ledger_df' where 'Date' is within a specified range
ledger_df[   
            (ledger_df['Date'] > start_date - shift)    # Select rows where 'Date' is after 'start_date' minus 'shift'
            & 
            (ledger_df['Date'] < end_date + shift)      # Select rows where 'Date' is before 'end_date' plus 'shift'
         ]

Unnamed: 0,InvoiceNo,Channel,Product Name,ProductID,Account,AccountNo,Date,Deadline,Currency,Unit Price,Quantity,Total
6748,8280,Shoppe.com,Fender 005-3191-000 1-Ply Black 8-Hole Mount T...,MI/FEN-86342,Sales,5004,2020-01-15,2019-11-08,USD,43.45,6,260.70
6749,8281,Understock.com,3M 6897 Black Head Strap - 70070709152 [PRICE ...,I&S/3M-25157,Sales,5004,2020-01-15,2020-06-23,USD,4.40,14,61.60
6750,8282,Understock.com,Tarantula Sleeve Wrap Armguard (Camo),S&O/TAR-39287,Sales,5004,2020-01-15,2019-10-27,USD,10.57,6,63.42
6751,8283,Walcart,Hubsan X4 H107C 2.4G 4CH RC Quadcopter With Ca...,T&G/HUB-12150,Sales,5004,2020-01-15,2020-05-12,USD,5.46,1,5.46
6752,8284,Understock.com,Reusable Particulate Respirator Mask Size: Medium,I&S/REU-35054,Sales,5004,2020-01-15,2020-02-18,USD,20.68,2,41.36
...,...,...,...,...,...,...,...,...,...,...,...,...
9331,10800,Understock.com,Samsung Galaxy S3 / S 3 / S4/ S 4/ S IV/ Rugge...,CP&A/SAM-81922,Sales,5004,2020-01-20,2019-11-20,USD,12.28,17,208.76
9332,10818,iBay.com,Bushnell Velocity Speed Gun (Colors may vary),S&O/BUS-00706,Sales,5004,2020-01-20,2019-12-12,USD,5.26,3,15.78
9333,10821,iBay.com,Vivitar V69379-SIL 3-IN-1 2 MP Digital Camera ...,C&P/VIV-10923,Sales,5004,2020-01-20,2020-03-15,USD,7.37,6,44.22
9334,10823,Understock.com,Cat People / The Curse of the Cat People (Horr...,M&T/CAT-34864,Sales,5004,2020-01-20,2020-05-16,USD,14.82,76,1126.32


In [42]:
# Filter rows in 'ledger_df' based on specific conditions for the 'Deadline' column
ledger_df[
            (ledger_df['Deadline'].dt.year.isin([2018, 2019])) &     # Select rows where the 'Deadline' year is 2018 or 2019 
            (ledger_df['Deadline'].dt.quarter == 4) &                # Select rows where the 'Deadline' is in the 4th quarter
            (ledger_df['Deadline'].dt.day_name() == 'Thursday')      # Select rows where the 'Deadline' falls on a Thursday
          ]


Unnamed: 0,InvoiceNo,Channel,Product Name,ProductID,Account,AccountNo,Date,Deadline,Currency,Unit Price,Quantity,Total
6,1538,Understock.com,,S&O/TEX-91494,Sales,5004,2020-01-01,2019-10-31,USD,31.36,9,282.24
22,1554,iBay.com,Nerf N-Sports Weather Blitz All Conditions Foo...,T&G/NER-98948,Sales,5004,2020-01-01,2019-10-24,USD,25.95,3,77.85
38,1570,Walcart,DR Strings Nickel Lo-Rider - Nickel Plated Hex...,MI/DR-99810,Sales,5004,2020-01-01,2019-11-14,USD,13.08,1,13.08
65,1597,Walcart,"Tork Dispenser Napkins, Interfold,13 Width x 8...",H&PC/TOR-78048,Sales,5004,2020-01-01,2019-11-28,USD,5.23,17,88.91
85,1617,Walcart,Battery Back Door Cover Replacement for Samsun...,CP&A/BAT-49908,Sales,5004,2020-01-01,2019-11-07,USD,23.86,4,95.44
...,...,...,...,...,...,...,...,...,...,...,...,...
35634,37166,Walcart,Olde Thompson 8-Inch Senator Walnut Peppermill,K&D/OLD-76861,Sales,5004,2020-03-22,2019-12-26,USD,26.47,1,26.47
35656,37188,iBay.com,Remo Controlled Sound Coated Drum Head with Re...,MI/REM-66035,Sales,5004,2020-03-22,2019-12-26,USD,4.96,16,79.36
35987,37519,iBay.com,MPERO Collection 3 Pack of Mirror Screen Prote...,CP&A/MPE-10829,Sales,5004,2020-03-23,2019-12-26,USD,10.29,6,61.74
36115,37647,Understock.com,Magline GMK81UA4 Gemini Sr Convertible Hand Tr...,I&S/MAG-94877,Sales,5004,2020-03-24,2019-12-26,USD,18.25,12,219.00


### Converting dates back to strings

In [44]:
# Format the 'Deadline' column to display the date in a custom string format
ledger_df['Deadline'].dt.strftime("%A, %d %B, %Y")  

0         Saturday, 23 November, 2019
1               Monday, 15 June, 2020
2              Thursday, 07 May, 2020
3           Sunday, 22 December, 2019
4           Sunday, 22 December, 2019
                     ...             
37703    Thursday, 17 September, 2020
37704            Friday, 08 May, 2020
37705        Saturday, 11 April, 2020
37706          Friday, 20 March, 2020
37707        Thursday, 02 April, 2020
Name: Deadline, Length: 37708, dtype: object

### Pandas date arithmetic

In [46]:
ledger_df['Deadline']

0       2019-11-23
1       2020-06-15
2       2020-05-07
3       2019-12-22
4       2019-12-22
           ...    
37703   2020-09-17
37704   2020-05-08
37705   2020-04-11
37706   2020-03-20
37707   2020-04-02
Name: Deadline, Length: 37708, dtype: datetime64[ns]

In [47]:
ledger_df['Deadline'] + pd.Timedelta(days=2)

0       2019-11-25
1       2020-06-17
2       2020-05-09
3       2019-12-24
4       2019-12-24
           ...    
37703   2020-09-19
37704   2020-05-10
37705   2020-04-13
37706   2020-03-22
37707   2020-04-04
Name: Deadline, Length: 37708, dtype: datetime64[ns]

In [48]:
ledger_df['Deadline'] - pd.Timedelta(weeks=4)

0       2019-10-26
1       2020-05-18
2       2020-04-09
3       2019-11-24
4       2019-11-24
           ...    
37703   2020-08-20
37704   2020-04-10
37705   2020-03-14
37706   2020-02-21
37707   2020-03-05
Name: Deadline, Length: 37708, dtype: datetime64[ns]

In [49]:
ledger_df['Deadline'] + pd.Timedelta(weeks=4, days=3, hours=2, minutes=1)

0       2019-12-24 02:01:00
1       2020-07-16 02:01:00
2       2020-06-07 02:01:00
3       2020-01-22 02:01:00
4       2020-01-22 02:01:00
                ...        
37703   2020-10-18 02:01:00
37704   2020-06-08 02:01:00
37705   2020-05-12 02:01:00
37706   2020-04-20 02:01:00
37707   2020-05-03 02:01:00
Name: Deadline, Length: 37708, dtype: datetime64[ns]

In [50]:
ledger_df['Date'] - ledger_df['Deadline']

0         39 days
1       -166 days
2       -127 days
3         10 days
4         10 days
           ...   
37703   -170 days
37704    -38 days
37705    -11 days
37706     11 days
37707     -2 days
Length: 37708, dtype: timedelta64[ns]

In [51]:
(ledger_df['Date'] - ledger_df['Deadline']).iloc[0]

Timedelta('39 days 00:00:00')

In [52]:
pd.Timedelta(days=2) + pd.Timedelta(weeks=1)

Timedelta('9 days 00:00:00')

To express this difference in hours, you can simply divide the output of subtracting the two columns
by a Timedelta that uses the hour time interval.

In [54]:
(ledger_df['Date'] - ledger_df['Deadline']) / pd.Timedelta(hours=1)

0         936.0
1       -3984.0
2       -3048.0
3         240.0
4         240.0
          ...  
37703   -4080.0
37704    -912.0
37705    -264.0
37706     264.0
37707     -48.0
Length: 37708, dtype: float64

### Date offsets

To add 2 days to each value in the 'Deadline' column you can use:

In [57]:
ledger_df['Deadline'] + pd.DateOffset(days=2)  # Equivalent to: ledger_df['Deadline'] + pd.Timedelta(days=2)

0       2019-11-25
1       2020-06-17
2       2020-05-09
3       2019-12-24
4       2019-12-24
           ...    
37703   2020-09-19
37704   2020-05-10
37705   2020-04-13
37706   2020-03-22
37707   2020-04-04
Name: Deadline, Length: 37708, dtype: datetime64[ns]

In [58]:
ledger_df['Deadline'] + pd.Timedelta(days=2)

0       2019-11-25
1       2020-06-17
2       2020-05-09
3       2019-12-24
4       2019-12-24
           ...    
37703   2020-09-19
37704   2020-05-10
37705   2020-04-13
37706   2020-03-22
37707   2020-04-04
Name: Deadline, Length: 37708, dtype: datetime64[ns]

In [59]:
ledger_df['Deadline']

0       2019-11-23
1       2020-06-15
2       2020-05-07
3       2019-12-22
4       2019-12-22
           ...    
37703   2020-09-17
37704   2020-05-08
37705   2020-04-11
37706   2020-03-20
37707   2020-04-02
Name: Deadline, Length: 37708, dtype: datetime64[ns]

In [60]:
ledger_df['Deadline'] + pd.DateOffset(years=2, months=5)

0       2022-04-23
1       2022-11-15
2       2022-10-07
3       2022-05-22
4       2022-05-22
           ...    
37703   2023-02-17
37704   2022-10-08
37705   2022-09-11
37706   2022-08-20
37707   2022-09-02
Name: Deadline, Length: 37708, dtype: datetime64[ns]

### Periods

In [62]:
ledger_df['Deadline']

0       2019-11-23
1       2020-06-15
2       2020-05-07
3       2019-12-22
4       2019-12-22
           ...    
37703   2020-09-17
37704   2020-05-08
37705   2020-04-11
37706   2020-03-20
37707   2020-04-02
Name: Deadline, Length: 37708, dtype: datetime64[ns]

For instance, to convert all dates in the 'Deadline' column into a Series of quarterly periods, you can use:

In [64]:
ledger_df['Deadline'].dt.to_period(freq='Q')

0        2019Q4
1        2020Q2
2        2020Q2
3        2019Q4
4        2019Q4
          ...  
37703    2020Q3
37704    2020Q2
37705    2020Q2
37706    2020Q1
37707    2020Q2
Name: Deadline, Length: 37708, dtype: period[Q-DEC]

In [65]:
# Convert 'Deadline' column to period with quarterly frequency ending in September ('Q-SEP')
ledger_df['Deadline'].dt.to_period(freq='Q-SEP')

0        2020Q1
1        2020Q3
2        2020Q3
3        2020Q1
4        2020Q1
          ...  
37703    2020Q4
37704    2020Q3
37705    2020Q3
37706    2020Q2
37707    2020Q3
Name: Deadline, Length: 37708, dtype: period[Q-SEP]

In [66]:
ledger_df['Deadline']

0       2019-11-23
1       2020-06-15
2       2020-05-07
3       2019-12-22
4       2019-12-22
           ...    
37703   2020-09-17
37704   2020-05-08
37705   2020-04-11
37706   2020-03-20
37707   2020-04-02
Name: Deadline, Length: 37708, dtype: datetime64[ns]

In [67]:
# Y stands for year
ledger_df['Deadline'].dt.to_period(freq='Y')

0        2019
1        2020
2        2020
3        2019
4        2019
         ... 
37703    2020
37704    2020
37705    2020
37706    2020
37707    2020
Name: Deadline, Length: 37708, dtype: period[Y-DEC]

### Overthinking: Timezones

In [69]:
ledger_df['Deadline']

0       2019-11-23
1       2020-06-15
2       2020-05-07
3       2019-12-22
4       2019-12-22
           ...    
37703   2020-09-17
37704   2020-05-08
37705   2020-04-11
37706   2020-03-20
37707   2020-04-02
Name: Deadline, Length: 37708, dtype: datetime64[ns]

In [70]:
# Localize the 'Deadline' column to US Pacific timezone
us_pacific_deadlines = ledger_df['Deadline'].dt.tz_localize('US/Pacific')
us_pacific_deadlines

0       2019-11-23 00:00:00-08:00
1       2020-06-15 00:00:00-07:00
2       2020-05-07 00:00:00-07:00
3       2019-12-22 00:00:00-08:00
4       2019-12-22 00:00:00-08:00
                   ...           
37703   2020-09-17 00:00:00-07:00
37704   2020-05-08 00:00:00-07:00
37705   2020-04-11 00:00:00-07:00
37706   2020-03-20 00:00:00-07:00
37707   2020-04-02 00:00:00-07:00
Name: Deadline, Length: 37708, dtype: datetime64[ns, US/Pacific]

In [71]:
# Convert the timezone of the 'us_pacific_deadlines' column from 'US/Pacific' to 'US/Eastern'
us_pacific_deadlines.dt.tz_convert('US/Eastern')

0       2019-11-23 03:00:00-05:00
1       2020-06-15 03:00:00-04:00
2       2020-05-07 03:00:00-04:00
3       2019-12-22 03:00:00-05:00
4       2019-12-22 03:00:00-05:00
                   ...           
37703   2020-09-17 03:00:00-04:00
37704   2020-05-08 03:00:00-04:00
37705   2020-04-11 03:00:00-04:00
37706   2020-03-20 03:00:00-04:00
37707   2020-04-02 03:00:00-04:00
Name: Deadline, Length: 37708, dtype: datetime64[ns, US/Eastern]

In [72]:
# Convert the timezone of the 'us_pacific_deadlines' column from 'US/Pacific' to 'Europe/Berlin'
us_pacific_deadlines.dt.tz_convert('Europe/Berlin')

0       2019-11-23 09:00:00+01:00
1       2020-06-15 09:00:00+02:00
2       2020-05-07 09:00:00+02:00
3       2019-12-22 09:00:00+01:00
4       2019-12-22 09:00:00+01:00
                   ...           
37703   2020-09-17 09:00:00+02:00
37704   2020-05-08 09:00:00+02:00
37705   2020-04-11 09:00:00+02:00
37706   2020-03-20 08:00:00+01:00
37707   2020-04-02 09:00:00+02:00
Name: Deadline, Length: 37708, dtype: datetime64[ns, Europe/Berlin]

Conversely, if you need to remove timezone information from a
datetime64 column, you can use tz_localize and pass None as the
argument:

In [74]:
us_pacific_deadlines.dt.tz_localize(None)

0       2019-11-23
1       2020-06-15
2       2020-05-07
3       2019-12-22
4       2019-12-22
           ...    
37703   2020-09-17
37704   2020-05-08
37705   2020-04-11
37706   2020-03-20
37707   2020-04-02
Name: Deadline, Length: 37708, dtype: datetime64[ns]