# Programming for Data Science Summary
## Chapter 09 - Handling Dates

In [2]:
import pandas as pd
from datetime import time, timedelta, date, datetime
from dateutil.parser import parse 
import pytz


## Datetime Objects

In [3]:
# 1. Create new instances
time(10, 20, 30, 40) # Create instance of time 10:20:30:40 (hour, minute, second, microsecond)
date(2025, 12, 1) # Create instance of date 2025/12/1 (follows order year-month-day)

datetime(2025, 12, 1, 10, 20, 30, 40) # Create instance of datetime, combining the previous objects

d = date(2025,12,1)
t = time(10,11,12)
datetime.combine(d, t) # Combine date from two instances

timedelta(days=1) # Creates timedelta instance of one day

display()

See the documentation to see what methods you can use on the objects.

https://docs.python.org/3/library/datetime.html#timedelta-objects

https://docs.python.org/3/library/datetime.html#date-objects

https://docs.python.org/3/library/datetime.html#datetime.date

https://docs.python.org/3/library/datetime.html#time-objects


In [5]:
# 2.1. datetime objects <-> tuples
d = datetime(2025, 12, 1, 10, 20, 30, 40)
t = d.timetuple()
new_d = datetime(*t[:6])

display()

In [None]:
# 2.2. Conversion string <-> datetime objects

# str -> date
s = '12-01-2024 | 12:38:10'
datetime.strptime(s, '%d-%m-%Y | %H:%M:%S') # Can personalize the format in any way, see table below for special characters

s = '1, November 23, 10:10am'
parse(s) # Works with common cases

# date -> str
d = datetime(2025, 12, 1, 10, 20, 30, 40)
d.strftime('%d-%M-%Y :: %H,%M') # Same as strptime but inverse operation

display()


Time formatting is same as done in the *C programming language*, see the documentation below:

https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes

In [4]:
# 3. Get functions for current timedate
datetime.today() # Get current date
datetime.now() # Get current datetime

display()

In [9]:
# 4. Datetime algebra
# Note: datetime algebra operations usually lead to timedelta instances
d1 = datetime.today()
d2 = datetime(2025,12,1)
delta1 = d2-d1

# Or you can do operations of type datetime x timedelta -> timedelta
d1 = datetime.today()
delta = timedelta(days=100)
delta2 = d1 + delta

display()

## 2. Timezones

In [11]:
# Handling timezones
tokyo_timezone = pytz.timezone('Asia/Tokyo') # create a timezone instance, which will be used to convert timedates
rome_timezone = pytz.timezone('Europe/Rome')

now = datetime.now()
jp_now = tokyo_timezone.localize(now) # datetime -> localized datetime
it_now = jp_now.astimezone(rome_timezone) # localized datetime -> another localized datetime

## 3. Integration with Pandas

In [None]:
# 1. Load dataframe and parsing data at the same time
d1 = pd.read_csv('data.csv', parse_dates=["Date of birth"]) # Automatically parse dates in the specified list. Might not work

d = pd.read_csv('dates.csv', parse_dates={"Date": ["Year", "Month", "Day"]}) # Parse dates from multiple columns into one single column

raw = pd.read_csv("data.csv")
raw['Date of birth'] = pd.to_datetime(raw['Date of birth'], format="%Y-%M-%d", errors="coerce") # Manually parse date
    # format allows us to specify format, like in strptime or strftime
    # errors allows us to handle cases where parser fails (NaT)

# 2. Use datetime methods or attributes
d['Date'].dt.year
d['Date'].dt.day_of_year
d['Date'].dt.month_name()
d['Date'].dt.day_name()

# 3. Resample method - similar to aggregation
d1.resample('M', on='Date of birth')['Job Title'].agg(pd.Series.mode)

display()


  d = pd.read_csv('dates.csv', parse_dates={"Date": ["Year", "Month", "Day"]}) # Parse dates from multiple columns into one single column
  d1.resample('M', on='Date of birth')['Job Title'].agg(pd.Series.mode)


For more attributes or methods see Pandas docs:

https://pandas.pydata.org/docs/reference/series.html#datetime-properties
