## Datetime objects

In [1]:
# Import date from datetime
from datetime import date

# Create a date object
hurricane_andrew = date(1992, 8, 24)
# Which day of the week is the date?
print(hurricane_andrew.weekday())  #  0 = Monday, 1 = Tuesday, ...

0


In [2]:
import pandas as pd
df_ex1_dates = pd.read_csv("./data/exercise_dates.csv")
df_ex1_dates.head()

Unnamed: 0,year,month,day
0,1950,8,31
1,1950,9,5
2,1950,10,18
3,1950,10,21
4,1951,5,18


In [3]:
"""
Creates a list of datetime objects from a dateframe with year, month and day columns
"""
def get_date_list(df):
    # create list of datetime objects
    df["str_dates"] = df["year"].astype('string') + "-" +\
                                df["month"].astype('string') + "-" +\
                                df["day"].astype('string')
    df["dt_dates"] = pd.to_datetime(df["str_dates"])  # creates Timestamp objects
    df["dt_dates"] = df["dt_dates"].apply(lambda x: x.date())
    
    return(df["dt_dates"].to_list())

# test, expects: length of list: 235, first_item: 1950-08-31, last item: 2017-10-29
florida_hurricane_dates = get_date_list(df_ex1_dates)
# print(f"length of list: {len(florida_hurricane_dates)}, first_item: {florida_hurricane_dates[0]}, last item: {florida_hurricane_dates[-1]}")

In [4]:
# Counter for how many before June 1
early_hurricanes = 0

# iterate over the dates
for hurricane in florida_hurricane_dates:
  # Check if the month is before June (month number 6)
  if hurricane.month < 6:
    early_hurricanes = early_hurricanes + 1
    
print(early_hurricanes)

10


## Date math

In [5]:
# Import date
from datetime import date

# Create a date object for May 9th, 2007
start = date(2007, 5, 9)

# Create a date object for December 13th, 2007
end = date(2007, 12, 13)

# Subtract the two dates and print the number of days
print((end - start).days)

218


In [6]:
# A dictionary to count hurricanes per calendar month
hurricanes_each_month = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6:0,
		  				 7: 0, 8:0, 9:0, 10:0, 11:0, 12:0}

# iterate over all hurricanes
for hurricane in florida_hurricane_dates:
  # extract the month
  month = hurricane.month
  # update the count in dict
  hurricanes_each_month[month] += 1
  
print(hurricanes_each_month)

{1: 0, 2: 1, 3: 0, 4: 1, 5: 8, 6: 32, 7: 21, 8: 49, 9: 70, 10: 43, 11: 9, 12: 1}


In [7]:
df_dates_scrambled = pd.read_csv('./data/scrambled_dates.csv')
dates_scrambled = get_date_list(df_dates_scrambled)

# print first and last scrambled dates
print(dates_scrambled[0])
print(dates_scrambled[-1], "\n")

# sort the list of dates
dates_ordered = sorted(dates_scrambled)

# print first and last ordered dates
print(dates_ordered[0])
print(dates_ordered[-1])

1988-08-04
2011-07-18 

1950-08-31
2017-10-29


## Turning dates into strings

In [8]:
from datetime import date 
# Example date 
d = date(2017, 11, 5)  
# ISO 8601 format by default: YYYY-MM-DD 
print(d, "\n")

# Express the date in ISO 8601 format explicitly and put it in a list 
print([d.isoformat()]) 

2017-11-05 

['2017-11-05']


In [9]:
 # A few dates that computers once had trouble with 
some_dates = ['2000-01-01', '1999-12-31']  
# Print them in order - sorted function sorts ISO 8601 formatted dates correctly if they are expressed as strings
print(sorted(some_dates)) 

['1999-12-31', '2000-01-01']


In [10]:
# Assign the earliest date to first_date
first_date = sorted(florida_hurricane_dates)[0]

# Convert to ISO and US formats
iso = "Our earliest hurricane date: " + first_date.isoformat()
us = "Our earliest hurricane date: " + first_date.strftime("%m/%d/%Y")

print("ISO: " + iso)
print("US: " + us)

ISO: Our earliest hurricane date: 1950-08-31
US: Our earliest hurricane date: 08/31/1950


In [11]:
# Create a date object
andrew = date(1992, 8, 26)

# Print the date in the format 'YYYY-MM'
print(andrew.strftime("%Y-%m"))

1992-08


In [12]:
# Create a date object
andrew = date(1992, 8, 26)

# Print the date in the format 'MONTH (YYYY)'
print(andrew.strftime("%B (%Y)"))

August (1992)


In [13]:
# Create a date object
andrew = date(1992, 8, 26)

# Print the date in the format 'YYYY-DDD'
print(andrew.strftime("%Y-%j"))

1992-239


## Analyzing Bike Sharing Data

In [14]:
# read just the start and end date columns of capital bike csv data
df_bike = pd.read_csv('./data/capital-onebike.csv', usecols=[0, 1], parse_dates=[0, 1])
print(df_bike.columns)
print(f"dtype of Start date column: {df_bike['Start date'].dtype}, dtype of End date column: {df_bike['End date'].dtype}")
df_bike.head()

Index(['Start date', 'End date'], dtype='object')
dtype of Start date column: datetime64[ns], dtype of End date column: datetime64[ns]


Unnamed: 0,Start date,End date
0,2017-10-01 15:23:25,2017-10-01 15:26:26
1,2017-10-01 15:42:57,2017-10-01 17:49:59
2,2017-10-02 06:37:10,2017-10-02 06:42:53
3,2017-10-02 08:56:45,2017-10-02 09:18:03
4,2017-10-02 18:23:48,2017-10-02 18:45:05


In [15]:
from datetime import datetime

# convert to datetime63[s], see third example here:
# https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-and-timedelta-arithmetic
df_bike["Start date"] = df_bike["Start date"].astype('datetime64[s]')
df_bike["End date"] = df_bike["Start date"].astype('datetime64[s]')
# df_bike.head()
onebike_datetimes = []

# iterate through the rows
for row in df_bike.itertuples(index=False, name=None):
    item = {"start": row[0], "end": row[1]}
    onebike_datetimes.append(item)

onebike_datetimes[0:4]

[{'start': Timestamp('2017-10-01 15:23:25'),
  'end': Timestamp('2017-10-01 15:23:25')},
 {'start': Timestamp('2017-10-01 15:42:57'),
  'end': Timestamp('2017-10-01 15:42:57')},
 {'start': Timestamp('2017-10-02 06:37:10'),
  'end': Timestamp('2017-10-02 06:37:10')},
 {'start': Timestamp('2017-10-02 08:56:45'),
  'end': Timestamp('2017-10-02 08:56:45')}]

### How many trips before and after noon?

In [16]:
# Create dictionary to hold results
trip_counts = {'AM': 0, 'PM': 0}
  
# Loop over all trips
for trip in onebike_datetimes:
  # Check to see if the trip starts before noon
  if trip['start'].hour < 12:
    # Increment the counter for before noon
    trip_counts['AM'] += 1
  else:
    # Increment the counter for after noon
    trip_counts['PM'] += 1
  
print(trip_counts)

{'AM': 94, 'PM': 196}


## Printing and parsing datetimes

`strftime` codes can be found here:
+ https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes  or
+ https://strftime.org/

In [17]:
dt = datetime.strptime("12/30/2017 15:19:13",  
                       "%m/%d/%Y %H:%M:%S")
print(dt)
print(type(dt))

2017-12-30 15:19:13
<class 'datetime.datetime'>


In [18]:
# Starting string, in YYYY-MM-DD HH:MM:SS format
s = '2017-02-03 00:00:01'

# Write a format string to parse s
fmt = '%Y-%m-%d %H:%M:%S'

# Create a datetime object d
d = datetime.strptime(s, fmt)

# Print d
print(d)

2017-02-03 00:00:01


In [19]:
# Starting string, in YYYY-MM-DD format
s = '2030-10-15'

# Write a format string to parse s
fmt = '%Y-%m-%d'

# Create a datetime object d
d = datetime.strptime(s, fmt)

# Print d
print(d)

2030-10-15 00:00:00


In [20]:
# Starting string, in MM/DD/YYYY HH:MM:SS format
s = '12/15/1986 08:00:00'

# Write a format string to parse s
fmt = '%m/%d/%Y %H:%M:%S'

# Create a datetime object d
d = datetime.strptime(s, fmt)

# Print d
print(d)

1986-12-15 08:00:00


In [21]:
# read in the exercise date string
df_datetime_strings = pd.read_csv('./data/onebike_datetime_strings.csv', usecols=[0, 1], dtype={"start": str, "end": str})
onebike_datetime_strings = []
for row in df_datetime_strings.itertuples(index=False, name=None):
    onebike_datetime_strings.append((row[0], row[1]))

onebike_datetime_strings[:3]

[('2017-10-01 15:23:25', '2017-10-01 15:26:26'),
 ('2017-10-01 15:42:57', '2017-10-01 17:49:59'),
 ('2017-10-02 06:37:10', '2017-10-02 06:42:53')]

In [22]:
# write out the format string
fmt = "%Y-%m-%d %H:%M:%S"

# Initialize a list for holding the pairs of datetime objects
onebike_datetimes = []

# Loop over all trips
for (start, end) in onebike_datetime_strings:
    trip = {'start': datetime.strptime(start, fmt),
            'end': datetime.strptime(end, fmt)}
    # Append the trip
    onebike_datetimes.append(trip)

print(type(onebike_datetimes[0]["start"]))
print(onebike_datetimes[0]["start"])

<class 'datetime.datetime'>
2017-10-01 15:23:25


In [23]:
# Pull out the start of the first trip
first_start = onebike_datetimes[0]['start']

# Format to feed to strftime()
fmt = "%Y-%m-%dT%H:%M:%S"

# Print out date with .isoformat(), then with .strftime() to compare
print(first_start.isoformat())
print(first_start.strftime(fmt))

2017-10-01T15:23:25
2017-10-01T15:23:25


In [24]:
# Starting timestamps
timestamps = [1514665153, 1514664543]

# Datetime objects
dts = []

# Loop
for ts in timestamps:
  dts.append(datetime.fromtimestamp(ts))
  
# Print results
print(dts)

[datetime.datetime(2017, 12, 30, 13, 19, 13), datetime.datetime(2017, 12, 30, 13, 9, 3)]


## Working with durations

In [25]:
 # Create example datetimes 
start = datetime(2017, 10, 8, 23, 46, 47) 
end = datetime(2017, 10, 9, 0, 10, 57) 

# Subtract datetimes to create a timedelta 
duration = end - start 
print(duration.total_seconds()) 

1450.0


In [26]:
 # Import timedelta 
from datetime import timedelta 
# Create a timedelta 
delta1 = timedelta(seconds=1)

print(start)
# One second later 
print(start + delta1) 

2017-10-08 23:46:47
2017-10-08 23:46:48


In [27]:
 # Create a one day and one second timedelta 
delta2 = timedelta(days=1, seconds=1) 
print(start)

# One day and one second later 
print(start + delta2) 

2017-10-08 23:46:47
2017-10-09 23:46:48


In [28]:
 # Create a negative timedelta of one week 
delta3 = timedelta(weeks=-1) 
print(start)

# One week earlier 
print(start + delta3)
# can get same result by subtracting a positive timedelta
delta4 = timedelta(weeks=1)
# also one week earlier 
print(start - delta4) 

2017-10-08 23:46:47
2017-10-01 23:46:47
2017-10-01 23:46:47


## Turning pairs of datetimes into durations

When working with timestamps, we often want to know how much time has elapsed between events. We can use datetime arithmetic to ask Python to do the heavy lifting for us so we don't need to worry about day, month, or year boundaries. Let's calculate the number of seconds that the bike was out of the dock for each trip.

Continuing our work from a previous cells, each element of the `onebike_datetimes` list consists of two datetime objects, corresponding to the start and end of a trip, respectively.

In [29]:
# Initialize a list for all the trip durations
onebike_durations = []

for trip in onebike_datetimes:
    # Create a timedelta object corresponding to the length of the trip
    trip_duration = trip['end'] - trip['start']
  
    # Get the total elapsed seconds in trip_duration
    trip_length_seconds = trip_duration.total_seconds()
  
    # Append the results to our list
    onebike_durations.append(trip_length_seconds)

## Average trip time

W20529 took 291 trips in our data set. How long were the trips on average?


In [30]:
# What was the total duration of all trips?
total_elapsed_time = sum(onebike_durations)

# What was the total number of trips?
number_of_trips = len(onebike_durations)
  
# Divide the total duration by the number of trips
print(total_elapsed_time / number_of_trips)

1178.9310344827586


## The long and the short of why time is hard

Out of 291 trips taken by W20529, how long was the longest? How short was the shortest? Does anything look fishy?

In [31]:
# Calculate shortest and longest trips
shortest_trip = min(onebike_durations)
longest_trip = max(onebike_durations)

# Print out the results
print("The shortest trip was " + str(shortest_trip) + " seconds")
print("The longest trip was " + str(longest_trip) + " seconds")

The shortest trip was -3346.0 seconds
The longest trip was 76913.0 seconds


## Weird huh?!

For at least one trip, the bike returned before it left. This was caused by not accounting for Daylight Savings time. We'll deal with this in the next section.

## UTC offets

+ **naive** `datetime` and `timedelta` objects don't know anything about timezones
+ We need to use `timezone` objects to make these objects _timezone aware_

In [32]:
# Import relevant classes 
from datetime import datetime, timedelta, timezone 
# US Eastern Standard time zone - this is the timezone the bike data was collected in
ET = timezone(timedelta(hours=-5))  
# Timezone-aware datetime 
dt = datetime(2017, 12, 30, 15, 9, 3, tzinfo = ET)
print(dt)  # now UTC offset is included

2017-12-30 15:09:03-05:00


### What would the times be if the data was collected in India Standard time zone?

In [33]:
 # India Standard time zone 
IST = timezone(timedelta(hours=5, minutes=30)) 
# Convert to IST 
print(dt.astimezone(IST))  # same moment (time of first ride), different clock

2017-12-31 01:39:03+05:30


## Adjusting timezone vs changing tzinfo


In [34]:
print(dt)  # original time and timezone
print(dt.replace(tzinfo=timezone.utc))  # change just the timezone
# Change original to match UTC 
print(dt.astimezone(timezone.utc))      # same original time, but expressed as UTC

2017-12-30 15:09:03-05:00
2017-12-30 15:09:03+00:00
2017-12-30 20:09:03+00:00


In [35]:
# Import datetime, timedelta, timezone
from datetime import datetime, timedelta, timezone

# Create a timezone for Pacific Standard Time, or UTC-8
pst = timezone(timedelta(hours=-8))

# October 1, 2017 at 15:26:26, UTC-8
dt = datetime(2017, 10, 1, 15, 26, 26, tzinfo=pst)

# Print results
print(dt.isoformat())

2017-10-01T15:26:26-08:00


In [36]:
# Import datetime, timedelta, timezone
from datetime import datetime, timedelta, timezone

# Create a timezone for Australian Eastern Daylight Time, or UTC+11
aedt = timezone(timedelta(hours=11))

# October 1, 2017 at 15:26:26, UTC+11
dt = datetime(2017, 10, 1, 15, 26, 26, tzinfo=aedt)

# Print results
print(dt.isoformat())

2017-10-01T15:26:26+11:00


In [37]:
# Create a timezone object corresponding to UTC-4
edt = timezone(timedelta(hours=-4))

# Loop over trips, updating the start and end datetimes to be in UTC-4
for trip in onebike_datetimes[:10]:
    # Update trip['start'] and trip['end']
    trip['start'] = trip['start'].replace(tzinfo=edt)
    trip['end'] = trip['end'].replace(tzinfo=edt)

## What time did the bike leave in UTC?

Having set the timezone for the first ten rides that W20529 took, let's see what time the bike left in UTC.

In [38]:
# Loop over the trips
for trip in onebike_datetimes[:10]:
    # Pull out the start
    dt = trip['start']
    # Move dt to be in UTC
    dt = dt.astimezone(timezone.utc)
  
    # Print the start time in UTC
    print('Original:', trip['start'], '| UTC:', dt.isoformat())

Original: 2017-10-01 15:23:25-04:00 | UTC: 2017-10-01T19:23:25+00:00
Original: 2017-10-01 15:42:57-04:00 | UTC: 2017-10-01T19:42:57+00:00
Original: 2017-10-02 06:37:10-04:00 | UTC: 2017-10-02T10:37:10+00:00
Original: 2017-10-02 08:56:45-04:00 | UTC: 2017-10-02T12:56:45+00:00
Original: 2017-10-02 18:23:48-04:00 | UTC: 2017-10-02T22:23:48+00:00
Original: 2017-10-02 18:48:08-04:00 | UTC: 2017-10-02T22:48:08+00:00
Original: 2017-10-02 19:18:10-04:00 | UTC: 2017-10-02T23:18:10+00:00
Original: 2017-10-02 19:37:32-04:00 | UTC: 2017-10-02T23:37:32+00:00
Original: 2017-10-03 08:24:16-04:00 | UTC: 2017-10-03T12:24:16+00:00
Original: 2017-10-03 18:17:07-04:00 | UTC: 2017-10-03T22:17:07+00:00


## Putting the bike trips into the right time zone

Instead of setting the timezones for W20529 by hand, let's assign them to their IANA timezone: 'America/New_York'. Since we know their political jurisdiction, we don't need to look up their UTC offset. Python will do that for us.

In [39]:
# Import tz
from dateutil import tz

# Create a timezone object for Eastern Time
et = tz.gettz('America/New_York')

# Loop over trips, updating the datetimes to be in Eastern Time
for trip in onebike_datetimes[:10]:
    # Update trip['start'] and trip['end']
    trip['start'] = trip['start'].replace(tzinfo=et)
    trip['end'] = trip['end'].replace(tzinfo=et)

## What time did the bike leave? (Global edition)

When we need to move a datetime from one timezone into another, use `.astimezone()` and `tz`. Often we will be moving things into UTC, but for fun let's try moving things from 'America/New_York' into a few different time zones.

In [40]:
# Create the timezone object
uk = tz.gettz('Europe/London')

# Pull out the start of the first trip
local = onebike_datetimes[0]['start']

# What time was it in the UK?
notlocal = local.astimezone(uk)

# Print them out and see the difference
print(local.isoformat())
print(notlocal.isoformat())

2017-10-01T15:23:25-04:00
2017-10-01T20:23:25+01:00


In [41]:
# Create the timezone object
ist = tz.gettz('Asia/Kolkata')

# Pull out the start of the first trip
local = onebike_datetimes[0]['start']

# What time was it in India?
notlocal = local.astimezone(ist)

# Print them out and see the difference
print(local.isoformat())
print(notlocal.isoformat())

2017-10-01T15:23:25-04:00
2017-10-02T00:53:25+05:30


In [42]:
# Create the timezone object
sm = tz.gettz('Pacific/Apia')

# Pull out the start of the first trip
local = onebike_datetimes[0]['start']

# What time was it in Samoa?
notlocal = local.astimezone(sm)

# Print them out and see the difference
print(local.isoformat())
print(notlocal.isoformat())

2017-10-01T15:23:25-04:00
2017-10-02T09:23:25+14:00


## Daylight Savings Time

Start with an example:  <img src='daylight_savings_example.png'>

In [44]:
# create naive dt (no tz information) object to see this more clearly
spring_ahead_159am = datetime(2017, 3, 12, 1, 59, 59) 
print(spring_ahead_159am.isoformat())

spring_ahead_3am = datetime(2017, 3, 12, 3, 0, 0) 
print(spring_ahead_3am.isoformat())

2017-03-12T01:59:59
2017-03-12T03:00:00


In [45]:
# how much time has elapsed between these instances?
elapsed_naive = (spring_ahead_3am - spring_ahead_159am).total_seconds()
print(elapsed_naive)  # an hour and 1 second as we'd expect, but in reality, only a second really elapsed

3601.0


In [None]:
# fixing the calculation to account for DST
