## Datetime objects

In [1]:
# Import date from datetime
from datetime import date

# Create a date object
hurricane_andrew = date(1992, 8, 24)
# Which day of the week is the date?
print(hurricane_andrew.weekday())  #  0 = Monday, 1 = Tuesday, ...

0


In [2]:
import pandas as pd
df_ex1_dates = pd.read_csv("./data/exercise_dates.csv")
df_ex1_dates.head()

Unnamed: 0,year,month,day
0,1950,8,31
1,1950,9,5
2,1950,10,18
3,1950,10,21
4,1951,5,18


In [3]:
"""
Creates a list of datetime objects from a dateframe with year, month and day columns
"""
def get_date_list(df):
    # create list of datetime objects
    df["str_dates"] = df["year"].astype('string') + "-" +\
                                df["month"].astype('string') + "-" +\
                                df["day"].astype('string')
    df["dt_dates"] = pd.to_datetime(df["str_dates"])  # creates Timestamp objects
    df["dt_dates"] = df["dt_dates"].apply(lambda x: x.date())
    
    return(df["dt_dates"].to_list())

# test, expects: length of list: 235, first_item: 1950-08-31, last item: 2017-10-29
florida_hurricane_dates = get_date_list(df_ex1_dates)
# print(f"length of list: {len(florida_hurricane_dates)}, first_item: {florida_hurricane_dates[0]}, last item: {florida_hurricane_dates[-1]}")

In [4]:
# Counter for how many before June 1
early_hurricanes = 0

# iterate over the dates
for hurricane in florida_hurricane_dates:
  # Check if the month is before June (month number 6)
  if hurricane.month < 6:
    early_hurricanes = early_hurricanes + 1
    
print(early_hurricanes)

10


## Date math

In [5]:
# Import date
from datetime import date

# Create a date object for May 9th, 2007
start = date(2007, 5, 9)

# Create a date object for December 13th, 2007
end = date(2007, 12, 13)

# Subtract the two dates and print the number of days
print((end - start).days)

218


In [6]:
# A dictionary to count hurricanes per calendar month
hurricanes_each_month = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6:0,
		  				 7: 0, 8:0, 9:0, 10:0, 11:0, 12:0}

# iterate over all hurricanes
for hurricane in florida_hurricane_dates:
  # extract the month
  month = hurricane.month
  # update the count in dict
  hurricanes_each_month[month] += 1
  
print(hurricanes_each_month)

{1: 0, 2: 1, 3: 0, 4: 1, 5: 8, 6: 32, 7: 21, 8: 49, 9: 70, 10: 43, 11: 9, 12: 1}


In [7]:
df_dates_scrambled = pd.read_csv('./data/scrambled_dates.csv')
dates_scrambled = get_date_list(df_dates_scrambled)

# print first and last scrambled dates
print(dates_scrambled[0])
print(dates_scrambled[-1], "\n")

# sort the list of dates
dates_ordered = sorted(dates_scrambled)

# print first and last ordered dates
print(dates_ordered[0])
print(dates_ordered[-1])

1988-08-04
2011-07-18 

1950-08-31
2017-10-29


## Turning dates into strings

In [8]:
from datetime import date 
# Example date 
d = date(2017, 11, 5)  
# ISO 8601 format by default: YYYY-MM-DD 
print(d, "\n")

# Express the date in ISO 8601 format explicitly and put it in a list 
print([d.isoformat()]) 

2017-11-05 

['2017-11-05']


In [9]:
 # A few dates that computers once had trouble with 
some_dates = ['2000-01-01', '1999-12-31']  
# Print them in order - sorted function sorts ISO 8601 formatted dates correctly if they are expressed as strings
print(sorted(some_dates)) 

['1999-12-31', '2000-01-01']


In [10]:
# Assign the earliest date to first_date
first_date = sorted(florida_hurricane_dates)[0]

# Convert to ISO and US formats
iso = "Our earliest hurricane date: " + first_date.isoformat()
us = "Our earliest hurricane date: " + first_date.strftime("%m/%d/%Y")

print("ISO: " + iso)
print("US: " + us)

ISO: Our earliest hurricane date: 1950-08-31
US: Our earliest hurricane date: 08/31/1950


In [11]:
# Create a date object
andrew = date(1992, 8, 26)

# Print the date in the format 'YYYY-MM'
print(andrew.strftime("%Y-%m"))

1992-08


In [12]:
# Create a date object
andrew = date(1992, 8, 26)

# Print the date in the format 'MONTH (YYYY)'
print(andrew.strftime("%B (%Y)"))

August (1992)


In [13]:
# Create a date object
andrew = date(1992, 8, 26)

# Print the date in the format 'YYYY-DDD'
print(andrew.strftime("%Y-%j"))

1992-239


## Analyzing Bike Sharing Data

In [14]:
# read just the start and end date columns of capital bike csv data
df_bike = pd.read_csv('./data/capital-onebike.csv', usecols=[0, 1], parse_dates=[0, 1])
print(df_bike.columns)
print(f"dtype of Start date column: {df_bike['Start date'].dtype}, dtype of End date column: {df_bike['End date'].dtype}")
df_bike.head()

Index(['Start date', 'End date'], dtype='object')
dtype of Start date column: datetime64[ns], dtype of End date column: datetime64[ns]


Unnamed: 0,Start date,End date
0,2017-10-01 15:23:25,2017-10-01 15:26:26
1,2017-10-01 15:42:57,2017-10-01 17:49:59
2,2017-10-02 06:37:10,2017-10-02 06:42:53
3,2017-10-02 08:56:45,2017-10-02 09:18:03
4,2017-10-02 18:23:48,2017-10-02 18:45:05


In [15]:
from datetime import datetime

# convert to datetime63[s], see third example here:
# https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-and-timedelta-arithmetic
df_bike["Start date"] = df_bike["Start date"].astype('datetime64[s]')
df_bike["End date"] = df_bike["Start date"].astype('datetime64[s]')
# df_bike.head()
onebike_datetimes = []

# iterate through the rows
for row in df_bike.itertuples(index=False, name=None):
    item = {"start": row[0], "end": row[1]}
    onebike_datetimes.append(item)

onebike_datetimes[0:4]

[{'start': Timestamp('2017-10-01 15:23:25'),
  'end': Timestamp('2017-10-01 15:23:25')},
 {'start': Timestamp('2017-10-01 15:42:57'),
  'end': Timestamp('2017-10-01 15:42:57')},
 {'start': Timestamp('2017-10-02 06:37:10'),
  'end': Timestamp('2017-10-02 06:37:10')},
 {'start': Timestamp('2017-10-02 08:56:45'),
  'end': Timestamp('2017-10-02 08:56:45')}]

### How many trips before and after noon?

In [16]:
# Create dictionary to hold results
trip_counts = {'AM': 0, 'PM': 0}
  
# Loop over all trips
for trip in onebike_datetimes:
  # Check to see if the trip starts before noon
  if trip['start'].hour < 12:
    # Increment the counter for before noon
    trip_counts['AM'] += 1
  else:
    # Increment the counter for after noon
    trip_counts['PM'] += 1
  
print(trip_counts)

{'AM': 94, 'PM': 196}


## Printing and parsing datetimes

`strftime` codes can be found here:
+ https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes  or
+ https://strftime.org/

In [17]:
dt = datetime.strptime("12/30/2017 15:19:13",  
                       "%m/%d/%Y %H:%M:%S")
print(dt)
print(type(dt))

2017-12-30 15:19:13
<class 'datetime.datetime'>


In [18]:
# Starting string, in YYYY-MM-DD HH:MM:SS format
s = '2017-02-03 00:00:01'

# Write a format string to parse s
fmt = '%Y-%m-%d %H:%M:%S'

# Create a datetime object d
d = datetime.strptime(s, fmt)

# Print d
print(d)

2017-02-03 00:00:01


In [19]:
# Starting string, in YYYY-MM-DD format
s = '2030-10-15'

# Write a format string to parse s
fmt = '%Y-%m-%d'

# Create a datetime object d
d = datetime.strptime(s, fmt)

# Print d
print(d)

2030-10-15 00:00:00


In [20]:
# Starting string, in MM/DD/YYYY HH:MM:SS format
s = '12/15/1986 08:00:00'

# Write a format string to parse s
fmt = '%m/%d/%Y %H:%M:%S'

# Create a datetime object d
d = datetime.strptime(s, fmt)

# Print d
print(d)

1986-12-15 08:00:00


In [27]:
# read in the exercise date string
df_datetime_strings = pd.read_csv('./data/onebike_datetime_strings.csv', usecols=[0, 1], dtype={"start": str, "end": str})
onebike_datetime_strings = []
for row in df_datetime_strings.itertuples(index=False, name=None):
    onebike_datetime_strings.append((row[0], row[1]))

onebike_datetime_strings[:3]

[('2017-10-01 15:23:25', '2017-10-01 15:26:26'),
 ('2017-10-01 15:42:57', '2017-10-01 17:49:59'),
 ('2017-10-02 06:37:10', '2017-10-02 06:42:53')]

In [28]:
# write out the format string
fmt = "%Y-%m-%d %H:%M:%S"

# Initialize a list for holding the pairs of datetime objects
onebike_datetimes = []

# Loop over all trips
for (start, end) in onebike_datetime_strings:
  trip = {'start': datetime.strptime(start, fmt),
          'end': datetime.strptime(end, fmt)}
  
  # Append the trip
  onebike_datetimes.append(trip)

In [29]:
# Pull out the start of the first trip
first_start = onebike_datetimes[0]['start']

# Format to feed to strftime()
fmt = "%Y-%m-%dT%H:%M:%S"

# Print out date with .isoformat(), then with .strftime() to compare
print(first_start.isoformat())
print(first_start.strftime(fmt))

2017-10-01T15:23:25
2017-10-01T15:23:25


In [30]:
# Starting timestamps
timestamps = [1514665153, 1514664543]

# Datetime objects
dts = []

# Loop
for ts in timestamps:
  dts.append(datetime.fromtimestamp(ts))
  
# Print results
print(dts)

[datetime.datetime(2017, 12, 30, 13, 19, 13), datetime.datetime(2017, 12, 30, 13, 9, 3)]


## Working with durations