## Creating date objects

In [None]:
from datetime import date

# Three sample dates, intentionally not in chronological order so that
# min() and sorted() below have something to do.
d1 = date(2016, 10, 7)
d2 = date(2017, 6, 21)
d3 = date(2016, 6, 21)
hurricanes_dates = [d1, d2, d3]
print(hurricanes_dates)

# A date exposes its parts as attributes; weekday() counts from Monday = 0.
first_date = hurricanes_dates[0]
print("access year and weekday: ", first_date.year, first_date.weekday())

# date objects are ordered, so min() and sorted() work directly on the list.
# Printing a single date renders it as YYYY-MM-DD.
earliest_date = min(hurricanes_dates)
print('min date: ', earliest_date)
print("sorted dates: ", sorted(hurricanes_dates))

`Weekdays in Python: weekday() returns 0 for Monday through 6 for Sunday`

## Creating Date and time Objects

In [None]:
from datetime import datetime

# Keyword form of datetime(2017, 10, 1, 15, 23, 25, 500000); the positional
# order is year, month, day, hour, minute, second, microsecond.
dt = datetime(
    year=2017, month=10, day=1,
    hour=15, minute=23, second=25, microsecond=500000,
)
print("Date: ", dt)

# replace() returns a new datetime with the named fields swapped out;
# here the sub-minute parts are zeroed and dt itself is left untouched.
dt_replaced = dt.replace(microsecond=0, second=0)
print("replaced dt: ", dt_replaced)

## timedelta / duration / Time Difference

In [None]:
from datetime import timedelta

# Subtracting one date from another yields a timedelta.
# d1 and d2 are the date objects created in the first cell of this notebook.
delta = d2 - d1
print("Difference: ", delta)
print("Days difference: ", delta.days)
print("Seconds difference: ", delta.total_seconds())

# A timedelta can also be built directly and added to / subtracted from a
# datetime (dt comes from the "Creating Date and time Objects" cell).
td = timedelta(days=10, seconds=10)  # 10 days and 10 seconds
# The label states the full amount being added so that it matches the printed result.
print(f"Add 10 days and 10 seconds to {dt}: ", dt + td)

# Adding a negative timedelta and subtracting a positive one are equivalent.
print(f"Subtract 1 minute from {dt}: ", dt + timedelta(minutes=-1))
print(f"Subtract 1 minute from {dt}: ", dt - timedelta(minutes=1))


## Get Formatted dates and Parsing Dates

In [None]:
# isoformat() renders the ISO 8601 text form; strftime() fills in a custom
# template using % directives. d1 is a date, dt is a datetime (earlier cells).
d1_iso = d1.isoformat()
d1_custom = d1.strftime("Year is %Y and day is %d")
print("ISO Format of d1: ", d1_iso)
print("String format of d1: ", d1_custom)

dt_iso = dt.isoformat()
dt_custom = dt.strftime("%Y/%m/%d %H:%M:%S")
print("ISO Format of dt: ", dt_iso)
print("String format of dt: ", dt_custom)

# Parsing dates
from datetime import datetime

# strptime(text, format) is the inverse of strftime: the format string must
# describe the text exactly, and the result is a datetime object.
raw_text = "12/30/2017 15:19:13"
raw_format = "%m/%d/%Y %H:%M:%S"
parsed_dt = datetime.strptime(raw_text, raw_format)
print("Type: ", type(parsed_dt))
print("Year and hour: ", parsed_dt.year, parsed_dt.hour)

# A POSIX timestamp (seconds since the epoch) converted to a datetime.
# With no tz argument, fromtimestamp() returns a naive datetime in the
# machine's local time, so the printed value depends on where this runs.
ts = 1514665153.0
print("Timestamp to date: ", datetime.fromtimestamp(ts))


## Working with timezone offsets

In [None]:
from datetime import datetime, timedelta, timezone

# Fixed-offset zone for US Eastern Standard Time (UTC-05:00).
ET = timezone(timedelta(hours=-5))
# An aware datetime: the wall-clock fields plus the offset they are expressed in.
dt_ET = datetime(2017, 12, 30, 15, 9, 3, tzinfo=ET)
print("Date time as per Easter standard timezone: ", dt_ET)

# astimezone() keeps the same instant and re-expresses it in another zone,
# so the clock fields change. India Standard Time is UTC+05:30, i.e. 10h30m
# ahead of ET.
IST = timezone(timedelta(hours=5, minutes=30))
dt_in_ist = dt_ET.astimezone(IST)
print("Date time shifted to indian timezone: ", dt_in_ist)

# replace(tzinfo=...) keeps the clock fields and only swaps the attached
# offset, so the result refers to a different instant.
relabelled_utc = dt_ET.replace(tzinfo=timezone.utc)
relabelled_ist = dt_ET.replace(tzinfo=IST)
print(f"Timezone replaced to utc timezone for {dt_ET}: ", relabelled_utc)
print(f"Timezone replaced to indian timezone for {dt_ET}: ", relabelled_ist)

print('-' * 100)

# Getting timezone offsets from the timezone database
from dateutil import tz

# Look the zone up by name rather than hard-coding an offset; the offset that
# applies is then determined by the zone's rules for the given date.
et = tz.gettz('America/New_York')  # Eastern time
dt_et = datetime(
    year=2017, month=12, day=30,
    hour=15, minute=9, second=3,
    tzinfo=et,
)
print("datetime with Timezone offset: ", dt_et)

## Starting Daylight saving time

In [None]:
from datetime import datetime, timedelta, timezone

# Two naive wall-clock readings on 2017-03-12, the day US Eastern clocks
# jumped forward: 1:59:59 AM and 3:00:00 AM.
spring_ahead_159am = datetime(2017, 3, 12, 1, 59, 59)
print("Time 1: ", spring_ahead_159am.isoformat())

spring_ahead_3am = datetime(2017, 3, 12, 3, 0, 0)
print("Time 2: ",spring_ahead_3am.isoformat())

# Naive datetimes know nothing about the clock change, so plain arithmetic
# reports 1h 0m 1s (3601 seconds).
print("Time difference in seconds: ", (spring_ahead_3am - spring_ahead_159am).total_seconds())

print('-' * 100)

# Fixed offsets for standard and daylight time.
# (tz.gettz('US/Eastern') from dateutil could be used for both instead.)
EST = timezone(timedelta(hours=-5))
EDT = timezone(timedelta(hours=-4))

# Attach the offset that was actually in force at each reading.
spring_ahead_159am = spring_ahead_159am.replace(tzinfo = EST)
print("Time, before start of DLS: ", spring_ahead_159am.isoformat())
spring_ahead_3am = spring_ahead_3am.replace(tzinfo = EDT)
print("Time, upon start of DLS: ",spring_ahead_3am.isoformat())

# Aware datetimes carrying different tzinfo objects are subtracted as
# instants, so this already gives the true elapsed time of 1 second.
print("Time difference with DLS: ", (spring_ahead_3am - spring_ahead_159am).total_seconds())

# Same check after explicitly converting both values to UTC.
# timezone.utc (imported above) is used so this cell does not depend on the
# dateutil `tz` import living in a different cell; the printed values are
# the same as with dateutil's UTC object.
spring_ahead_159am = spring_ahead_159am.astimezone(timezone.utc) # switch timezone to utc
spring_ahead_3am = spring_ahead_3am.astimezone(timezone.utc) # switch timezone to utc
print(f"Time difference with UTC offset of {spring_ahead_3am} - {spring_ahead_159am}: --> ", (spring_ahead_3am - spring_ahead_159am).total_seconds())


## Ending Daylight saving time

In [None]:
# US/Eastern rules from the timezone database (dateutil `tz`, imported in an earlier cell).
eastern = tz.gettz('US/Eastern')

# 1:00 AM on 2017-11-05 happens twice: once before the clocks go back and
# once after. datetime_ambiguous() reports whether a wall time repeats.
first_1am = datetime(year=2017, month=11, day=5, hour=1, minute=0, second=0, tzinfo=eastern)
is_repeated = tz.datetime_ambiguous(first_1am)
print("1AM amboguous: ", is_repeated)
# From the wall time alone it is unclear whether the -4 (DLS) or -5
# (standard) offset applies.
print("first_1am: ", first_1am)

# enfold() marks the value as the second occurrence of the repeated hour,
# i.e. the 1 AM that belongs to standard time rather than DLS.
second_1am = tz.enfold(first_1am)
print("second 1am: ", second_1am)

# Both values share the same tzinfo object, so subtraction compares only the
# wall-clock fields and reports the wrong gap.
wall_clock_gap = second_1am - first_1am
print("Time difference with DLS: ", wall_clock_gap.total_seconds())

# Check the time difference again WITH a UTC conversion: once both values
# are expressed in UTC the two occurrences become distinct instants.
first_1am, second_1am = first_1am.astimezone(tz.UTC), second_1am.astimezone(tz.UTC)
utc_gap = second_1am - first_1am
print(f"Difference with utc offset of {second_1am} - {first_1am}:--> ", utc_gap.total_seconds())

---

## DateTime with pandas

In [1]:
import pandas as pd

# Load the Q4 2017 rides of bike W20529 and have pandas parse the two
# timestamp columns while reading.
date_columns = ['Start date', 'End date']
rides = pd.read_csv('./datasets/capital-onebike.csv', parse_dates=date_columns)
# Alternative to parse_dates, one column at a time:
# rides['Start date'] = pd.to_datetime(rides['Start date'], format = "%Y-%m-%d %H:%M:%S")

print(rides.head(3))
# The dataset covers a single bike; to confirm:
# print(rides['Bike number'].unique())

           Start date            End date  Start station number  \
0 2017-10-01 15:23:25 2017-10-01 15:26:26                 31038   
1 2017-10-01 15:42:57 2017-10-01 17:49:59                 31036   
2 2017-10-02 06:37:10 2017-10-02 06:42:53                 31036   

                   Start station  End station number  \
0           Glebe Rd & 11th St N               31036   
1  George Mason Dr & Wilson Blvd               31036   
2  George Mason Dr & Wilson Blvd               31037   

                            End station Bike number Member type  
0         George Mason Dr & Wilson Blvd      W20529      Member  
1         George Mason Dr & Wilson Blvd      W20529      Casual  
2  Ballston Metro / N Stuart & 9th St N      W20529      Member  


### Get duration

In [None]:
# Ride length as a Timedelta column, plus the same value as float seconds.
rides['Duration'] = rides['End date'] - rides['Start date']
rides['Duration seconds'] = rides['Duration'].dt.total_seconds()
print(rides['Duration seconds'].head(5), end='\n\n')

print("Mean: ", rides['Duration'].mean()) # Average time out of the dock
print("Sum: ", rides['Duration'].sum()) # Total time out of the dock

# Dividing one timedelta by another gives a unitless ratio between 0 and 1,
# so it is scaled by 100 to match the "Percent" label.
# 91 days is used because the dataset covers 91 days only.
print("Percent of time, out of the dock: ", rides['Duration'].sum() / timedelta(days=91) * 100, end='\n\n')

print("Average duration per member type: ", rides.groupby('Member type')['Duration seconds'].mean(), sep='\n')
# 'ME' = month-end frequency; rides are bucketed by the month of 'Start date'.
print("Average duration by month: ", rides.resample('ME', on = 'Start date')['Duration seconds'].mean(), sep='\n', end='\n\n')

# The minimum comes out negative. On inspection this is caused by DLS ending
# on that day: the naive timestamps make the ride appear to end before it starts.
print("Minimum duration: ", rides['Duration seconds'].min())

## Print ambiguous time

Here it looks as if `Start date` is later than `End date`.

But in reality `End date` is after `Start date`: DLS (daylight saving time) ends on this date, so `Start date` falls in the DLS offset and `End date` falls in the standard-time offset.

In [None]:
# Row 129 is the ride discussed above, whose naive timestamps make the
# start look later than the end.
ambiguous_ride = rides[['Start date', 'End date', 'Member type']].iloc[129]
print("Ambiguous value location: ", ambiguous_ride, sep='\n')

### Set timezone to dates

`Without setting a timezone on 'Start date' and 'End date', we cannot calculate the exact duration on days when DLS switches`

In [None]:
# A plain localisation such as
#     rides['Start date'].dt.tz_localize('America/New_York')
# raises an error here, because one timestamp is ambiguous: it could carry
# either the DLS or the standard offset.
#
# ambiguous='NaT' turns such timestamps into "Not a Time" instead of raising.
# (Given what we already know about that ride, explicit values could also be
# passed for `ambiguous`.)
for date_col in ('Start date', 'End date'):
    rides[date_col] = rides[date_col].dt.tz_localize('America/New_York', ambiguous='NaT')

## Check duration again after setting timezone

In [None]:
# Recompute the duration columns now that both timestamp columns are
# timezone-aware, then look at the minimum again.
ride_length = rides['End date'] - rides['Start date']
rides['Duration'] = ride_length
rides['Duration seconds'] = ride_length.dt.total_seconds()

shortest_seconds = rides['Duration seconds'].min()
print("Minimum duration: ", shortest_seconds)

# Show the full row of the ride that was previously ambiguous.
print("Ambiguous value location: ", rides.iloc[129], sep='\n')

## Other datetime operations

In [None]:
# The .dt accessor exposes datetime parts for a whole column at once;
# shown here for the first three rows only.
first_three_starts = rides['Start date'].head(3)
print("year: ", first_three_starts.dt.year, sep='\n')
print("Dayname: ", first_three_starts.dt.day_name(), sep='\n')

# shift(1) moves every value down one row, leaving NaT in the first row.
previous_end = rides['End date'].shift(1)
print(previous_end.head(3))

# Idle time between consecutive rides: this ride's start minus the previous
# ride's end, in seconds. This assumes the rows are sorted by 'Start date'.
rides['End date shifted'] = previous_end
idle_gap = rides['Start date'] - rides['End date shifted']
rides['time_to_last_ride'] = idle_gap.dt.total_seconds()
rides[['Start date', 'End date', 'End date shifted', 'time_to_last_ride']]