In [25]:
import pandas as pd
import numpy as np
from datetime import datetime

In [26]:
# Mile race results as 'm:ss' strings, keyed by the date each race was run.
mile_races = pd.Series(
    data=np.array(['4:54', '4:47', '4:52', '4:48']),
    index=[
        datetime(2019, 12, 20),
        datetime(2020, 2, 13),
        datetime(2020, 2, 27),
        datetime(2020, 3, 5),
    ],
)
mile_races

2019-12-20    4:54
2020-02-13    4:47
2020-02-27    4:52
2020-03-05    4:48
dtype: object

In [27]:
# The plain datetime objects passed as the index are stored with a
# datetime64 dtype, which is what enables the string lookups further down.
mile_races.index.dtype

dtype('<M8[ns]')

In [28]:
# Individual index entries come back as pandas Timestamp objects.
mile_races.index[0]

Timestamp('2019-12-20 00:00:00')

In [29]:
# A date string is parsed and matched against the index; M/D/YYYY works.
mile_races.loc['2/27/2020']

'4:52'

In [30]:
# The compact YYYYMMDD spelling resolves to the same race.
mile_races.loc['20200227']

'4:52'

In [31]:
# Partial string indexing: a bare year selects every race in that year.
mile_races.loc['2020']

2020-02-13    4:47
2020-02-27    4:52
2020-03-05    4:48
dtype: object

In [32]:
# Partial string indexing: year-month selects every race in that month.
mile_races.loc['2020-02']

2020-02-13    4:47
2020-02-27    4:52
dtype: object

In [33]:
# Slice by date strings; the endpoints do not have to be race dates themselves.
mile_races.loc['12/1/2019':'2/29/2020']

2019-12-20    4:54
2020-02-13    4:47
2020-02-27    4:52
dtype: object

In [34]:
# Current ski trip to Killington with Joe & Ben: a daily range given by
# its first and last day.
pd.date_range(start='2020-03-06', end='2020-03-08')

DatetimeIndex(['2020-03-06', '2020-03-07', '2020-03-08'], dtype='datetime64[ns]', freq='D')

In [35]:
# Same three days, described by the first day and a count (daily is the default step).
pd.date_range(start='2020-03-06', periods=3, freq='D')

DatetimeIndex(['2020-03-06', '2020-03-07', '2020-03-08'], dtype='datetime64[ns]', freq='D')

In [36]:
# Same three days again, this time counted backwards from the last day.
pd.date_range(end='2020-03-08', periods=3, freq='D')

DatetimeIndex(['2020-03-06', '2020-03-07', '2020-03-08'], dtype='datetime64[ns]', freq='D')

In [37]:
# Sundays in March: a weekly frequency anchored on Sunday.
pd.date_range(start='2020-03-01', end='2020-03-31', freq='W-SUN')

DatetimeIndex(['2020-03-01', '2020-03-08', '2020-03-15', '2020-03-22',
               '2020-03-29'],
              dtype='datetime64[ns]', freq='W-SUN')

In [62]:
from pandas.tseries.offsets import Hour, Minute, MonthEnd

In [63]:
# With no argument the offset represents a single hour.
Hour()

<Hour>

In [64]:
# The multiplier says how many hours the offset spans.
twelve_hours = Hour(n=12)
twelve_hours

<12 * Hours>

In [65]:
# A 31-minute offset, built the same way as the hour offset.
thirty_one_minutes = Minute(n=31)
thirty_one_minutes

<31 * Minutes>

In [66]:
# Adding fixed-length offsets yields one offset in the finer unit
# (12 h + 31 min = 751 min).
twelve_hours + thirty_one_minutes

<751 * Minutes>

In [67]:
# A compound frequency can be written as a single alias string.
pd.date_range(start='2020-02-25', end='2020-02-27', freq='12h31min')

DatetimeIndex(['2020-02-25 00:00:00', '2020-02-25 12:31:00',
               '2020-02-26 01:02:00', '2020-02-26 13:33:00'],
              dtype='datetime64[ns]', freq='751T')

In [68]:
# The same range, with the frequency given as a sum of offset objects.
pd.date_range(start='2020-02-25', end='2020-02-27', freq=Hour(12) + Minute(31))

DatetimeIndex(['2020-02-25 00:00:00', '2020-02-25 12:31:00',
               '2020-02-26 01:02:00', '2020-02-26 13:33:00'],
              dtype='datetime64[ns]', freq='751T')

In [69]:
# The same four races with each time expressed as a whole number of seconds,
# so the values can be used in arithmetic.
mile_races_seconds = pd.Series(
    data=np.array([294, 287, 292, 288]),
    index=[
        datetime(2019, 12, 20),
        datetime(2020, 2, 13),
        datetime(2020, 2, 27),
        datetime(2020, 3, 5),
    ],
)
mile_races_seconds

2019-12-20    294
2020-02-13    287
2020-02-27    292
2020-03-05    288
dtype: int64

In [78]:
# Promote the series to a one-column DataFrame. The column is named as the
# frame is created instead of overwriting `.columns` afterwards.
mile_races_sec_frame = mile_races_seconds.to_frame(name='seconds')
mile_races_sec_frame

Unnamed: 0,seconds
2019-12-20,294
2020-02-13,287
2020-02-27,292
2020-03-05,288


In [79]:
# Change relative to the previous race, in seconds and as a percentage.
# The first race has no predecessor, so both new columns start with NaN.
mile_races_sec_frame['sec_diff'] = mile_races_sec_frame['seconds'].diff()
# fill_method=None compares against the raw previous value (no forward fill),
# i.e. exactly seconds / seconds.shift(1) - 1.
mile_races_sec_frame['percent_diff'] = mile_races_sec_frame['seconds'].pct_change(fill_method=None) * 100
mile_races_sec_frame

Unnamed: 0,seconds,sec_diff,percent_diff
2019-12-20,294,,
2020-02-13,287,-7.0,-2.380952
2020-02-27,292,5.0,1.74216
2020-03-05,288,-4.0,-1.369863


In [102]:
# Average mile time (in seconds) per month. Every race date is rolled forward
# to the end of its month, and that month-end date is used as the group key.
month_offset = MonthEnd()
avg_per_month = (
    mile_races_seconds
    .to_frame()
    .groupby(by=month_offset.rollforward)
    .mean()
)
avg_per_month

Unnamed: 0,0
2019-12-31,294.0
2020-02-29,289.5
2020-03-31,288.0


In [103]:
# The month-end index carries no time zone yet (it is tz-naive).
avg_per_month.index.tz is None

True

In [104]:
# Move the month-end index into an ordinary column, then give both columns
# readable names.
# NOTE(review): this cell rebinds avg_per_month, so it does not look
# idempotent. A second run would presumably add a further column and make the
# two-label assignment below fail. Confirm, and consider a new variable name.
avg_per_month = avg_per_month.reset_index()
avg_per_month.columns = ['month', 'average time']
avg_per_month

Unnamed: 0,month,average time
0,2019-12-31,294.0
1,2020-02-29,289.5
2,2020-03-31,288.0


In [105]:
# Promote the 'month' column back to the index, leaving 'average time' as
# the only data column.
avg_per_month = avg_per_month.set_index(keys='month')
avg_per_month

Unnamed: 0_level_0,average time
month,Unnamed: 1_level_1
2019-12-31,294.0
2020-02-29,289.5
2020-03-31,288.0


In [107]:
# Attach a time zone to the tz-naive index; the result is displayed, not stored.
avg_per_month.tz_localize(tz='America/New_York')

Unnamed: 0_level_0,average time
month,Unnamed: 1_level_1
2019-12-31 00:00:00-05:00,294.0
2020-02-29 00:00:00-05:00,289.5
2020-03-31 00:00:00-04:00,288.0


In [108]:
# Addition of time intervals respects daylight saving time.
# Start from a wall-clock time just before the US/Eastern clocks change on
# 2020-03-08, built as a naive timestamp that is then localized.
hour_before_dst = pd.Timestamp('2020-03-08 01:59').tz_localize('US/Eastern')
hour_before_dst

Timestamp('2020-03-08 01:59:00-0500', tz='US/Eastern')

In [109]:
# One elapsed hour later the wall clock reads 03:59, not 02:59, because the
# UTC offset changes from -05:00 to -04:00 in between.
hour_before_dst + Hour()

Timestamp('2020-03-08 03:59:00-0400', tz='US/Eastern')

In [115]:
# An annual period whose year ends in February.
period = pd.Period('2021', freq='A-FEB')
period

Period('2021', 'A-FEB')

In [116]:
# Integer arithmetic shifts a period by whole units of its own frequency
# (here, one year).
period + 1

Period('2022', 'A-FEB')

In [117]:
# First day covered by the February-ending year.
period.asfreq(freq='D', how='start')

Period('2020-03-01', 'D')

In [118]:
# Last day covered by the February-ending year.
period.asfreq(freq='D', how='end')

Period('2021-02-28', 'D')

In [121]:
# Quarterly periods for a year that ends in February, from the quarter
# containing the start of 2019 to the one containing the start of 2020.
pd.period_range(start='2019', end='2020', freq='Q-FEB')

PeriodIndex(['2019Q4', '2020Q1', '2020Q2', '2020Q3', '2020Q4'], dtype='period[Q-FEB]', freq='Q-FEB')

In [122]:
# One run length for each of the 29 days of February 2020.
feb_days = pd.date_range(start='2020-02-01', end='2020-02-29', freq='D')
# Each row of the literal holds one Sunday-ending week; the first and last
# rows are the partial weeks at either end of the month.
run_lengths = np.array([
    11.56, 12.0,
    2.34, 3.63, 2.85, 3.06, 3.92, 7.87, 12.5,
    2.81, 3.8, 2.65, 7.5, 2.63, 14.0, 13.21,
    1.28, 1.88, 2.64, 5.2, 3.76, 7.87, 12.59,
    2.81, 2.81, 3.45, 2.6, 2.91, 5.2,
])
feb_runs = pd.Series(data=run_lengths, index=feb_days)
feb_runs

2020-02-01    11.56
2020-02-02    12.00
2020-02-03     2.34
2020-02-04     3.63
2020-02-05     2.85
2020-02-06     3.06
2020-02-07     3.92
2020-02-08     7.87
2020-02-09    12.50
2020-02-10     2.81
2020-02-11     3.80
2020-02-12     2.65
2020-02-13     7.50
2020-02-14     2.63
2020-02-15    14.00
2020-02-16    13.21
2020-02-17     1.28
2020-02-18     1.88
2020-02-19     2.64
2020-02-20     5.20
2020-02-21     3.76
2020-02-22     7.87
2020-02-23    12.59
2020-02-24     2.81
2020-02-25     2.81
2020-02-26     3.45
2020-02-27     2.60
2020-02-28     2.91
2020-02-29     5.20
Freq: D, dtype: float64

In [123]:
# Downsampling: collapse the daily runs into weekly bins and take the mean
# run length of each bin.
feb_runs.resample(rule='W').mean()

2020-02-02    11.780000
2020-02-09     5.167143
2020-02-16     6.657143
2020-02-23     5.031429
2020-03-01     3.296667
Freq: W-SUN, dtype: float64

In [126]:
# Same weekly means, but each bin is labelled with its left edge instead of
# the default right edge.
feb_runs.resample(rule='W', label='left').mean()

2020-01-26    11.780000
2020-02-02     5.167143
2020-02-09     6.657143
2020-02-16     5.031429
2020-02-23     3.296667
Freq: W-SUN, dtype: float64

In [125]:
# Weekly totals instead of means, again labelled by the left bin edge.
feb_runs.resample(rule='W', label='left').sum()

2020-01-26    23.56
2020-02-02    36.17
2020-02-09    46.60
2020-02-16    35.22
2020-02-23    19.78
Freq: W-SUN, dtype: float64

In [129]:
# Upsampling mile and converted 1500m times.
# Times in seconds, indexed by the quarter each one belongs to; the quarters
# are irregularly spaced.
times_in_sec = [295, 280, 283, 280, 281, 267, 287]
quarters = [
    pd.Period(label)
    for label in ('2013Q1', '2014Q1', '2014Q4', '2015Q1', '2016Q1', '2016Q2', '2020Q1')
]
mile_progression = pd.Series(data=times_in_sec, index=quarters)
mile_progression

2013Q1    295
2014Q1    280
2014Q4    283
2015Q1    280
2016Q1    281
2016Q2    267
2020Q1    287
Freq: Q-DEC, dtype: int64

In [130]:
# Expand to every quarter in the covered span; quarters with no recorded
# time show up as NaN.
mile_progression.resample(rule='Q').asfreq()

2013Q1    295.0
2013Q2      NaN
2013Q3      NaN
2013Q4      NaN
2014Q1    280.0
2014Q2      NaN
2014Q3      NaN
2014Q4    283.0
2015Q1    280.0
2015Q2      NaN
2015Q3      NaN
2015Q4      NaN
2016Q1    281.0
2016Q2    267.0
2016Q3      NaN
2016Q4      NaN
2017Q1      NaN
2017Q2      NaN
2017Q3      NaN
2017Q4      NaN
2018Q1      NaN
2018Q2      NaN
2018Q3      NaN
2018Q4      NaN
2019Q1      NaN
2019Q2      NaN
2019Q3      NaN
2019Q4      NaN
2020Q1    287.0
Freq: Q-DEC, dtype: float64

In [131]:
# Expand to every quarter, carrying the most recent recorded time forward
# into the gaps.
mile_progression.resample(rule='Q').ffill()

2013Q1    295
2013Q2    295
2013Q3    295
2013Q4    295
2014Q1    280
2014Q2    280
2014Q3    280
2014Q4    283
2015Q1    280
2015Q2    280
2015Q3    280
2015Q4    280
2016Q1    281
2016Q2    267
2016Q3    267
2016Q4    267
2017Q1    267
2017Q2    267
2017Q3    267
2017Q4    267
2018Q1    267
2018Q2    267
2018Q3    267
2018Q4    267
2019Q1    267
2019Q2    267
2019Q3    267
2019Q4    267
2020Q1    287
Freq: Q-DEC, dtype: int64