## Oil prices

In [25]:
import pandas as pd
import numpy as np


In [26]:
# Load the daily WTI price file. The 'Date' column is parsed as datetimes
# and used as the row index, so later cells can select rows by date string.
csv_options = {
    'parse_dates': ['Date'],
    'index_col': ['Date'],
}
df = pd.read_csv('../data/wti-daily.csv', **csv_options)

# Preview the first rows to confirm the file loaded as expected.
df.head()

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
1986-01-02,25.56
1986-01-03,26.0
1986-01-06,26.53
1986-01-07,25.85
1986-01-08,25.87


In [27]:
# Look up the row for one specific day by its date label.
single_day = '1992-06-15'
df.loc[single_day]

Price    22.38
Name: 1992-06-15 00:00:00, dtype: float64

In [28]:
# A year-month string selects every row dated within that month.
june_1992 = df.loc['1992-06']
june_1992

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
1992-06-01,22.07
1992-06-02,22.2
1992-06-03,22.45
1992-06-04,22.53
1992-06-05,22.65
1992-06-08,22.43
1992-06-09,22.23
1992-06-10,22.49
1992-06-11,22.33
1992-06-12,22.28


In [29]:
# Average price over the rows dated June 1992.
month_rows = df.loc['1992-06']
month_rows.mean()

Price    22.384545
dtype: float64

In [30]:
# Average price over the rows from September 2003 through July 2014.
# Both endpoints are year-month strings used as a .loc label slice.
period = slice('2003-09', '2014-07')
df.loc[period].mean()

Price    76.457834
dtype: float64

Pandas makes it easy to select quarter-end dates with the `is_quarter_end` attribute, which is available on the `dt` accessor for datetime series. In our case, the datetime values aren’t in a series; they’re on our index, so we can use `is_quarter_end` directly on the index.

In [31]:
# Boolean mask over the index: True where the date is the last calendar
# day of a quarter. Only dates actually present in the index can match.
quarter_end_mask = df.index.is_quarter_end
df.loc[quarter_end_mask]

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
1986-03-31,10.25
1986-06-30,12.80
1986-09-30,14.70
1986-12-31,17.93
1987-03-31,18.82
...,...
2020-09-30,40.05
2020-12-31,48.35
2021-03-31,59.19
2021-06-30,73.52


In [32]:
# Bin the rows by calendar year ('1YE' labels each bin with its year-end
# date) and average the prices within each bin.
by_year = df.resample('1YE')
by_year.mean()

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
1986-12-31,15.047689
1987-12-31,19.200512
1988-12-31,15.965409
1989-12-31,19.635486
1990-12-31,24.526576
1991-12-31,21.541367
1992-12-31,20.575564
1993-12-31,18.4322
1994-12-31,17.196429
1995-12-31,18.428805


In [33]:
# Sort the prices in ascending order, then take the first and last
# entries: the lowest and highest price, each labelled with its date.
prices_ascending = df['Price'].sort_values()
first_and_last = [0, -1]
prices_ascending.iloc[first_and_last]

Date
2020-04-20    -36.98
2008-07-03    145.31
Name: Price, dtype: float64

In [34]:
# Get the index labels (dates) of the minimum and maximum price in a
# single aggregation call.
extreme_locators = ['idxmin', 'idxmax']
df['Price'].agg(extreme_locators)

idxmin   2020-04-20
idxmax   2008-07-03
Name: Price, dtype: datetime64[ns]

In [35]:
# Per-quarter mean and standard deviation of the price.
by_quarter = df.resample('1QE')
quarter_stats = ['mean', 'std']
by_quarter.agg(quarter_stats)

Unnamed: 0_level_0,Price,Price
Unnamed: 0_level_1,mean,std
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
1986-03-31,17.217213,4.856866
1986-06-30,13.866094,1.346364
1986-09-30,13.813906,1.809548
1986-12-31,15.406452,0.824353
1987-03-31,18.250328,0.661624
...,...,...
2020-12-31,42.524921,3.844217
2021-03-31,58.093443,4.931396
2021-06-30,66.186667,4.403607
2021-09-30,70.575469,3.078296


In [36]:
# Quarter-over-quarter change in the average price, in absolute terms.
# Sorting in descending order puts the largest increase in the first row.
quarterly_mean = df.resample('1QE').mean()
quarterly_change = quarterly_mean.diff()
quarterly_change.sort_values('Price', ascending=False).iloc[0]

Price    25.91959
Name: 2008-06-30 00:00:00, dtype: float64

In [37]:
# Quarter-over-quarter change in the average price, as a fraction of the
# previous quarter's average. Sorting in descending order puts the largest
# relative increase in the first row.
#
# fill_method=None is passed explicitly: the implicit forward-fill default
# of pct_change is deprecated in recent pandas, and without it a quarter
# with no data stays NaN instead of silently reusing the previous
# quarter's mean as the comparison base.
quarterly_mean = df.resample('1QE').mean()
quarterly_pct = quarterly_mean.pct_change(fill_method=None)
quarterly_pct.sort_values('Price', ascending=False).iloc[0]


Price    0.475456
Name: 1990-09-30 00:00:00, dtype: float64