In [2]:
import pandas as pd

### Import the wti-daily.csv file into a data frame in which the Date column is treated as a datetime value and is set to be the index.

In [3]:
df = pd.read_csv(filepath_or_buffer='wti-daily.csv', parse_dates=[0])

In [4]:
df.dtypes

Date     datetime64[ns]
Price           float64
dtype: object

In [5]:
df['Date'].head()

0   1986-01-02
1   1986-01-03
2   1986-01-06
3   1986-01-07
4   1986-01-08
Name: Date, dtype: datetime64[ns]

In [6]:
df = df.set_index('Date')

In [7]:
df.index

DatetimeIndex(['1986-01-02', '1986-01-03', '1986-01-06', '1986-01-07',
               '1986-01-08', '1986-01-09', '1986-01-10', '1986-01-13',
               '1986-01-14', '1986-01-15',
               ...
               '2025-12-15', '2025-12-16', '2025-12-17', '2025-12-18',
               '2025-12-19', '2025-12-22', '2025-12-23', '2025-12-24',
               '2025-12-26', '2025-12-29'],
              dtype='datetime64[ns]', name='Date', length=10067, freq=None)

### Answer these questions:
* What was the average price of a barrel of oil in June 1992?

In [8]:
df.loc['1992-06'].shape[0]

22

In [9]:
df.loc['1992-06-01':'1992-06-30'].shape[0]

22

In [10]:
df.loc['1992-06'].mean()

Price    22.384545
dtype: float64

* What was the average price of a barrel of oil in all of 1987?

In [11]:
df.loc['1987'].mean()

Price    19.200512
dtype: float64

* What was the average price from September 2003 through July 2014?

In [16]:
df.loc['2003-09':'2014-07-31']

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2003-09-02,29.57
2003-09-03,29.43
2003-09-04,28.87
2003-09-05,28.93
2003-09-08,28.85
...,...
2014-07-25,105.23
2014-07-28,105.68
2014-07-29,104.91
2014-07-30,104.29


In [17]:
df.loc['2003-09':'2014-07-31'].mean()

Price    76.457834
dtype: float64

In [18]:
df.loc['2003-09':'2014-07'].mean()

Price    76.457834
dtype: float64

### Show the price of oil ``at the end of each quarter`` in the data set.

In [26]:
df.index.is_quarter_end

array([False, False, False, ..., False, False, False], shape=(10067,))

In [None]:
df.loc[df.index.is_quarter_end]

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
1986-03-31,10.25
1986-06-30,12.80
1986-09-30,14.70
1986-12-31,17.93
1987-03-31,18.82
...,...
2024-09-30,68.75
2024-12-31,72.44
2025-03-31,71.87
2025-06-30,66.30


### For each year in the data set, show the average price.

In [37]:
df.resample('YE').mean()

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
1986-12-31,15.047689
1987-12-31,19.200512
1988-12-31,15.965409
1989-12-31,19.635486
1990-12-31,24.526576
1991-12-31,21.541367
1992-12-31,20.575564
1993-12-31,18.4322
1994-12-31,17.196429
1995-12-31,18.428805


### On which date were oil prices the highest? When were they the lowest?

In [31]:
df['Price'].sort_values().head(1) # Lowest prices

Date
2020-04-20   -36.98
Name: Price, dtype: float64

In [30]:
df['Price'].sort_values(ascending=False).head(1) # Highest prices

Date
2008-07-03    145.31
Name: Price, dtype: float64

In [29]:
df['Price'].sort_values().iloc[[0,-1]]

Date
2020-04-20    -36.98
2008-07-03    145.31
Name: Price, dtype: float64

## Beyond the exercise


### Use resample to find, for each quarter, the mean and standard deviations in price.

In [37]:
df.resample('QE').agg(['mean', 'std'])

Unnamed: 0_level_0,Price,Price
Unnamed: 0_level_1,mean,std
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
1986-03-31,17.217213,4.856866
1986-06-30,13.866094,1.346364
1986-09-30,13.813906,1.809548
1986-12-31,15.406452,0.824353
1987-03-31,18.250328,0.661624
...,...,...
2024-12-31,70.729032,2.162225
2025-03-31,71.783333,3.608221
2025-06-30,64.567258,4.096029
2025-09-30,65.779531,2.292189


### In which quarter did you see the biggest increase in mean price from the previous quarter?

In [None]:
df.resample('QE').mean().diff()

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
1986-03-31,
1986-06-30,-3.351119
1986-09-30,-0.052188
1986-12-31,1.592545
1987-03-31,2.843876
...,...
2024-12-31,-5.698468
2025-03-31,1.054301
2025-06-30,-7.216075
2025-09-30,1.212273


In [39]:
df.resample('QE').mean().diff().max()

Price    25.91959
dtype: float64

In [53]:
df.resample('QE').mean().diff().sort_values(ascending=False, by='Price').iloc[0] # The second quarter of 2008

Price    25.91959
Name: 2008-06-30 00:00:00, dtype: float64

## What was the biggest percentage increase in oil prices across quarters?

In [62]:
df.resample('QE').mean().pct_change().sort_values(by='Price', ascending=False).iloc[0]

Price    0.475456
Name: 1990-09-30 00:00:00, dtype: float64