# Resampling Frequency: Upsampling

## Data and Imports

In [5]:
import pandas as pd

In [7]:
sp500 = pd.read_pickle('sp500.pkl')

In [9]:
sp500.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Year,Month,Day,Day_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-12-27,2266.23,2273.82,2266.15,2268.88,1987080000,2016,12,27,Tuesday
2016-12-28,2270.23,2271.31,2249.11,2249.92,2392360000,2016,12,28,Wednesday
2016-12-29,2249.5,2254.51,2244.56,2249.26,2336370000,2016,12,29,Thursday
2016-12-30,2251.61,2253.58,2233.62,2238.83,2670900000,2016,12,30,Friday
2017-01-03,2251.57,2263.88,2245.13,2257.83,3770530000,2017,1,3,Tuesday


## Upsampling

### asfreq() method

In [23]:
## Converting the frequency of the data set to daily (to include weekends)
sp500_1d_asfreq = sp500.asfreq('1D')

In [25]:
## Notice the new NaN data for 2021-12-18
## We didn't define how asfreq should fill in the data

sp500_1d_asfreq

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Year,Month,Day,Day_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-12-27,2266.23,2273.82,2266.15,2268.88,1.987080e+09,2016.0,12.0,27.0,Tuesday
2016-12-28,2270.23,2271.31,2249.11,2249.92,2.392360e+09,2016.0,12.0,28.0,Wednesday
2016-12-29,2249.50,2254.51,2244.56,2249.26,2.336370e+09,2016.0,12.0,29.0,Thursday
2016-12-30,2251.61,2253.58,2233.62,2238.83,2.670900e+09,2016.0,12.0,30.0,Friday
2016-12-31,,,,,,,,,
...,...,...,...,...,...,...,...,...,...
2021-12-18,,,,,,,,,
2021-12-19,,,,,,,,,
2021-12-20,4587.90,4587.90,4531.10,4568.02,3.395780e+09,2021.0,12.0,20.0,Monday
2021-12-21,4594.96,4651.14,4583.16,4649.23,2.564370e+09,2021.0,12.0,21.0,Tuesday


In [27]:
## Let's use the method parameter to fill in the NaN data

## Recall we can use ffill or bfill
## ffill uses the last valid record and bfill uses the next valid record to fill

sp500.asfreq('1D', method = 'ffill').head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Year,Month,Day,Day_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-12-27,2266.23,2273.82,2266.15,2268.88,1987080000,2016,12,27,Tuesday
2016-12-28,2270.23,2271.31,2249.11,2249.92,2392360000,2016,12,28,Wednesday
2016-12-29,2249.5,2254.51,2244.56,2249.26,2336370000,2016,12,29,Thursday
2016-12-30,2251.61,2253.58,2233.62,2238.83,2670900000,2016,12,30,Friday
2016-12-31,2251.61,2253.58,2233.62,2238.83,2670900000,2016,12,30,Friday
2017-01-01,2251.61,2253.58,2233.62,2238.83,2670900000,2016,12,30,Friday
2017-01-02,2251.61,2253.58,2233.62,2238.83,2670900000,2016,12,30,Friday
2017-01-03,2251.57,2263.88,2245.13,2257.83,3770530000,2017,1,3,Tuesday
2017-01-04,2261.6,2272.82,2261.6,2270.75,3764890000,2017,1,4,Wednesday
2017-01-05,2268.18,2271.5,2260.45,2269.0,3761820000,2017,1,5,Thursday


In [31]:
## We can also fill after the fact by chaining a fill method onto asfreq()
## This gives us a second option with the same goal
## Note: fillna(method='ffill') is deprecated since pandas 2.1 (it raises a FutureWarning),
## so we call .ffill() directly; the result is the same
## Because asfreq() inserts NaN rows first, the integer columns are shown as floats here

sp500.asfreq('1D').ffill().head(10)

  sp500.asfreq('1D').fillna(method='ffill').head(10)


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Year,Month,Day,Day_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-12-27,2266.23,2273.82,2266.15,2268.88,1987080000.0,2016.0,12.0,27.0,Tuesday
2016-12-28,2270.23,2271.31,2249.11,2249.92,2392360000.0,2016.0,12.0,28.0,Wednesday
2016-12-29,2249.5,2254.51,2244.56,2249.26,2336370000.0,2016.0,12.0,29.0,Thursday
2016-12-30,2251.61,2253.58,2233.62,2238.83,2670900000.0,2016.0,12.0,30.0,Friday
2016-12-31,2251.61,2253.58,2233.62,2238.83,2670900000.0,2016.0,12.0,30.0,Friday
2017-01-01,2251.61,2253.58,2233.62,2238.83,2670900000.0,2016.0,12.0,30.0,Friday
2017-01-02,2251.61,2253.58,2233.62,2238.83,2670900000.0,2016.0,12.0,30.0,Friday
2017-01-03,2251.57,2263.88,2245.13,2257.83,3770530000.0,2017.0,1.0,3.0,Tuesday
2017-01-04,2261.6,2272.82,2261.6,2270.75,3764890000.0,2017.0,1.0,4.0,Wednesday
2017-01-05,2268.18,2271.5,2260.45,2269.0,3761820000.0,2017.0,1.0,5.0,Thursday


### resample method

In [35]:
## We can also use the resample() method

## resample() gives us a Resampler object, on which we can call asfreq().
## This asfreq here is a method of Resampler objects; it just returns the values at the new frequency.
## We can see that we've got a daily data set, but the newly added dates have missing values.

## Notice how we have all the NaN values now
## Let's fill them

sp500.resample('1D').asfreq()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Year,Month,Day,Day_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-12-27,2266.23,2273.82,2266.15,2268.88,1.987080e+09,2016.0,12.0,27.0,Tuesday
2016-12-28,2270.23,2271.31,2249.11,2249.92,2.392360e+09,2016.0,12.0,28.0,Wednesday
2016-12-29,2249.50,2254.51,2244.56,2249.26,2.336370e+09,2016.0,12.0,29.0,Thursday
2016-12-30,2251.61,2253.58,2233.62,2238.83,2.670900e+09,2016.0,12.0,30.0,Friday
2016-12-31,,,,,,,,,
...,...,...,...,...,...,...,...,...,...
2021-12-18,,,,,,,,,
2021-12-19,,,,,,,,,
2021-12-20,4587.90,4587.90,4531.10,4568.02,3.395780e+09,2021.0,12.0,20.0,Monday
2021-12-21,4594.96,4651.14,4583.16,4649.23,2.564370e+09,2021.0,12.0,21.0,Tuesday


In [37]:
## To fill them, we can replace the asfreq method with a fill method such as ffill.
## As we've learned, ffill fills each missing value forward from the last valid record.

sp500.resample('1D').ffill()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Year,Month,Day,Day_name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-12-27,2266.23,2273.82,2266.15,2268.88,1987080000,2016,12,27,Tuesday
2016-12-28,2270.23,2271.31,2249.11,2249.92,2392360000,2016,12,28,Wednesday
2016-12-29,2249.50,2254.51,2244.56,2249.26,2336370000,2016,12,29,Thursday
2016-12-30,2251.61,2253.58,2233.62,2238.83,2670900000,2016,12,30,Friday
2016-12-31,2251.61,2253.58,2233.62,2238.83,2670900000,2016,12,30,Friday
...,...,...,...,...,...,...,...,...,...
2021-12-18,4652.50,4666.70,4600.22,4620.64,5609780000,2021,12,17,Friday
2021-12-19,4652.50,4666.70,4600.22,4620.64,5609780000,2021,12,17,Friday
2021-12-20,4587.90,4587.90,4531.10,4568.02,3395780000,2021,12,20,Monday
2021-12-21,4594.96,4651.14,4583.16,4649.23,2564370000,2021,12,21,Tuesday
