In [1]:
import warnings
warnings.filterwarnings("ignore")

from datetime import datetime, timedelta
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.tsa.api import Holt

from wrangle import get_crypto_price



In [2]:
# Load BTC data via the project helper in wrangle.py.
# NOTE(review): presumably the 2nd/3rd arguments are the start/end dates and the
# result is a date-indexed DataFrame with btc_* columns (later cells slice it by
# date) — confirm against wrangle.get_crypto_price.
btc = get_crypto_price('btc', '2018-01-01', '2022-12-12')

In [3]:
# Select the 2021 rows and resample to calendar-day frequency; days with no
# observations become NaN rows. `.loc` is required for partial-string row
# selection: `btc['2021']` was deprecated in pandas 1.2 and removed in 2.0.
resampled_2021 = btc.loc['2021'].resample('D').mean()

In [4]:
# Display the daily-resampled 2021 frame.
resampled_2021

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-01,28923.63,29600.00,28624.57,29263.09,46884.263225
2021-01-02,29331.70,33300.00,28946.53,32100.04,124427.120161
2021-01-03,32176.45,34778.11,31962.99,33000.05,115941.583083
2021-01-04,33000.05,33600.00,28130.00,31988.71,121959.073778
2021-01-05,31989.75,34360.00,29900.00,33949.53,114934.175737
...,...,...,...,...,...
2021-12-27,50775.48,52088.00,50449.00,50701.44,28491.333770
2021-12-28,50701.44,50704.05,47313.01,47543.74,45174.083720
2021-12-29,47543.69,48139.08,46096.99,46464.66,38942.397470
2021-12-30,46464.66,47900.00,45900.00,47120.88,29959.465160


In [5]:
# Check non-null counts per column to see how many days are missing after resampling.
resampled_2021.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2021-01-01 to 2021-12-31
Freq: D
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   btc_open    326 non-null    float64
 1   btc_high    326 non-null    float64
 2   btc_low     326 non-null    float64
 3   btc_close   326 non-null    float64
 4   btc_volume  326 non-null    float64
dtypes: float64(5)
memory usage: 17.1 KB


In [8]:
# Forward-fill within groups that share the same day-of-month, so a gap is
# filled from the most recent earlier row with the same calendar day number
# (not from the immediately preceding day).
day_of_month = resampled_2021.index.day
resampled_2021 = resampled_2021.groupby(day_of_month).ffill()

In [10]:
# Re-check non-null counts after the forward fill.
resampled_2021.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2021-01-01 to 2021-12-31
Freq: D
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   btc_open    360 non-null    float64
 1   btc_high    360 non-null    float64
 2   btc_low     360 non-null    float64
 3   btc_close   360 non-null    float64
 4   btc_volume  360 non-null    float64
dtypes: float64(5)
memory usage: 17.1 KB


In [11]:
# Back-fill within groups that share the same day-of-month, covering gaps that
# had no earlier same-day row for the forward fill to draw from.
day_of_month = resampled_2021.index.day
resampled_2021 = resampled_2021.groupby(day_of_month).bfill()

In [13]:
# Re-check non-null counts after the backward fill.
resampled_2021.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2021-01-01 to 2021-12-31
Freq: D
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   btc_open    365 non-null    float64
 1   btc_high    365 non-null    float64
 2   btc_low     365 non-null    float64
 3   btc_close   365 non-null    float64
 4   btc_volume  365 non-null    float64
dtypes: float64(5)
memory usage: 17.1 KB


In [23]:
# Chronological split: everything through 2020 trains, 2021 validates, 2022 tests.
# validate/test are resampled to calendar days (missing days become NaN rows);
# train is kept as loaded.
# Row selection uses `.loc`: partial-string row indexing through `btc['2021']`
# was deprecated in pandas 1.2 and removed in 2.0.
train = btc.loc[:'2020']
validate = btc.loc['2021'].resample('D').mean()
test = btc.loc['2022'].resample('D').mean()

In [24]:
# Back-fill missing validation rows within groups sharing the same day-of-month.
day_of_month = validate.index.day
validate = validate.groupby(day_of_month).bfill()

In [25]:
# Confirm the validation frame has no remaining nulls.
validate.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2021-01-01 to 2021-12-31
Freq: D
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   btc_open    365 non-null    float64
 1   btc_high    365 non-null    float64
 2   btc_low     365 non-null    float64
 3   btc_close   365 non-null    float64
 4   btc_volume  365 non-null    float64
dtypes: float64(5)
memory usage: 17.1 KB


In [26]:
# Remove the 2020 leap day from the training rows.
# NOTE(review): presumably so 2020 has 365 rows like the 2021 validation year — confirm.
is_leap_day = train.index == '2020-02-29'
train = train[~is_leap_day]

In [28]:
# Verify the 2020 portion of train after dropping the leap day.
# `.loc` is required: `train['2020']` row selection was removed in pandas 2.0.
train.loc['2020'].info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2020-01-01 to 2020-12-31
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   btc_open    365 non-null    float64
 1   btc_high    365 non-null    float64
 2   btc_low     365 non-null    float64
 3   btc_close   365 non-null    float64
 4   btc_volume  365 non-null    float64
dtypes: float64(5)
memory usage: 17.1 KB


In [29]:
# Display the daily-resampled 2022 test frame.
test

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-08,16837.61,17298.58,16733.49,17224.82,122071.70341
2022-12-09,17223.70,17353.17,17058.21,17128.55,128639.48470
2022-12-10,17128.56,17225.12,17092.00,17127.49,75995.09380
2022-12-11,17127.09,17270.99,17071.17,17084.59,84358.06696


In [31]:
# View the 2022 rows conformed to daily frequency (no aggregation, unlike resample).
# `.loc` is required: `btc['2022']` row selection was removed in pandas 2.0.
btc.loc['2022'].asfreq('D')

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-08,16837.61,17298.58,16733.49,17224.82,122071.70341
2022-12-09,17223.70,17353.17,17058.21,17128.55,128639.48470
2022-12-10,17128.56,17225.12,17092.00,17127.49,75995.09380
2022-12-11,17127.09,17270.99,17071.17,17084.59,84358.06696


In [32]:
# Remaining calendar days of 2022 after the last loaded observation (2022-12-12).
# Built as a DatetimeIndex rather than a hand-typed list of strings, so that
# using it as a DataFrame index alongside the date-indexed price data does not
# produce an index mixing Timestamps and str labels.
dates_2022 = pd.date_range(start='2022-12-13', end='2022-12-31', freq='D')

In [36]:
# Preview the 2022 rows extended with empty (all-NaN) rows for the remaining dates.
# `pd.concat` replaces `DataFrame.append` and `.loc` replaces `btc['2022']`;
# both older spellings were removed in pandas 2.0. The labels are coerced with
# `pd.to_datetime` so the combined index holds only Timestamps.
pd.concat([btc.loc['2022'], pd.DataFrame(index=pd.to_datetime(dates_2022))])

Unnamed: 0,btc_open,btc_high,btc_low,btc_close,btc_volume
2022-01-01 00:00:00,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02 00:00:00,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03 00:00:00,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04 00:00:00,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05 00:00:00,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-27,,,,,
2022-12-28,,,,,
2022-12-29,,,,,
2022-12-30,,,,,


In [35]:
# Display the 2022 rows. `.loc` is required: `btc['2022']` row selection was
# removed in pandas 2.0.
btc.loc['2022']

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-08,16837.61,17298.58,16733.49,17224.82,122071.70341
2022-12-09,17223.70,17353.17,17058.21,17128.55,128639.48470
2022-12-10,17128.56,17225.12,17092.00,17127.49,75995.09380
2022-12-11,17127.09,17270.99,17071.17,17084.59,84358.06696


In [49]:
# Append one empty (all-NaN) row dated the day after the last 2022 observation.
# `.loc` replaces `btc['2022']` and `pd.concat` replaces `DataFrame.append`;
# both older spellings were removed in pandas 2.0.
last_date = btc.loc['2022'].iloc[[-1]].index  # one-element DatetimeIndex
last_date = last_date + timedelta(days=1)

stuff2022 = pd.concat([btc.loc['2022'], pd.DataFrame(index=last_date)])

In [50]:
# Display the 2022 frame with the appended placeholder row.
stuff2022

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-09,17223.70,17353.17,17058.21,17128.55,128639.48470
2022-12-10,17128.56,17225.12,17092.00,17127.49,75995.09380
2022-12-11,17127.09,17270.99,17071.17,17084.59,84358.06696
2022-12-12,17084.82,17241.40,16871.85,17210.67,123528.22129
