In [1]:
import warnings
warnings.filterwarnings("ignore")


import requests

import pandas as pd
import numpy as np
from scipy import stats
from math import sqrt
import matplotlib.pyplot as plt
import seaborn as sns

import statsmodels.api as sm
from statsmodels.tsa.api import Holt

from datetime import datetime

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit 

from wrangle import get_crypto_price



In [2]:
# Load the BTC price history through the project's wrangle helper.
# NOTE(review): the two date strings are presumably the start and end of the
# requested range -- confirm against wrangle.get_crypto_price.
btc = get_crypto_price('btc', '2018-01-01', '2022-12-12')

In [3]:
# Select the 2021 rows and resample to one row per calendar day; days with no
# source rows become NaN. `.loc` is used for the year lookup because plain
# `frame['2021']` row selection was removed in pandas 2.0.
resampled_2021 = btc.loc['2021'].resample('D').mean()

In [115]:
# Confirm the resampled index starts on the first day of 2021.
resampled_2021.index[0]

Timestamp('2021-01-01 00:00:00', freq='D')

In [5]:
# Compare the row count with the non-null counts to see how many calendar
# days the resample had to insert without any source data.
resampled_2021.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2021-01-01 to 2021-12-31
Freq: D
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   btc_open    326 non-null    float64
 1   btc_high    326 non-null    float64
 2   btc_low     326 non-null    float64
 3   btc_close   326 non-null    float64
 4   btc_volume  326 non-null    float64
dtypes: float64(5)
memory usage: 17.1 KB


In [6]:
# Forward-fill within each day-of-month group: a missing day takes the values
# from the same day number in the nearest earlier month that has data.
resampled_2021 = (
    resampled_2021
    .groupby(resampled_2021.index.day)
    .ffill()
)

In [7]:
# Re-check the non-null counts after the forward fill.
resampled_2021.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2021-01-01 to 2021-12-31
Freq: D
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   btc_open    360 non-null    float64
 1   btc_high    360 non-null    float64
 2   btc_low     360 non-null    float64
 3   btc_close   360 non-null    float64
 4   btc_volume  360 non-null    float64
dtypes: float64(5)
memory usage: 17.1 KB


In [8]:
# Back-fill within each day-of-month group to cover the gaps the forward fill
# could not reach (no earlier same-numbered day with data).
resampled_2021 = (
    resampled_2021
    .groupby(resampled_2021.index.day)
    .bfill()
)

In [9]:
# Re-check the non-null counts after the backward fill.
resampled_2021.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2021-01-01 to 2021-12-31
Freq: D
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   btc_open    365 non-null    float64
 1   btc_high    365 non-null    float64
 2   btc_low     365 non-null    float64
 3   btc_close   365 non-null    float64
 4   btc_volume  365 non-null    float64
dtypes: float64(5)
memory usage: 17.1 KB


In [10]:
# Chronological split: everything through 2020 for training, 2021 for
# validation, 2022 for test. The validation and test years are resampled to
# one row per calendar day. `.loc` is used because plain `frame['2021']` row
# selection was removed in pandas 2.0.
train = btc.loc[:'2020']
validate = btc.loc['2021'].resample('D').mean()
test = btc.loc['2022'].resample('D').mean()

In [11]:
# Back-fill within each day-of-month group: a missing day takes the values
# from the same day number in the nearest later month that has data.
validate = (
    validate
    .groupby(validate.index.day)
    .bfill()
)

In [12]:
# Check the non-null counts of the validation year after the backward fill.
validate.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2021-01-01 to 2021-12-31
Freq: D
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   btc_open    365 non-null    float64
 1   btc_high    365 non-null    float64
 2   btc_low     365 non-null    float64
 3   btc_close   365 non-null    float64
 4   btc_volume  365 non-null    float64
dtypes: float64(5)
memory usage: 17.1 KB


In [13]:
# Drop the 2020 leap day from the training rows.
train = train.loc[train.index != '2020-02-29']

In [14]:
# Check the 2020 row count after the leap day was dropped. `.loc` is used
# because plain `frame['2020']` row selection was removed in pandas 2.0.
train.loc['2020'].info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2020-01-01 to 2020-12-31
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   btc_open    365 non-null    float64
 1   btc_high    365 non-null    float64
 2   btc_low     365 non-null    float64
 3   btc_close   365 non-null    float64
 4   btc_volume  365 non-null    float64
dtypes: float64(5)
memory usage: 17.1 KB


In [15]:
# Display the daily-resampled 2022 frame.
test

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-08,16837.61,17298.58,16733.49,17224.82,122071.70341
2022-12-09,17223.70,17353.17,17058.21,17128.55,128639.48470
2022-12-10,17128.56,17225.12,17092.00,17127.49,75995.09380
2022-12-11,17127.09,17270.99,17071.17,17084.59,84358.06696


In [16]:
# Re-express the 2022 rows at daily frequency for comparison with the
# resampled frame. `.loc` is used because plain `frame['2022']` row selection
# was removed in pandas 2.0.
btc.loc['2022'].asfreq('D')

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-08,16837.61,17298.58,16733.49,17224.82,122071.70341
2022-12-09,17223.70,17353.17,17058.21,17128.55,128639.48470
2022-12-10,17128.56,17225.12,17092.00,17127.49,75995.09380
2022-12-11,17127.09,17270.99,17071.17,17084.59,84358.06696


In [17]:
# Date strings for 13 December through 31 December 2022.
dates_2022 = [f'2022-12-{day}' for day in range(13, 32)]

In [18]:
# Experiment: tack empty rows for the remaining December dates onto the 2022
# slice. `DataFrame.append` and plain `frame['2022']` row selection were both
# removed in pandas 2.0, so use `pd.concat` and `.loc`.
# The new labels are plain strings, so the combined index mixes timestamps and
# strings (visible in the rendered output); the result is only displayed.
pd.concat([btc.loc['2022'], pd.DataFrame(index=dates_2022)])

Unnamed: 0,btc_open,btc_high,btc_low,btc_close,btc_volume
2022-01-01 00:00:00,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02 00:00:00,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03 00:00:00,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04 00:00:00,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05 00:00:00,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-27,,,,,
2022-12-28,,,,,
2022-12-29,,,,,
2022-12-30,,,,,


In [19]:
# Display the 2022 slice; the experiment above did not modify `btc`.
# `.loc` is used because plain `frame['2022']` row selection was removed in
# pandas 2.0.
btc.loc['2022']

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-08,16837.61,17298.58,16733.49,17224.82,122071.70341
2022-12-09,17223.70,17353.17,17058.21,17128.55,128639.48470
2022-12-10,17128.56,17225.12,17092.00,17127.49,75995.09380
2022-12-11,17127.09,17270.99,17071.17,17084.59,84358.06696


In [20]:
from datetime import timedelta


# One-element DatetimeIndex holding the day after the last 2022 observation.
# `.loc` is used because plain `frame['2022']` row selection was removed in
# pandas 2.0.
last_date = btc.loc['2022'].iloc[[-1]].index
last_date = last_date + timedelta(days=1)

# Working frame for the padding steps in the following cells.
df = btc.loc['2022']

In [21]:
# Display the 2022 working frame (date is the index).
df

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-08,16837.61,17298.58,16733.49,17224.82,122071.70341
2022-12-09,17223.70,17353.17,17058.21,17128.55,128639.48470
2022-12-10,17128.56,17225.12,17092.00,17127.49,75995.09380
2022-12-11,17127.09,17270.99,17071.17,17084.59,84358.06696


In [22]:
#df.append(pd.DataFrame({'date': pd.date_range(start=df.date.iloc[-1], periods=19, freq='D', closed='right')}))

In [23]:
# Display the 2022 working frame again before it is reshaped.
df

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-08,16837.61,17298.58,16733.49,17224.82,122071.70341
2022-12-09,17223.70,17353.17,17058.21,17128.55,128639.48470
2022-12-10,17128.56,17225.12,17092.00,17127.49,75995.09380
2022-12-11,17127.09,17270.99,17071.17,17084.59,84358.06696


In [24]:
# Move the date index into a regular `date` column so new dates can be added
# as rows.
# NOTE: re-running this cell on its own resets the index again -- run from the
# cell that assigns `df` to get a clean state.
df = df.reset_index()

In [25]:
# Display df after reset_index: `date` is now a regular column.
df

Unnamed: 0,date,btc_open,btc_high,btc_low,btc_close,btc_volume
0,2022-01-01,46216.93,47954.63,46208.37,47722.65,19311.57065
1,2022-01-02,47722.65,47990.00,46654.00,47286.18,18094.83401
2,2022-01-03,47283.71,47570.00,45696.00,46446.09,27241.59512
3,2022-01-04,46446.09,47557.54,45500.00,45832.02,33716.54110
4,2022-01-05,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...,...
341,2022-12-08,16837.61,17298.58,16733.49,17224.82,122071.70341
342,2022-12-09,17223.70,17353.17,17058.21,17128.55,128639.48470
343,2022-12-10,17128.56,17225.12,17092.00,17127.49,75995.09380
344,2022-12-11,17127.09,17270.99,17071.17,17084.59,84358.06696


In [26]:
# Add 19 date-only rows that follow the last observed date; the price and
# volume columns of those rows are NaN until they are filled.
# `DataFrame.append` and `date_range(closed=...)` were removed in pandas 2.0,
# so use `pd.concat` and start the range one day after the last date instead
# of generating 20 periods and dropping the first.
df = pd.concat([
    df,
    pd.DataFrame({
        'date': pd.date_range(start=df.date.iloc[-1] + pd.Timedelta(days=1),
                              periods=19, freq='D')
    }),
])


In [27]:
#df.iloc[-1]

In [28]:
# Put `date` back on the index now that the padding rows have been added.
df = df.set_index('date')

In [29]:
# Display df with the padded dates on the index; the padded rows are still NaN.
df

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,46216.93,47954.63,46208.37,47722.65,19311.57065
2022-01-02,47722.65,47990.00,46654.00,47286.18,18094.83401
2022-01-03,47283.71,47570.00,45696.00,46446.09,27241.59512
2022-01-04,46446.09,47557.54,45500.00,45832.02,33716.54110
2022-01-05,45832.01,47069.81,42500.00,43451.13,50968.71595
...,...,...,...,...,...
2022-12-27,,,,,
2022-12-28,,,,,
2022-12-29,,,,,
2022-12-30,,,,,


In [30]:
#df.iloc[-1]

In [31]:
#train = train[train.index != '2020-02-29']
#validate = validate.groupby(validate.index.day).bfill()

In [106]:
def clean_btc_data_2022(btc):
    """Pad the 2022 rows out to 31 December and fill the padded days.

    The rows after the last 2022 observation are added as empty days and then
    forward-filled within each day-of-month group, so a padded day takes the
    values of the same day number in the nearest earlier month that has data
    (13 December takes 13 November; 31 December takes 31 October).

    Parameters
    ----------
    btc : pandas.DataFrame
        Frame indexed by a sorted DatetimeIndex that contains 2022 rows.

    Returns
    -------
    pandas.DataFrame
        The rows through 2021 unchanged, followed by the padded 2022 rows.

    Raises
    ------
    KeyError
        If ``btc`` contains no 2022 rows.
    """
    # `.loc` is required: plain `frame['2022']` row selection was removed in
    # pandas 2.0.
    year_2022 = btc.loc['2022']

    # Pad from the day after the last observation up to year end. This replaces
    # a fixed 19-day extension, which only landed on 31 December when the data
    # happened to end on 12 December.
    first_missing = year_2022.index[-1].normalize() + pd.Timedelta(days=1)
    missing_days = pd.date_range(start=first_missing, end='2022-12-31', freq='D',
                                 name=year_2022.index.name)

    if len(missing_days) > 0:
        padding = pd.DataFrame(index=missing_days, columns=year_2022.columns,
                               dtype='float64')
        # `DataFrame.append` was removed in pandas 2.0; concat is the replacement.
        year_2022 = pd.concat([year_2022, padding])

    year_2022 = year_2022.groupby(year_2022.index.day).ffill()

    return pd.concat([btc.loc[:'2021'], year_2022])

In [107]:
#btc1 = clean_btc_data_2022(btc)

In [108]:
#btc1.tail(31)

In [109]:
def clean_btc_data_2021(btc):
    """Give 2021 one filled row per calendar day.

    The 2021 rows are resampled to daily frequency, which inserts a NaN row
    for every calendar day without data. Gaps are filled within each
    day-of-month group: first backward (the same day number in the next month
    that has data), then forward for any gap with no later same-numbered day.

    Parameters
    ----------
    btc : pandas.DataFrame
        Frame indexed by a sorted DatetimeIndex that contains 2021 and 2022
        rows.

    Returns
    -------
    pandas.DataFrame
        The rows through 2020 unchanged, the filled daily 2021 rows, then the
        2022 rows unchanged.

    Raises
    ------
    KeyError
        If ``btc`` contains no 2021 or no 2022 rows.
    """
    # `.loc` is required: plain `frame['2021']` row selection was removed in
    # pandas 2.0.
    daily_2021 = btc.loc['2021'].resample('D').mean()

    daily_2021 = daily_2021.groupby(daily_2021.index.day).bfill()
    # A gap late in the year has no later same-numbered day to copy from, so
    # fall back to the previous one. Values already filled are untouched.
    daily_2021 = daily_2021.groupby(daily_2021.index.day).ffill()

    return pd.concat([btc.loc[:'2020'], daily_2021, btc.loc['2022']])

In [110]:
def remove_leap_day(btc):
    """Return ``btc`` without any rows dated 29 February.

    Matching on month and day removes the leap day of every leap year in the
    data, whatever the time of day, instead of only the single timestamp
    2020-02-29 00:00.

    Parameters
    ----------
    btc : pandas.DataFrame
        Frame indexed by a DatetimeIndex.

    Returns
    -------
    pandas.DataFrame
        The rows of ``btc`` whose index is not on a 29 February.
    """
    is_leap_day = (btc.index.month == 2) & (btc.index.day == 29)
    return btc[~is_leap_day]

In [None]:
#volume = train['btc_volume'][-1:][0]

In [72]:
#btc1 = clean_btc_data_2021(btc)

In [73]:
#btc1

In [74]:
#btc1['2022'].info()

In [111]:
def clean_data(btc):
    """Run the cleaning steps in order and return the cleaned frame.

    The steps are: drop the leap day, fill the 2021 rows, then pad and fill
    the 2022 rows. Each step receives the output of the previous one.
    """
    cleaning_steps = (remove_leap_day, clean_btc_data_2021, clean_btc_data_2022)
    for step in cleaning_steps:
        btc = step(btc)
    return btc

In [112]:
# Run the full cleaning pipeline on the loaded frame.
test_df = clean_data(btc)

In [113]:
# Display the cleaned frame; the last rows should now hold filled values
# rather than NaN.
test_df

Unnamed: 0_level_0,btc_open,btc_high,btc_low,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-01,13715.65,13818.55,12750.00,13380.00,8609.915844
2018-01-02,13382.16,15473.49,12890.02,14675.11,20078.092111
2018-01-03,14690.00,15307.56,14150.00,14919.51,15905.667639
2018-01-04,14919.51,15280.00,13918.04,15059.54,21329.649574
2018-01-05,15059.56,17176.24,14600.00,16960.39,23251.491125
...,...,...,...,...,...
2022-12-27,16458.82,16600.00,16401.96,16428.78,69113.865880
2022-12-28,16428.39,16486.21,15997.65,16213.74,105559.308580
2022-12-29,16212.20,16547.29,16100.00,16442.91,105825.529980
2022-12-30,16442.91,17245.48,16428.30,17163.55,121727.180360


In [116]:
# Verify the cleaned 2022 slice has a full year of non-null rows. `.loc` is
# used because plain `frame['2022']` row selection was removed in pandas 2.0.
test_df.loc['2022'].info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 365 entries, 2022-01-01 to 2022-12-31
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   btc_open    365 non-null    float64
 1   btc_high    365 non-null    float64
 2   btc_low     365 non-null    float64
 3   btc_close   365 non-null    float64
 4   btc_volume  365 non-null    float64
dtypes: float64(5)
memory usage: 17.1 KB
