In [2]:
#Importing the necessary libraries

import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, Holt, ExponentialSmoothing
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from prophet import Prophet

**Data Cleaning and Preprocessing**

In [5]:
#Loading the dataset 

df = pd.read_csv('Bitcoin.csv')

#Converting the price column to float datatype

df['Price'] = df['Price'].str.replace(',','').astype(float)


Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,"Aug 24, 2021",48846.3,49532.5,49847.4,48812.4,59.89K,-1.40%
1,"Aug 23, 2021",49539.7,49255.4,50498.8,49038.6,75.37K,0.58%
2,"Aug 22, 2021",49254.5,48870.4,49480.3,48151.0,49.32K,0.77%
3,"Aug 21, 2021",48875.8,49330.0,49719.1,48293.5,18.20K,-0.91%
4,"Aug 20, 2021",49324.0,46756.7,49343.3,46646.8,83.19K,5.49%
...,...,...,...,...,...,...,...
4051,"Jul 22, 2010",0.1,0.1,0.1,0.1,2.16K,0.00%
4052,"Jul 21, 2010",0.1,0.1,0.1,0.1,0.58K,0.00%
4053,"Jul 20, 2010",0.1,0.1,0.1,0.1,0.26K,0.00%
4054,"Jul 19, 2010",0.1,0.1,0.1,0.1,0.57K,0.00%


**Using the Augmented-Dickey Fuller Test**

It is used to test the null hypothesis that a given time-series is non-stationary i.e the statistical properties (like mean, variance) change over time. It is typical of stock/ bitcoin prices that often exhibit trends, vlatility changes, or seasonality, all of which make them non-stationary.  

- Null hypothesis (H0): The time series is non-stationary(it is likely to have trends or other time-varying properties).

- Alternative hypothesis (H1): The time series is stationary(its statistical properties are constant over time).

In [6]:
#Checking for stationarity in the data 

result = adfuller(df['Price'])
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])



ADF Statistic: -2.687474
p-value: 0.076231


Since the p-value (0.076) is greater than the threshold of 0.05, we do not have enough evidence to reject the null hypothesis and we can conclude that the time-series is non-stationary. 