In [40]:

import pandas as pd
import numpy as  np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from datetime import datetime
import statsmodels.api as sm
import plotly.graph_objs as go
from plotly.subplots import make_subplots

In [68]:
# Load the daily BTC-USD price history.
# Raw string: the Windows path contains "\T" and "\B", which are not valid
# escape sequences and trigger a SyntaxWarning on recent Python versions.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df = pd.read_csv(r"D:\Time_series_project\BTC-USD.csv")
df["Date"] = pd.to_datetime(df["Date"])

# Date-indexed view of the same data so it can be resampled; passing the
# Series (rather than the column name) keeps "Date" as a regular column too.
df_time = df.set_index(df["Date"])

# NOTE: despite the name, 'B' resamples to business-day frequency, not monthly.
# The variable name is kept because later cells reference it.
df_monthly = df_time.resample('B').mean()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3503 entries, 0 to 3502
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       3503 non-null   datetime64[ns]
 1   Open       3503 non-null   float64       
 2   High       3503 non-null   float64       
 3   Low        3503 non-null   float64       
 4   Close      3503 non-null   float64       
 5   Adj Close  3503 non-null   float64       
 6   Volume     3503 non-null   int64         
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 191.7 KB


In [5]:
# Calendar year of each observation, stored as its own column so rows can be
# grouped (and coloured) by year.
year_of_row = df["Date"].dt.year
df["Year"] = year_of_row

In [27]:
# Adjusted close over time, drawn as one coloured line per calendar year.
axis_labels = {'Adj Close': 'Adjusted Close', 'Date': 'Date'}
fig = px.line(
    df,
    x='Date',
    y='Adj Close',
    color='Year',
    title='Time Series Data Year-wise',
    labels=axis_labels,
    template='plotly_dark',
)
# Axis titles are also set explicitly, on top of the label mapping above.
fig.update_xaxes(title_text='Date').update_yaxes(title_text='Adjusted Close')
fig.show()

### Time Series Stationarity check

In [67]:
from statsmodels.tsa.stattools import kpss, adfuller

# Significance level shared by both decisions below.
ALPHA = 0.05

# Both tests need a gap-free series: drop any NaN bins that resampling may
# have produced for periods without observations (a no-op when there are none).
adj_close = df_monthly['Adj Close'].dropna()

print("KPSS (Kwiatkowski-Phillips-Schmidt-Shin) Test")
print("   Null hypothesis for this test is - Series is stationary \n   Alternative hypothesis for this test is - Series is not stationary")
# regression='ct' tests stationarity around a deterministic trend.
# NOTE: kpss interpolates its p-value from a look-up table, so the reported
# value is clipped at the table boundaries; statsmodels emits an
# InterpolationWarning when the true p-value lies outside that range.
ststs, p, lags, critical_values = kpss(adj_close, regression='ct')
print(f"      Test Statistics : {ststs}")
print(f"      P-value : {p}")
print(f"      Critical values : {critical_values}")
# KPSS: a small p-value rejects the null of stationarity.
if p < ALPHA:
    print("Series is not stationary")
else:
    print("Series is stationary")
print("------------------------------------------------------------------------------------")
print("ADF (Augmented Dickey-Fuller) Test")
print("   Null Hypothesis- Series possesses a unit root and hence is not stationary \n   Alternate Hypothesis - Series is stationary")

# result[0] is the test statistic, result[1] the p-value and result[4] the
# critical values, as printed below.
result = adfuller(adj_close)
print(f"      Test Statistics : {result[0]}")
print(f"      P-value : {result[1]}")
print(f"      Critical values : {result[4]}")
# ADF: the null is the opposite of KPSS, so a large p-value means the
# unit-root (non-stationary) hypothesis cannot be rejected.
if result[1] > ALPHA:
    print("Series is not stationary")
else:
    print("Series is stationary")

KPSS (Kwiatkowski-Phillips-Schmidt-Shin) Test
   Null hypothesis for this test is - Series is stationary 
   Alternative hypothesis for this test is - Series is not stationary
      Test Statistics : 0.3586790249335492
      P-value : 0.01
      Critical values : {'10%': 0.119, '5%': 0.146, '2.5%': 0.176, '1%': 0.216}
Series is not stationary
------------------------------------------------------------------------------------
ADF (Augmented Dickey-Fuller) Test
   Null Hypothesis- Series possesses a unit root and hence is not stationary 
   Alternate Hypothesis - Series is stationary
      Test Statistics : -0.9457360125350205
      P-value : 0.7725381830801121
      Critical values : {'1%': -3.4329938176251593, '5%': -2.8627080196071697, '10%': -2.567391782912405}
Series is not stationary



The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is smaller than the p-value returned.




### Time Series Decomposition (original, trend, seasonal, and residual)

In [74]:
# Number of trailing days shown when the figure first opens.
INITIAL_WINDOW_DAYS = 90

# Multiplicative decomposition: observed = trend * seasonal * residual.
# No period is passed here, so statsmodels has to derive it from the index.
# NOTE(review): the multiplicative model presumably needs strictly positive,
# gap-free values — confirm against the statsmodels documentation.
res = sm.tsa.seasonal_decompose(
    df_monthly['Adj Close'],
    model = 'multiplicative'
)

# One stacked panel per component, top to bottom.
components = [
    ('Original', df_monthly['Adj Close']),
    ('Trend', res.trend),
    ('Seasonal', res.seasonal),
    ('Residual', res.resid),
]

fig = make_subplots(rows=len(components), cols=1, shared_xaxes=True, vertical_spacing=0.01)

# add_trace returns the figure itself (not the trace), so its result is not kept.
for row, (label, values) in enumerate(components, start=1):
    fig.add_trace(
        go.Scatter(x=df_monthly.index, y=values, mode='lines', name=label),
        row=row, col=1,
    )
    fig.update_yaxes(title_text=label, row=row, col=1)

last_day = df_monthly.index.max()

fig.update_layout(
    title='Seasonal Decomposition using Plotly',
    height=1400,
    # Quick-range buttons are attached to the top panel's x-axis.
    xaxis=dict(
        rangeselector=dict(
            buttons=[
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(step="all"),
            ]
        ),
        type="date",
    ),
    # The bottom panel carries the axis title and the range slider.
    xaxis4=dict(
        title=dict(text='Date'),
        rangeslider=dict(visible=True),
        type="date",
        # Open on the most recent INITIAL_WINDOW_DAYS days.
        range=[last_day - pd.Timedelta(days=INITIAL_WINDOW_DAYS), last_day],
    ),
)
fig.show()

The residual component captures what remains after the trend and seasonal components are removed: irregular or unpredictable variation such as short-term fluctuations, outliers, or measurement errors. Because the decomposition above is multiplicative, the residuals are ratios centred around 1 rather than differences centred around 0. Analyzing the residual component can help identify any remaining patterns or anomalies in the data that are not captured by the trend or seasonal components.