In [1]:
import yfinance as yf
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import statsmodels.tsa.stattools as ts
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm
import matplotlib.dates as mdates
from sklearn.metrics import mean_squared_error
import warnings

# Blanket filter: hide every warning category for the rest of the session.
warnings.filterwarnings("ignore")

from statsmodels.tools.sm_exceptions import ConvergenceWarning

# Explicit per-category filters, installed in the same order as before. They are
# already covered by the blanket filter above but document which warnings the
# modelling code is expected to emit.
for _category in (UserWarning, FutureWarning, ConvergenceWarning):
    warnings.simplefilter('ignore', _category)

In [2]:
# Load the cleaned ETH price table from disk. The 'Date' column (used as the time
# axis by the moving-average chart) is parsed into datetime64 at load time so that
# plots and any date arithmetic treat it as timestamps rather than plain strings.
# The default positional RangeIndex is kept unchanged.
eth_data = pd.read_csv("../Data/cleaned_eth_data.csv", parse_dates=["Date"])


In [3]:
# Closing price over time. The frame carries a positional RangeIndex (read_csv is
# called without index_col), so plotting against the index would label the x-axis
# with row numbers; use the 'Date' column, as the moving-average chart does.
px.line(eth_data, x='Date', y='Close', title='Ethereum (ETH/USDT) Daily Closing Price')


In [4]:
# Daily trading volume over time. Volume values are far larger than the price
# values, so the y-axis is logarithmic for readability.
# Only one figure is built: a cell displays just its last expression, so a separate
# linear-scale figure created earlier in the cell would never be shown.
# x is the 'Date' column (not the positional index) so the axis shows dates.
px.line(
    eth_data,
    x='Date',
    y='Volume',
    title='Ethereum (ETH/USDT) Daily Trading Volume',
).update_yaxes(type="log")

In [5]:
# Relative change of the close versus 1, 7 and 30 rows earlier (one row is
# presumably one trading day -- confirm against the data source).
# pct_change returns a fraction (0.05 == +5%), not a value scaled to percent, and
# the first `periods` rows of each column are NaN.
eth_data['Price_Change_1D'] = eth_data['Close'].pct_change()
eth_data['Price_Change_7D'] = eth_data['Close'].pct_change(periods=7)
eth_data['Price_Change_30D'] = eth_data['Close'].pct_change(periods=30)

# Plot against 'Date' rather than the positional index so the x-axis shows calendar
# dates, consistent with the moving-average chart.
fig_change = px.line(
    eth_data,
    x='Date',
    y=['Price_Change_1D', 'Price_Change_7D', 'Price_Change_30D'],
    title='Ethereum Price Percentage Change',
)
fig_change.show()

In [6]:
# Rolling standard deviation of the closing price over 30- and 90-row windows.
# The first window-1 rows of each column are NaN (rolling's default min_periods).
# NOTE(review): this is the dispersion of the price *level* (in price units), not of
# returns; if return volatility is intended, compute it on Price_Change_1D instead.
eth_data['Volatility_30D'] = eth_data['Close'].rolling(window=30).std()
eth_data['Volatility_90D'] = eth_data['Close'].rolling(window=90).std()

# Plot against 'Date' rather than the positional index so the x-axis shows calendar
# dates, consistent with the moving-average chart.
fig_volatility = px.line(
    eth_data,
    x='Date',
    y=['Volatility_30D', 'Volatility_90D'],
    title='Ethereum Rolling Volatility',
)
fig_volatility.show()

In [7]:
# Sample autocorrelation of the closing price for lags 0 through 30.
acf_values = ts.acf(eth_data['Close'], nlags=30)
# Lag labels come from the result length (nlags + 1 values, lag 0 included).
acf_df = pd.DataFrame({'Lag': range(len(acf_values)), 'ACF': acf_values})

# Approximate 95% band around zero: +/- 1.96 / sqrt(number of observations).
acf_conf_bound = 1.96 / len(eth_data['Close']) ** 0.5

fig_acf_px = px.scatter(
    acf_df,
    x='Lag',
    y='ACF',
    title='Autocorrelation Function (ACF) of ETH Close Price',
    labels={'ACF': 'Autocorrelation Coefficient'},
)
# Zero reference line plus the upper and lower confidence bounds.
fig_acf_px.add_hline(y=0, line_dash="dash", line_color="black")
fig_acf_px.add_hline(
    y=acf_conf_bound,
    line_dash="dash",
    line_color="red",
    annotation_text="Upper Bound (approx. 95% CI)",
    annotation_position="top right",
)
fig_acf_px.add_hline(
    y=-acf_conf_bound,
    line_dash="dash",
    line_color="red",
    annotation_text="Lower Bound (approx. 95% CI)",
    annotation_position="bottom right",
)
fig_acf_px.show()


In [8]:

# Sample partial autocorrelation of the closing price for lags 0 through 30,
# estimated with the 'ywmle' method; statsmodels' pacf accepts other estimators
# through the same `method` argument.
pacf_values = ts.pacf(eth_data['Close'], nlags=30, method='ywmle')
# Lag labels come from the result length (nlags + 1 values, lag 0 included).
pacf_df = pd.DataFrame({'Lag': range(len(pacf_values)), 'PACF': pacf_values})

# Approximate 95% band around zero: +/- 1.96 / sqrt(number of observations).
pacf_conf_bound = 1.96 / len(eth_data['Close']) ** 0.5

fig_pacf_px = px.scatter(
    pacf_df,
    x='Lag',
    y='PACF',
    title='Partial Autocorrelation Function (PACF) of ETH Close Price',
    labels={'PACF': 'Partial Autocorrelation Coefficient'},
)
# Zero reference line plus the upper and lower confidence bounds.
fig_pacf_px.add_hline(y=0, line_dash="dash", line_color="black")
fig_pacf_px.add_hline(
    y=pacf_conf_bound,
    line_dash="dash",
    line_color="red",
    annotation_text="Upper Bound (approx. 95% CI)",
    annotation_position="top right",
)
fig_pacf_px.add_hline(
    y=-pacf_conf_bound,
    line_dash="dash",
    line_color="red",
    annotation_text="Lower Bound (approx. 95% CI)",
    annotation_position="bottom right",
)
fig_pacf_px.show()

In [9]:
# Scatter of closing price (y) against traded volume (x), one point per row.
fig_scatter = px.scatter(
    data_frame=eth_data,
    x='Volume',
    y='Close',
    title='Ethereum Close Price vs. Volume',
)
fig_scatter.show()

In [10]:
# Simple moving averages of the close over 30- and 90-row windows, stored as
# MA_30 and MA_90. The first window-1 rows of each column are NaN.
for _window in (30, 90):
    eth_data[f'MA_{_window}'] = eth_data['Close'].rolling(window=_window).mean()

# Overlay the raw close with both moving averages on a date axis.
ma_columns = ['Close', 'MA_30', 'MA_90']
fig = px.line(
    data_frame=eth_data,
    x='Date',
    y=ma_columns,
    title='Ethereum (ETH/USDT) Close Price with 30 & 90-Day Moving Averages',
)
fig.show()


In [11]:
# Pairwise correlation (DataFrame.corr default method) between the price and
# volume columns.
ohlcv_columns = ['Close', 'High', 'Low', 'Open', 'Volume']
correlation_matrix = eth_data[ohlcv_columns].corr()

# Render the matrix as a heatmap; both axes are labelled with the column names.
fig_heatmap = px.imshow(
    correlation_matrix,
    labels={"x": "Features", "y": "Features", "color": "Correlation"},
    x=correlation_matrix.columns,
    y=correlation_matrix.index,
    color_continuous_scale="viridis",
    title="Correlation Heatmap of ETH Price Data",
)
fig_heatmap.show()

In [12]:
# Box plot of the closing-price distribution; shown as the cell's last expression.
px.box(
    data_frame=eth_data,
    y='Close',
    title='Ethereum Closing Price Distribution',
)

In [13]:
# IQR rule on the closing price: a row is an outlier when its close lies more than
# 1.5 * IQR below the first quartile or above the third quartile.
close_quartiles = eth_data['Close'].quantile([0.25, 0.75])
Q1 = close_quartiles.iloc[0]
Q3 = close_quartiles.iloc[1]
IQR = Q3 - Q1

lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# .lt / .gt evaluate to False for NaN, so missing closes are never flagged.
below_lower = eth_data['Close'].lt(lower_bound)
above_upper = eth_data['Close'].gt(upper_bound)
outliers = eth_data[below_lower | above_upper]

print("Number of outliers detected:", len(outliers))
print("\nOutlier values:")
print(outliers['Close'])

Number of outliers detected: 0

Outlier values:
Series([], Name: Close, dtype: float64)


In [14]:
# Z-score rule on the closing price: a row is an outlier when its close is more
# than `threshold` sample standard deviations away from the mean.
threshold = 3

mean_price = eth_data['Close'].mean()
std_price = eth_data['Close'].std()

# Standardised distance of every close from the mean.
close_zscores = (eth_data['Close'] - mean_price) / std_price
outliers_zscore = eth_data[close_zscores.abs() > threshold]

print("\nNumber of outliers detected using Z-score (threshold = 3):", len(outliers_zscore))
print("\nOutlier values (Z-score):")
print(outliers_zscore['Close'])


Number of outliers detected using Z-score (threshold = 3): 0

Outlier values (Z-score):
Series([], Name: Close, dtype: float64)
