In [1]:
#importing dependencies
import yfinance as yf
import pandas as pd
from scipy import stats
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.layers import LSTM, Bidirectional

In [2]:
#Gathering Historical Data
# Define the ticker symbol and time period

tickerSymbol = 'BTC-USD'
start_date = '2023-10-30'
end_date = '2023-11-15'

# Fetch the historical data
tickerData = yf.Ticker(tickerSymbol)
tickerDf = tickerData.history(period='1d', start=start_date, end=end_date)

print(tickerDf)

                                   Open          High           Low  \
Date                                                                  
2023-10-30 00:00:00+00:00  34531.742188  34843.933594  34110.972656   
2023-10-31 00:00:00+00:00  34500.078125  34719.253906  34083.308594   
2023-11-01 00:00:00+00:00  34657.273438  35527.929688  34170.691406   
2023-11-02 00:00:00+00:00  35441.578125  35919.843750  34401.574219   
2023-11-03 00:00:00+00:00  34942.472656  34942.472656  34133.441406   
2023-11-04 00:00:00+00:00  34736.324219  35256.031250  34616.691406   
2023-11-05 00:00:00+00:00  35090.011719  35340.339844  34594.242188   
2023-11-06 00:00:00+00:00  35044.789062  35286.027344  34765.363281   
2023-11-07 00:00:00+00:00  35047.792969  35892.417969  34545.816406   
2023-11-08 00:00:00+00:00  35419.476562  35994.417969  35147.800781   
2023-11-09 00:00:00+00:00  35633.632812  37926.257812  35592.101562   
2023-11-10 00:00:00+00:00  36702.250000  37493.800781  36362.753906   
2023-1

In [3]:
# Identifying Missing Data
missing_data = tickerDf.isnull().sum()

# Filling Missing Data (here we fill with the mean, but this is subjective)
tickerDf.fillna(tickerDf.mean(), inplace=True)

print(missing_data)


Open            0
High            0
Low             0
Close           0
Volume          0
Dividends       0
Stock Splits    0
dtype: int64


In [4]:
#Drop the columns we don't need'Dividends' and 'Stock Splits''
tickerDf = tickerDf.drop(columns=['Dividends', 'Stock Splits'])
print(tickerDf)

                                   Open          High           Low  \
Date                                                                  
2023-10-30 00:00:00+00:00  34531.742188  34843.933594  34110.972656   
2023-10-31 00:00:00+00:00  34500.078125  34719.253906  34083.308594   
2023-11-01 00:00:00+00:00  34657.273438  35527.929688  34170.691406   
2023-11-02 00:00:00+00:00  35441.578125  35919.843750  34401.574219   
2023-11-03 00:00:00+00:00  34942.472656  34942.472656  34133.441406   
2023-11-04 00:00:00+00:00  34736.324219  35256.031250  34616.691406   
2023-11-05 00:00:00+00:00  35090.011719  35340.339844  34594.242188   
2023-11-06 00:00:00+00:00  35044.789062  35286.027344  34765.363281   
2023-11-07 00:00:00+00:00  35047.792969  35892.417969  34545.816406   
2023-11-08 00:00:00+00:00  35419.476562  35994.417969  35147.800781   
2023-11-09 00:00:00+00:00  35633.632812  37926.257812  35592.101562   
2023-11-10 00:00:00+00:00  36702.250000  37493.800781  36362.753906   
2023-1

In [5]:
#adding z-score to the data

z_scores = np.abs(stats.zscore(tickerDf['Close']))
tickerDf = tickerDf[(z_scores < 3)]  # Keeping only rows with z-score less than 3

print(z_scores)

Date
2023-10-30 00:00:00+00:00    1.273224
2023-10-31 00:00:00+00:00    1.093476
2023-11-01 00:00:00+00:00    0.257348
2023-11-02 00:00:00+00:00    0.799587
2023-11-03 00:00:00+00:00    1.023342
2023-11-04 00:00:00+00:00    0.643164
2023-11-05 00:00:00+00:00    0.678848
2023-11-06 00:00:00+00:00    0.691871
2023-11-07 00:00:00+00:00    0.250493
2023-11-08 00:00:00+00:00    0.020439
2023-11-09 00:00:00+00:00    1.107312
2023-11-10 00:00:00+00:00    1.781936
2023-11-11 00:00:00+00:00    1.590780
2023-11-12 00:00:00+00:00    1.500013
2023-11-13 00:00:00+00:00    0.900017
2023-11-14 00:00:00+00:00    0.148266
Name: Close, dtype: float64


In [6]:
#Feature Engineering

# Indicator Calculation (e.g., moving average)
tickerDf['MA_10'] = tickerDf['Close'].rolling(window=10).mean()  # 10 days moving average

# Normalization (Standard Scaling as example)
scaler = StandardScaler()
tickerDf['Close_Scaled'] = scaler.fit_transform(tickerDf[['Close']])

In [7]:
# Statistical Analysis

tickerDf['Return'] = tickerDf['Close'].pct_change()  # Daily returns
tickerDf['Volatility'] = tickerDf['Return'].rolling(window=30).std() * np.sqrt(30)  # Monthly volatility

In [8]:
# Time Series Decomposition Plots
decomposition = seasonal_decompose(tickerDf['Close'], model='additive', period=30)
decomposition.plot()
plt.show()

# Correlation Heatmaps
sns.heatmap(tickerDf.corr(), annot=True, cmap='coolwarm')
plt.show()

ValueError: x must have 2 complete cycles requires 60 observations. x only has 16 observation(s)

In [None]:
# Predictor Identification
# Note: This is more about forming hypotheses and identifying potential predictors based on domain knowledge and initial analysis.

# Theory Testing - Example with Simple Linear Regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Assuming we want to predict 'Close' using 'Volume' (as an example)
X = tickerDf[['Volume']]
y = tickerDf['Close']

# Splitting the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Creating a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predicting and evaluating the model
predictions = model.predict(X_test)
print("Mean Squared Error:", mean_squared_error(y_test, predictions))

Mean Squared Error: 411851.83028440387


In [None]:
# Train-Test Split
X = tickerDf[['Open', 'Close']] 

# Temporal Split (e.g., using the first 80% of data for training and the rest for testing)
split_point = int(len(tickerDf) * 0.8)
X_train, X_test = X[:split_point], X[split_point:]
y_train, y_test = y[:split_point], y[split_point:]

In [None]:
# Neural Network initialization

# Initialize the Neural Network
model_nn = Sequential()
model_nn.add(Dense(128, activation='relu', input_dim=X_train.shape[1]))
model_nn.add(Dropout(0.2))  # Regularization with Dropout
model_nn.add(Dense(64, activation='tanh'))  # Different activation function
model_nn.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))  # L2 regularization
model_nn.add(Dense(1))  # Output layer

# Compile the model
model_nn.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model_nn.fit(X_train, y_train, epochs=100, batch_size=32)

# Make predictions
nn_predictions = model_nn.predict(X_test)

# Calculate Mean Squared Error
nn_mse = mean_squared_error(y_test, nn_predictions)
print("Neural Network MSE:", nn_mse)

# For accuracy (example: considering a prediction within a certain percentage of the actual value as accurate)
accuracy_threshold = 0.05  # 5%
nn_accurate_predictions = np.abs(nn_predictions.flatten() - y_test) <= accuracy_threshold * y_test
nn_accuracy = np.mean(nn_accurate_predictions)
print("Neural Network Accuracy:", nn_accuracy)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Neural Network MSE: 1333315292.3671188
Neural Network Accuracy: 0.0


In [None]:
#LSTM initialization

# Reshape input for LSTM [samples, time steps, features]
X_train_reshaped = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))

# Build LSTM model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(1, X_train.shape[1])))
model.add(Dense(1))

# Compile and fit the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train_reshaped, y_train, epochs=100, batch_size=32)

#prediction
lstm_predictions = model.predict(X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1])))

# Calculate Mean Squared Error
lstm_mse = mean_squared_error(y_test, lstm_predictions)
print("LSTM MSE:", lstm_mse)

# For accuracy
lstm_accurate_predictions = np.abs(lstm_predictions.flatten() - y_test) <= accuracy_threshold * y_test
lstm_accuracy = np.mean(lstm_accurate_predictions)
print("LSTM Accuracy:", lstm_accuracy)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
LSTM MSE: 664410690.9068463
LSTM Accuracy: 0.0


In [None]:
sequence_length = 5

model_lstm = Sequential()
model_lstm.add(Bidirectional(LSTM(50, activation='relu', return_sequences=True), input_shape=(sequence_length, X_train.shape[2])))
model_lstm.add(Bidirectional(LSTM(30, activation='relu')))
model_lstm.add(Dense(1))

# Compile the model
model_lstm.compile(optimizer='adam', loss='mean_squared_error')

# Reshape input data for LSTM
X_train_reshaped = X_train.values.reshape((X_train.shape[0], sequence_length, X_train.shape[1]))

# Train the model
model_lstm.fit(X_train_reshaped, y_train, epochs=20, batch_size=32)

lstm_mse = mean_squared_error(y_test, lstm_predictions)
print("LSTM MSE:", lstm_mse)

# For accuracy
lstm_accurate_predictions = np.abs(lstm_predictions.flatten() - y_test) <= accuracy_threshold * y_test
lstm_accuracy = np.mean(lstm_accurate_predictions)
print("LSTM Accuracy:", lstm_accuracy)

IndexError: tuple index out of range