In [1]:
pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler

In [3]:
# souce : https://api.alternative.me/fng/?limit=0&format=csv

In [4]:
import requests

def fetch_fear_and_greed_index():
    url = "https://api.alternative.me/fng/"
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        return data
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the Fear and Greed Index: {e}")
        return None

# Fetch the latest Fear and Greed Index data
index_data = fetch_fear_and_greed_index()

if index_data is not None:
    print("Latest Fear and Greed Index Data:")
    for item in index_data.get("data", []):
        value = item.get("value")
        classification = item.get("value_classification")
        print(f"Value: {value}, Classification: {classification}")
else:
    print("Failed to fetch the Fear and Greed Index data.")


Latest Fear and Greed Index Data:
Value: 74, Classification: Greed


In [5]:
import pandas as pd
import requests
from io import StringIO

# The URL of the CSV file
url = 'https://api.alternative.me/fng/?limit=0&format=csv'

# Send a GET request to the URL
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Decode the content of the response
    content = response.content.decode('utf-8')
else:
    print("Failed to retrieve the CSV file")


In [6]:
import pandas as pd
import re

# Using regular expressions to find all date, value, and classification entries
matches = re.findall(r'(\d{2}-\d{2}-\d{4}),(.*?),(Extreme Greed|Greed|Fear|Neutral)', content)

# Creating a DataFrame from the matches
df = pd.DataFrame(matches, columns=['date', 'fng_value', 'fng_classification'])

df.head()  # Displaying the first few rows of the DataFrame

Unnamed: 0,date,fng_value,fng_classification
0,15-04-2024,74,Greed
1,14-04-2024,72,Greed
2,13-04-2024,72,Greed
3,12-04-2024,79,Extreme Greed
4,11-04-2024,76,Extreme Greed


In [7]:
df.date

0       15-04-2024
1       14-04-2024
2       13-04-2024
3       12-04-2024
4       11-04-2024
           ...    
1730    09-02-2018
1731    08-02-2018
1732    07-02-2018
1733    03-02-2018
1734    01-02-2018
Name: date, Length: 1735, dtype: object

In [8]:
from datetime import datetime, timedelta

# Convert the 'date' column to datetime
df['date'] = pd.to_datetime(df['date'], format='%d-%m-%Y')

# Define the date range
start_date = datetime.strptime('01-02-2018', '%d-%m-%Y')
end_date = datetime.strptime('02-04-2024', '%d-%m-%Y')

# Create a complete date range
all_dates = pd.date_range(start=start_date, end=end_date, freq='D')

# Convert the all_dates to a DataFrame
df_all_dates = pd.DataFrame(all_dates, columns=['date'])

# Merge the complete date range with our data - this will show NaN where data is missing
df_merged = df_all_dates.merge(df, on='date', how='left')

# Find dates with null values (where fng_value is NaN)
missing_dates = df_merged[df_merged['fng_value'].isnull()]

missing_dates

Unnamed: 0,date,fng_value,fng_classification
1,2018-02-02,,
3,2018-02-04,,
4,2018-02-05,,
5,2018-02-06,,
54,2018-03-27,,
...,...,...,...
1771,2022-12-08,,
1794,2022-12-31,,
1801,2023-01-07,,
1802,2023-01-08,,


In [9]:
df_merged

Unnamed: 0,date,fng_value,fng_classification
0,2018-02-01,30,Fear
1,2018-02-02,,
2,2018-02-03,40,Fear
3,2018-02-04,,
4,2018-02-05,,
...,...,...,...
2248,2024-03-29,79,Extreme Greed
2249,2024-03-30,75,Greed
2250,2024-03-31,75,Greed
2251,2024-04-01,79,Extreme Greed


In [10]:
# Calculate the mean of 'fng_value' for each month and replace NaN values with the corresponding monthly mean
df_merged['fng_value'] = df_merged['fng_value'].astype(float)
df_merged['month_year'] = df_merged['date'].dt.to_period('M')

df_filled = df_merged.copy()
df_filled['fng_value'] = df_filled.groupby('month_year')['fng_value'].transform(lambda x: x.fillna(x.mean()))

df_filled = df_filled.drop('month_year', axis=1)

df_filled.head()

Unnamed: 0,date,fng_value,fng_classification
0,2018-02-01,30.0,Fear
1,2018-02-02,47.333333,
2,2018-02-03,40.0,Fear
3,2018-02-04,47.333333,
4,2018-02-05,47.333333,


In [11]:
global_median = df_merged['fng_value'].median()

df_filled['fng_value'] = df_filled['fng_value'].fillna(global_median)

df_filled[df_filled['fng_value'].isnull()]

Unnamed: 0,date,fng_value,fng_classification


In [12]:
df_fear_index = df_filled.drop(columns=['fng_classification'])
df_fear_index.head()

Unnamed: 0,date,fng_value
0,2018-02-01,30.0
1,2018-02-02,47.333333
2,2018-02-03,40.0
3,2018-02-04,47.333333
4,2018-02-05,47.333333


### Merge with the original data

In [13]:
import yfinance as yf
import datetime

In [14]:
start_date = datetime.datetime(2018, 1, 1)
end_date = datetime.datetime(2024, 1, 1)
btc_info = yf.Ticker("BTC-USD")

# pass the parameters as the taken dates for start and end
df = btc_info.history(start = start_date, end = end_date)

In [15]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-01 00:00:00+00:00,14112.200195,14112.200195,13154.700195,13657.200195,10291200000,0.0,0.0
2018-01-02 00:00:00+00:00,13625.0,15444.599609,13163.599609,14982.099609,16846600192,0.0,0.0
2018-01-03 00:00:00+00:00,14978.200195,15572.799805,14844.5,15201.0,16871900160,0.0,0.0
2018-01-04 00:00:00+00:00,15270.700195,15739.700195,14522.200195,15599.200195,21783199744,0.0,0.0
2018-01-05 00:00:00+00:00,15477.200195,17705.199219,15202.799805,17429.5,23840899072,0.0,0.0


In [16]:
df = df.reset_index()
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2018-01-01 00:00:00+00:00,14112.200195,14112.200195,13154.700195,13657.200195,10291200000,0.0,0.0
1,2018-01-02 00:00:00+00:00,13625.0,15444.599609,13163.599609,14982.099609,16846600192,0.0,0.0
2,2018-01-03 00:00:00+00:00,14978.200195,15572.799805,14844.5,15201.0,16871900160,0.0,0.0
3,2018-01-04 00:00:00+00:00,15270.700195,15739.700195,14522.200195,15599.200195,21783199744,0.0,0.0
4,2018-01-05 00:00:00+00:00,15477.200195,17705.199219,15202.799805,17429.5,23840899072,0.0,0.0


In [None]:
# Convert 'Date' to datetime
df['Date'] = pd.to_datetime(df['Date'])

In [17]:
df = df.drop(columns=['Dividends', 'Stock Splits'])

In [20]:
df.columns= ['date', 'open', 'high', 'low', 'close', 'vol']

In [21]:
df['date'] = pd.to_datetime(df['date'])
df['date'] = df['date'].dt.strftime('%Y-%m-%d')
df['date'] = pd.to_datetime(df['date'])

In [25]:
merged_df =  df.merge(df_fear_index, left_on='date', right_on='date', how='left')

merged_df.head()


Unnamed: 0,date,open,high,low,close,vol,fng_value
0,2018-01-01,14112.200195,14112.200195,13154.700195,13657.200195,10291200000,
1,2018-01-02,13625.0,15444.599609,13163.599609,14982.099609,16846600192,
2,2018-01-03,14978.200195,15572.799805,14844.5,15201.0,16871900160,
3,2018-01-04,15270.700195,15739.700195,14522.200195,15599.200195,21783199744,
4,2018-01-05,15477.200195,17705.199219,15202.799805,17429.5,23840899072,


In [28]:
merged_df[merged_df['fng_value'].isnull()]

Unnamed: 0,date,open,high,low,close,vol,fng_value
0,2018-01-01,14112.200195,14112.200195,13154.700195,13657.200195,10291200000,
1,2018-01-02,13625.0,15444.599609,13163.599609,14982.099609,16846600192,
2,2018-01-03,14978.200195,15572.799805,14844.5,15201.0,16871900160,
3,2018-01-04,15270.700195,15739.700195,14522.200195,15599.200195,21783199744,
4,2018-01-05,15477.200195,17705.199219,15202.799805,17429.5,23840899072,
5,2018-01-06,17462.099609,17712.400391,16764.599609,17527.0,18314600448,
6,2018-01-07,17527.300781,17579.599609,16087.700195,16477.599609,15866000384,
7,2018-01-08,16476.199219,16537.900391,14208.200195,15170.099609,18413899776,
8,2018-01-09,15123.700195,15497.5,14424.0,14595.400391,16659999744,
9,2018-01-10,14588.5,14973.299805,13691.200195,14973.299805,18500800512,


In [35]:
merged_df['fng_value'] = merged_df['fng_value'].fillna(method='bfill')

In [38]:
merged_df = merged_df.set_index('date')

In [39]:
df = merged_df.copy()

In [40]:
df = df[df.index > '2018-01-01']

In [41]:
# Prepare the volume and price differences, normalize volume
BTC_vol = df["vol"].values
df_diff = df.diff().dropna()
df_diff["vol"] = np.log(1 + BTC_vol[:-1])

In [42]:
df_aligned = df.loc[df_diff.index]

### Train, Test split

In [43]:
# Train data
# Period : From start of 2018 to end of 2022
mask_train = (df_diff.index >= "2018-01-01") & (df_diff.index < "2023-01-01")
df_train = df_diff.loc[mask_train].copy()
train_close = df_aligned.loc[mask_train, "close"].values
df_train["Relative_Close"] = train_close / train_close[0]

In [44]:
# Test data
# Period : Whole 2023
mask_test = (df_diff.index >= "2023-01-01") & (df_diff.index < "2024-01-01")  # December 2018 for testing
df_test = df_diff.loc[mask_test].copy()
test_close = df_aligned.loc[mask_test, "close"].values
df_test["Relative_Close"] = test_close / train_close[0]


In [45]:
df_train.head()

Unnamed: 0_level_0,open,high,low,close,vol,fng_value,Relative_Close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-03,1353.200195,128.200195,1680.900391,218.900391,23.547415,0.0,1.0
2018-01-04,292.5,166.900391,-322.299805,398.200195,23.548915,0.0,1.026196
2018-01-05,206.5,1965.499023,680.599609,1830.299805,23.804405,0.0,1.146602
2018-01-06,1984.899414,7.201172,1561.799805,97.5,23.894668,0.0,1.153016
2018-01-07,65.201172,-132.800781,-676.899414,-1049.400391,23.630964,0.0,1.083981


In [46]:
df_test.head()

Unnamed: 0_level_0,open,high,low,close,vol,fng_value,Relative_Close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-01-01,-55.759766,1.453125,3.714844,77.583984,23.142672,-1.678571,1.093683
2023-01-02,77.595703,128.904297,50.994141,63.390625,22.94728,1.0,1.097853
2023-01-03,63.337891,1.103516,50.142578,-8.613281,23.216287,-1.0,1.097287
2023-01-04,-8.642578,204.138672,45.392578,183.380859,23.355376,3.0,1.109351
2023-01-05,183.267578,-80.564453,122.519531,-26.501953,23.636798,0.0,1.107607


In [47]:
# Generate dataset function
def generate_dataset(df, seq_len):
    X_list, y_list = [], []
    for i in range(len(df) - seq_len):
        X_list.append(df.iloc[i:(i+seq_len), :].values)
        y_list.append(df["close"].iloc[i + seq_len])
    return np.array(X_list), np.array(y_list)

In [48]:
LAG = 1

In [49]:
# # Prepare training and test datasets
# X_train, y_train = generate_dataset(df_train, LAG)
# X_test, y_test = generate_dataset(pd.concat((df_train.iloc[-LAG:], df_test)), LAG)

In [50]:
validation_size = 0.2
n_validation = int(len(df_train) * validation_size)

df_val = df_train.iloc[-n_validation:]
df_train_reduced = df_train.iloc[:-n_validation]

X_train, y_train = generate_dataset(df_train_reduced, LAG)
X_val, y_val = generate_dataset(pd.concat((df_train_reduced.iloc[-LAG:], df_val)), LAG)
X_test, y_test = generate_dataset(pd.concat((df_train.iloc[-LAG:], df_test)), LAG)

In [51]:
num_samples, num_timesteps, num_features = X_train.shape
X_train_reshaped = X_train.reshape(-1, num_features)
scaler = MinMaxScaler(feature_range=(0, 1))
X_train_scaled = scaler.fit_transform(X_train_reshaped)
X_train_scaled = X_train_scaled.reshape(num_samples, num_timesteps, num_features)

In [52]:
num_samples_val, num_timesteps, num_features = X_val.shape
X_val_reshaped = X_val.reshape(-1, num_features)
X_val_scaled = scaler.transform(X_val_reshaped)
X_val_scaled = X_val_scaled.reshape(num_samples_val, num_timesteps, num_features)

In [53]:
num_samples_test, num_timesteps, num_features = X_test.shape
X_test_reshaped = X_test.reshape(-1, num_features)
X_test_scaled = scaler.transform(X_test_reshaped)
X_test_scaled = X_test_scaled.reshape(num_samples_test, num_timesteps, num_features)

### Hyperparameter Tuning

In [54]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from kerastuner.tuners import RandomSearch

  from kerastuner.tuners import RandomSearch


In [55]:
def build_model(hp):
    model = Sequential([
        LSTM(
            units=hp.Int('units1', min_value=32, max_value=256, step=32),
            return_sequences=True,
            input_shape=(X_train.shape[1], X_train.shape[2]),
            recurrent_dropout=hp.Float('recurrent_dropout1', min_value=0.0, max_value=0.5, step=0.1)
        ),
        Dropout(rate=hp.Float('dropout1', min_value=0.0, max_value=0.5, step=0.1)),
        LSTM(
            units=hp.Int('units2', min_value=32, max_value=256, step=32),
            recurrent_dropout=hp.Float('recurrent_dropout2', min_value=0.0, max_value=0.5, step=0.1)
        ),
        Dropout(rate=hp.Float('dropout2', min_value=0.0, max_value=0.5, step=0.1)),
        Dense(
            units=hp.Int('dense_units', min_value=16, max_value=128, step=16),
            activation='relu'
        ),
        Dense(1)
    ])

    model.compile(
        optimizer=Adam(
            learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')
        ),
        loss='mse'
    )

    return model

In [56]:
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,  # Number of different configurations to try
    executions_per_trial=1,  # Number of models to train for each trial
    directory='my_dir',  # Directory to save logs and models
    project_name='lstm_tuning'
)

In [57]:
# Display search space summary
tuner.search_space_summary()

Search space summary
Default search space size: 8
units1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 256, 'step': 32, 'sampling': 'linear'}
recurrent_dropout1 (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
dropout1 (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
units2 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 256, 'step': 32, 'sampling': 'linear'}
recurrent_dropout2 (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
dropout2 (Float)
{'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
dense_units (Int)
{'default': None, 'conditions': [], 'min_value': 16, 'max_value': 128, 'step': 16, 'sampling': 'linear'}
learning_rate (Float)
{'default': 0.0001, 'conditions': [], 'min_value': 0.0001

In [58]:
# Perform the hyperparameter search
tuner.search(
    X_train_scaled, y_train,
    epochs=5,
    validation_data=(X_val_scaled, y_val),
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)]
)

Trial 10 Complete [00h 00m 10s]
val_loss: 1026409.5625

Best val_loss So Far: 1026281.6875
Total elapsed time: 00h 02m 02s


In [59]:
# Initialize an empty list to hold each trial's data
trial_data = []

# Iterate through each trial and collect the data
for trial_id, trial in tuner.oracle.trials.items():
    if trial.status == "COMPLETED":
        # Extract the final validation loss for the trial
        val_loss = trial.metrics.get_best_value('val_loss')
        # Prepare a dictionary for the trial
        trial_info = {
            'Trial ID': trial_id,
            'MSE': val_loss
        }
        # Update the dictionary with the hyperparameters
        trial_info.update(trial.hyperparameters.values)
        # Append the dictionary to the list
        trial_data.append(trial_info)

# Convert the list of dictionaries to a DataFrame
df_trials = pd.DataFrame(trial_data)

# Display the DataFrame
df_trials

Unnamed: 0,Trial ID,MSE,units1,recurrent_dropout1,dropout1,units2,recurrent_dropout2,dropout2,dense_units,learning_rate
0,0,1026307.0,96,0.3,0.2,256,0.0,0.4,96,0.002566
1,1,1026282.0,224,0.0,0.2,128,0.2,0.0,112,0.000342
2,2,1026630.0,256,0.4,0.1,96,0.1,0.3,96,0.008325
3,3,1026289.0,128,0.1,0.4,224,0.0,0.0,96,0.001214
4,4,1026282.0,32,0.4,0.4,64,0.2,0.0,64,0.000114
5,5,1026295.0,192,0.0,0.0,160,0.4,0.0,16,0.003867
6,6,1026531.0,256,0.2,0.0,160,0.3,0.0,96,0.007519
7,7,1026346.0,64,0.0,0.0,192,0.3,0.0,64,0.001932
8,8,1026328.0,160,0.3,0.2,64,0.0,0.3,112,0.004138
9,9,1026410.0,96,0.0,0.4,192,0.2,0.3,48,0.00832


In [60]:
# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first LSTM layer is {best_hps.get('units1')},
the optimal dropout rates are {best_hps.get('dropout1')} for the first dropout layer and {best_hps.get('dropout2')} for the second dropout layer,
the optimal number of units in the second LSTM layer is {best_hps.get('units2')}, and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")


The hyperparameter search is complete. The optimal number of units in the first LSTM layer is 224,
the optimal dropout rates are 0.2 for the first dropout layer and 0.0 for the second dropout layer,
the optimal number of units in the second LSTM layer is 128, and the optimal learning rate for the optimizer
is 0.0003424899128529849.



In [61]:
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train_scaled, y_train, epochs=50, validation_data=(X_val_scaled, y_val))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [62]:
predicted_prices = model.predict(X_test_scaled)



In [63]:
print("Test MSE:", np.mean((predicted_prices - y_test)**2))

Test MSE: 427706.4906535971


### Plot the comparison between actual and predicted value