In [17]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

import h5py

In [None]:
data = pd.read_excel("forex_data.xlsx")

In [None]:
data.head()

In [None]:
data.columns

Assigning 

# Cleaning the data

Assigning column headings to the DataFrame


In [None]:
data.columns = ['Date', 'Open', 'High', 'Low','Close', 'Volume']

In [None]:
# Save the updated Dataframe to a new CSV file
# data.to_csv('forex_data.csv', index=False)

# Load the updated dataframe

In [None]:
data = pd.read_csv("forex_data.csv")

In [None]:
data.head()

# Checking For missing values
Check if there are any missing values in the data and decide how to handle them. You can either remove the rows with missing values or impute the missing values with appropriate techniques such as mean or median imputation.<br>
In our case we will use the mean


In [None]:
if data.isnull().values.any():
    data = data.fillna(data.mean())

# Checkin for duplicates.
Check if there are any duplicate rows in the data and remove them if necessary.

In [None]:
if data.duplicated().values.any():
    data= data.drop_duplicates()

# Remove outliers from the 'Close' column using z-score
Next, we remove outliers from the 'Close' column using z-score, where any data point that lies outside 3 standard deviations from the mean is considered an outlier and removed.


In [None]:
data = data[(data['Close'] - data['Close'].mean())/ data['Close'].std() < 3]

# Normalize the data usng StandardScaler
Finally, we normalize the data using StandardScaler by first initializing a StandardScaler object, and then applying it to the relevant columns using the fit_transform() method.

In [None]:
scaler = StandardScaler()
data[['Open', 'High', 'Low', 'Close', 'Volume']] = scaler.fit_transform(data[['Open', 'High', 'Low', 'Close', 'Volume']])


In [2]:
# Save the Cleaned DataFrame to a new CSV file
data.to_csv('Cleaned_forex_data.csv', index=False)
data.head()

NameError: name 'data' is not defined

# Reshaping the dataset

In the case of time series data like Forex, the shape of the input data is very important. The input data should be a 3D array of shape (num_samples, sequence_length, num_features), where:

```num_samples``` is the number of samples in the dataset.
```sequence_length``` is the length of the input sequence that the model will process at once.
```num_features``` is the number of features in the input data.
This 3D array can be interpreted as a sequence of ```num_samples``` input sequences, each of length ```sequence_length``` and with ```num_features``` features.

In [3]:
# Read the cleaned data CSV file into pandas DataFrame
data = pd.read_csv("Cleaned_forex_data.csv")


# Define the sequence length
sequence_length = 50


In [None]:
# Create a function to reshape the data
def create_sequences(data, sequence_length):
    X = []
    y = []
    for i in range(len(data)-sequence_length-1):
        X.append(data.iloc[i:(i+sequence_length), 1:6])
        y.append(data.iloc[(i+sequence_length), 4])
    return np.array(X), np.array(y)

# Reshape the data
X, y = create_sequences(data, sequence_length)

In [None]:
# Split the data into training , validation and test sets
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, shuffle=False)
train_X, val_X, train_y, val_y = train_test_split(train_X, train_y, test_size=0.25, shuffle=False)






# Save the split data to an HDF5 file
with h5py.File('data.h5', 'w') as f:
    f.create_dataset('train_X', data=train_X)
    f.create_dataset('train_y', data=train_y)
    f.create_dataset('val_X', data=val_X)
    f.create_dataset('val_y', data=val_y)
    f.create_dataset('test_X', data=test_X)
    f.create_dataset('test_y', data=test_y)

# Load the data from the HDF5 file

In [4]:

with h5py.File('data.h5', 'r') as f:
    train_X = f['train_X'][:]
    train_y = f['train_y'][:]
    val_X = f['val_X'][:]
    val_y = f['val_y'][:]
    test_X = f['test_X'][:]
    test_y = f['test_y'][:]

Calculate the RSI indicator using the closing prices of the Forex data. Here is an example of how to calculate RSI using a 14-day window:

In [5]:
delta = data['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean().abs()
rs = avg_gain / avg_loss
rsi = 100 - (100 / (1 + rs))

data['rsi'] = rsi


Create a binary label column to represent the direction of price movement based on the RSI values. For example, if the RSI is above 70, we can label it as 1 for a potential downward price movement, and if the RSI is below 30, we can label it as 0 for a potential upward price movement. If the RSI value is between 30 and 70, we can label it as 2.

In [6]:
data["label"] = np.where(rsi > 70, 1, np.where(rsi < 30, 0, 2))

In [7]:
# Convert NumPy array back to a DataFrame
data = pd.DataFrame(data, columns=['Open', 'High', 'Low', 'Close', 'Volume', 'rsi', 'label'])

In [8]:
print(data.columns)

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'rsi', 'label'], dtype='object')


Normalize the data using MinMaxScaler:

In [9]:
scaler = MinMaxScaler(feature_range=(0, 1))
data = scaler.fit_transform(data[['Open', 'High', 'Low', 'Close', 'Volume', 'rsi']].values)

Create input and output sequences of a fixed time window size, for example, a 30-day window. Here is an example of how to create a sliding window of size 30:

In [33]:
window_size = 30
X = []
y = []
for i in range(window_size, len(data)):

    X.append(data[i - window_size:i, :-1])
    y.append(data[i, -1])
X = np.array(X)
y = np.array(y)


In [34]:
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, shuffle=False)
train_X, val_X, train_y, val_y = train_test_split(train_X, train_y, test_size=0.25, shuffle=False)

Define and train a LSTM model:

In [39]:
model = Sequential()
num_features = 2
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(window_size, train_X.shape[2])))

model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(3, activation='softmax'))

In [40]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [38]:
history = model.fit(train_X, train_y, epochs=50, batch_size=32, validation_data=(val_X, val_y))


Epoch 1/50


ValueError: in user code:

    File "c:\Users\jaycee\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\jaycee\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\jaycee\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\jaycee\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 860, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\Users\jaycee\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 918, in compute_loss
        return self.compiled_loss(
    File "c:\Users\jaycee\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\Users\jaycee\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\losses.py", line 141, in __call__
        losses = call_fn(y_true, y_pred)
    File "c:\Users\jaycee\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\losses.py", line 245, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\Users\jaycee\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\losses.py", line 1789, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "c:\Users\jaycee\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\backend.py", line 5083, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 1) and (None, 3) are incompatible


In [None]:
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(train_loss) + 1)
plt.plot(epochs, train_loss, 'g', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()