# Price prediction of USD EUR pair

As raw data we have daylie currency prices of the last ~20 years in an xml file.

First lets convert the values of the file to a Dataframe.

In [4]:
pip install lxml

Note: you may need to restart the kernel to use updated packages.


In [5]:
from bs4 import BeautifulSoup
import pandas as pd


# Reading the data inside the xml
# file to a variable under the name 
# data
with open('data/usd.xml', 'r') as f:
    data = f.read()

# Passing the stored data inside
# the beautifulsoup parser, storing
# the returned object 
soup = BeautifulSoup(data, "xml")

time_periods = [obs['TIME_PERIOD'] for obs in soup.find_all('Obs')]
values = [obs['OBS_VALUE'] for obs in soup.find_all('Obs')]

data = pd.DataFrame({'Timestamp': time_periods, 'Price': values})

data.head


<bound method NDFrame.head of        Timestamp   Price
0     1999-01-04  1.1789
1     1999-01-05  1.1790
2     1999-01-06  1.1743
3     1999-01-07  1.1632
4     1999-01-08  1.1659
...          ...     ...
6603  2024-10-14  1.0915
6604  2024-10-15  1.0903
6605  2024-10-16  1.0897
6606  2024-10-17  1.0866
6607  2024-10-18  1.0847

[6608 rows x 2 columns]>

Check if the data is already clean

In [6]:
# Convert OBS_VALUE to numeric
data['Price'] = pd.to_numeric(data['Price'])

# Get all statistical info
stats = data['Price'].describe()

print(stats)

count    6608.000000
mean        1.185255
std         0.155542
min         0.825200
25%         1.085600
50%         1.176050
75%         1.303225
max         1.599000
Name: Price, dtype: float64


# Prepare data for the model
To do this we will create lagged features and split the data into training and testset.
But first we will fill out the weekend gaps.

In [7]:
# Convert TIME_PERIOD to datetime and OBS_VALUE to numeric
data['Timestamp'] = pd.to_datetime(data['Timestamp'])
data['Price'] = pd.to_numeric(data['Price'])

# Set TIME_PERIOD as index
data.set_index('Timestamp', inplace=True)

# Resample to include all days and fill gaps (e.g., using forward-fill)
df_resampled = data.resample('D').ffill()  # Use 'D' for daily frequency

data.tail(10)

Unnamed: 0_level_0,Price
Timestamp,Unnamed: 1_level_1
2024-10-07,1.0982
2024-10-08,1.0982
2024-10-09,1.0957
2024-10-10,1.0932
2024-10-11,1.0938
2024-10-14,1.0915
2024-10-15,1.0903
2024-10-16,1.0897
2024-10-17,1.0866
2024-10-18,1.0847


In [8]:
# create lagged features

import numpy as np

def create_sequences(data, n_timesteps):
    X = []
    y = []
    for i in range(len(data) - n_timesteps):
        # Input sequence (last n_timesteps of humidity, light, temperature)
        X.append(data[i:i + n_timesteps])
        # Target (humidity and temperature for the next timestep)
        y.append(data.iloc[i+n_timesteps]['Price'])  # 0 for humidity, 2 for temperature
    return np.array(X), np.array(y)

n_timesteps = 5  # Use past 12 time steps (1 Hour) to predict the next step
X, y = create_sequences(data, n_timesteps)

print(X.shape)
print(X[0])
print(y.shape)
print(y[0])

(6603, 5, 1)
[[1.1789]
 [1.179 ]
 [1.1743]
 [1.1632]
 [1.1659]]
(6603,)
1.1569


In [9]:
# create the test and train set
TEST_SPLIT = 0.1

#data = data.drop(columns = ['Wind', 'Solar'])
train_size = int(len(X) * (1-TEST_SPLIT))
test_size = len(X) - train_size

X_train = X[0:train_size]
X_test = X[train_size:len(y)]
y_train = y[0:train_size]
y_test = y[train_size:len(y)]

# Modelling

In [13]:
# create the model

import tensorflow as tf
import keras as keras
from keras.models import Sequential
from keras.layers import *
from keras.callbacks import ModelCheckpoint
from keras.losses import MeanSquaredError
from keras.metrics import RootMeanSquaredError
from keras.optimizers import Adam

model = Sequential()
model.add(InputLayer((n_timesteps, 1))) # 3 features
model.add(LSTM(64))
model.add(Dense(8, 'relu'))
model.add(Dense(2, 'linear'))

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 64)                16896     
                                                                 
 dense_2 (Dense)             (None, 8)                 520       
                                                                 
 dense_3 (Dense)             (None, 2)                 18        
                                                                 
Total params: 17,434
Trainable params: 17,434
Non-trainable params: 0
_________________________________________________________________


In [14]:
cp = ModelCheckpoint('models/model.h5', save_best_only=True)
model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.001), metrics=[RootMeanSquaredError()])

In [15]:
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, callbacks=[cp])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1c10ec75c40>