# Libraries

In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import *
from sklearn.metrics import mean_squared_error, r2_score
import pandas_ta as pta
from datetime import datetime, timedelta
# models to be used to check which regressor will work better for the model
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
# Lets disable warnings
import warnings
warnings.filterwarnings("ignore")

# Functions

In [2]:
# Downloading the historical Forex data from yfinance(yahoo)
def download_data(currency_pair, start_date, end_date=None):
    if end_date is None:
        end_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
    forex_data = yf.download(currency_pair, start=start_date, end=end_date)
    print(f"Downloaded forex data shape: {forex_data.shape}")
    return forex_data
#--------------------------------------------------------------------------------------------------------------------#
#--------------------------------------------------------------------------------------------------------------------#
# Prepare the dataset
def prepare_data(df, lookback=5):
    data = pd.DataFrame(df['Close'])
    for i in range(1, lookback + 1):
        data[f'lag_{i}'] = data['Close'].shift(i)
    data.dropna(inplace=True)
    print(f"Prepared data shape: {data.shape}")
    return data
#--------------------------------------------------------------------------------------------------------------------#
#--------------------------------------------------------------------------------------------------------------------#
# Adding technical indicators
def add_technical_indicators(data):
    data['SMA_5'] = pta.sma(data['Close'], length=5)
    data['SMA_10'] = pta.sma(data['Close'], length=10)
    data['SMA_15'] = pta.sma(data['Close'], length=15)
    data['SMA_20'] = pta.sma(data['Close'], length=20)
    data['SMA_25'] = pta.sma(data['Close'], length=25)
    data['SMA_30'] = pta.sma(data['Close'], length=30)
    data['RSI'] = pta.rsi(data['Close'], length=14)
    data['RSX'] = pta.rsx(data['Close'], length=14)
    data["ENTROPY"]=pta.entropy(data['Close'], length=14)
    macd_df = pta.macd(data['Close'], fast=12, slow=26, signal=9)
    if macd_df is None:
        data['MACD'] = 0
        data['MACD_signal'] = 0
    else:
        data['MACD'] = macd_df['MACD_12_26_9'].fillna(0)
        data['MACD_signal'] = macd_df['MACDh_12_26_9'].fillna(0)
    data.fillna(0, inplace=True)
    return data
#--------------------------------------------------------------------------------------------------------------------#
#--------------------------------------------------------------------------------------------------------------------#
# Prepare the latest data for prediction
def prepare_latest_data(data, date, lookback=5):
    latest_data = data.loc[:date].iloc[-(lookback + 1):].copy()
    indicators_data = add_technical_indicators(latest_data)
    print(f"Latest data with indicators shape: {indicators_data.shape}")
    return indicators_data.iloc[-1].drop('Close')
#--------------------------------------------------------------------------------------------------------------------#
#--------------------------------------------------------------------------------------------------------------------#
# Recursive prediction function
def predict_future_prices(model, latest_data, n_days=1):
    future_prices = []
    
    # Create a DataFrame to store the latest data along with the 'Close' column
    latest_data_df = pd.concat([latest_data, pd.Series([0], index=['Close'])])

    for _ in range(n_days):
        next_day_prediction = model.predict(latest_data_df.drop('Close').to_frame().T)
        future_prices.append(next_day_prediction[0])

        # Shift the latest data and replace the last Close value with the predicted value
        latest_data_df = latest_data_df.shift(-1)
        latest_data_df.iloc[-1] = next_day_prediction[0]

        # Update the technical indicators
        latest_data_df = add_technical_indicators(latest_data_df.to_frame().T).iloc[-1]

    return future_prices

# Inputs

In [3]:
# Download and prepare the data
#https://uk.finance.yahoo.com/currencies/
currency_pair = input("Enter the currency pair : ")
start_date = input("Enter start date (YYYY-MM-DD) for training dataset: ")
end_date = input("Enter end date (YYYY-MM-DD) for training dataset: ")
n_days = eval(input("Number of days you want to predict from current date : "))

#default values
if currency_pair =='':
    currency_pair = "EURUSD=X"
if start_date =='':
    start_date = "2000-01-01"
if end_date == '':
    end_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

data = download_data(currency_pair, start_date, end_date)
prepared_data = prepare_data(data)
# Add technical indicators to the dataset
prepared_data_with_indicators = add_technical_indicators(prepared_data)

Enter the currency pair :  
Enter start date (YYYY-MM-DD) for training dataset:  
Enter end date (YYYY-MM-DD) for training dataset:  
Number of days you want to predict from current date :  1


[*********************100%***********************]  1 of 1 completed
Downloaded forex data shape: (5019, 6)
Prepared data shape: (5014, 6)


In [4]:
# Define the models
models = [
    ('Linear Regression', LinearRegression()),
    ('Ridge Regression', Ridge()),
    ('Lasso Regression', Lasso()),
    ('Elastic Net Regression', ElasticNet()),
    ('Support Vector Regression', SVR()),
    ('Decision Tree Regression', DecisionTreeRegressor()),
    ('Random Forest Regression', RandomForestRegressor()),
    ('Gradient Boosting Regression', GradientBoostingRegressor()),
    ('XGBoost', XGBRegressor()),
    ('LightGBM', LGBMRegressor()),
    ('CatBoost', CatBoostRegressor(silent=True))
]

X = prepared_data_with_indicators.drop('Close', axis=1)
y = prepared_data_with_indicators['Close']

best_model = None
best_model_name = ''
best_r2_score = float('-inf')

# Evaluate the models using cross-validation
for name, model in models:
    cv_rmse = np.sqrt(-cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error'))
    cv_r2 = cross_val_score(model, X, y, cv=5, scoring='r2')
    print(f"{name} - CV RMSE: {cv_rmse.mean()}, CV R-squared score: {cv_r2.mean()}")
    if cv_r2.mean() > best_r2_score:
        best_model = model
        best_model_name = name
        best_r2_score = cv_r2.mean()
print(f"\nBest performing model: {best_model_name} with R-squared score: {best_r2_score}")

Linear Regression - CV RMSE: 0.08138635904186967, CV R-squared score: -7.732671412022681
Ridge Regression - CV RMSE: 0.021981702528829954, CV R-squared score: 0.6063725430583082
Lasso Regression - CV RMSE: 0.13853174184758335, CV R-squared score: -4.052882235144995
Elastic Net Regression - CV RMSE: 0.13853174184758335, CV R-squared score: -4.052882235144995
Support Vector Regression - CV RMSE: 0.07681533346379583, CV R-squared score: -0.5590274740003016
Decision Tree Regression - CV RMSE: 0.020750093253417163, CV R-squared score: 0.8717983828553673
Random Forest Regression - CV RMSE: 0.01838119488862746, CV R-squared score: 0.8971404247870236
Gradient Boosting Regression - CV RMSE: 0.01903342229960044, CV R-squared score: 0.8876484468128261
XGBoost - CV RMSE: 0.019162700995410276, CV R-squared score: 0.8842976219888377
LightGBM - CV RMSE: 0.020049568221448353, CV R-squared score: 0.8722949302823133
CatBoost - CV RMSE: 0.02044374174047932, CV R-squared score: 0.8667547955905676

Best pe

In [5]:
# Train and test the model
def train_test_model(data, target_column, test_size=0.2,model=best_model):
    X = data.drop(target_column, axis=1)
    y = data[target_column]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"RMSE: {rmse}")
    print(f"R-squared score: {r2}")

    # Perform cross-validation and report mean squared error and R-squared score
    cv_rmse = np.sqrt(-cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error'))
    cv_r2 = cross_val_score(model, X, y, cv=5, scoring='r2')
    print(f"CV RMSE: {cv_rmse.mean()}")
    print(f"CV R-squared score: {cv_r2.mean()}")

    return model

In [7]:
model = train_test_model(prepared_data_with_indicators, 'Close')

# Download the latest data
#today_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
#latest_data = download_data(currency_pair, start_date, today_date)

# Prepare the latest data for prediction
latest_prepared_data = prepare_latest_data(data, end_date)
# Predict the forex prices for the next n days after the future date
future_prices = predict_future_prices(model, latest_prepared_data, n_days=n_days)
print(f"Predicted forex prices for the next {n_days} day/days after {end_date} for currency {currency_pair} is : {future_prices}")


RMSE: 0.008244074300426259
R-squared score: 0.9955969965849754
CV RMSE: 0.018195495618730868
CV R-squared score: 0.8948553329121731
Latest data with indicators shape: (6, 17)


Number of days you want to predict from current date :  1


Predicted forex prices for the next 1 day/days after 2023-04-06 for currency EURUSD=X is : [1.0845690381526947]
