## Installing all the necessary Packages

In [None]:
# Upgrade pip itself (shell escape) before the notebook installs its own dependencies.
!pip install --upgrade pip

In [None]:
# Install / upgrade the third-party packages this notebook needs.
import sys
from subprocess import run

# Add & Upgrade whatever you will need
modules = [
    #'yfinance',    # Required for Yahoo Finance model
    'fredapi',     # Required to access FRED Data for MacroEconomics indices
    'ta',          # Required for the MACD indicator
]

# Run pip through the interpreter that backs this kernel so the packages land in the
# environment the notebook actually imports from, and pass the arguments as a list so
# no shell is involved in building the command.
proc = run(
    [sys.executable, '-m', 'pip', 'install', '--upgrade', '--no-input', *modules],
    text=True,
    capture_output=True,
    timeout=120,   # a couple of minutes
)

# pip reports problems on stderr; also make a failed install visible explicitly.
if proc.returncode != 0:
    print(f'pip exited with status {proc.returncode}')
print(proc.stderr)

In [None]:
# Text formatting: ANSI escape sequences for console attributes, foreground and background colours.
_CSI = '\033['   # Control Sequence Introducer shared by every code below

# Attributes
RESET    = _CSI + '0m'
BOLD     = _CSI + '1m'
ITALIC   = _CSI + '3m'
ULINE    = _CSI + '4m'
BLINK    = _CSI + '5m'
NEGATIVE = _CSI + '7m'

# Foreground colours
RED    = _CSI + '31m'
GREEN  = _CSI + '32m'
YELLOW = _CSI + '33m'
BLUE   = _CSI + '34m'
PURPLE = _CSI + '35m'
CYAN   = _CSI + '36m'
WHITE  = _CSI + '97m'

# Background colours
BgGREEN  = _CSI + '42m'
BgYELLOW = _CSI + '43m'
BgBLUE   = _CSI + '44m'
BgWHITE  = _CSI + '47m'

## Import All the General and commonly used Packages

In [None]:
# Import all the common modules
import os
import warnings
import requests
import numpy as np
import pandas as pd
from fredapi import Fred
from ta.trend import MACD
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler            #, StandardScaler, Normalizer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

## Defining the required functions

In [None]:
# Relative Strength Index (RSI)
def Calculate_RSI(data, window=14):
    """Return the RSI of a price series.

    Gains and losses are averaged with a plain rolling mean over ``window``
    observations, so the first ``window - 1`` results are NaN.

    Parameters:
        data: pandas Series of prices.
        window: number of observations in each rolling average.

    Returns:
        A Series aligned with ``data`` holding ``100 - 100 / (1 + RS)``, where
        RS is the average gain divided by the average loss.
    """
    change = data.diff(1)

    # Upward moves only; every other entry (including the leading NaN) becomes 0
    upward = change.where(change > 0, 0)
    # Downward moves only, negated so that losses are positive magnitudes
    downward = -change.where(change < 0, 0)

    average_gain = upward.rolling(window=window).mean()
    average_loss = downward.rolling(window=window).mean()

    relative_strength = average_gain / average_loss
    return 100 - (100 / (1 + relative_strength))

In [None]:
# Calculate Exponential Moving Average (EMA)
def Calculate_EMA(data, n):
    """Return the exponential moving average of ``data`` as a list.

    The first EMA value is the first observation; every later value is
    ``price * k + previous_ema * (1 - k)`` with ``k = 2 / (n + 1)``.

    Parameters:
        data: iterable of prices (list, NumPy array or pandas Series).
        n: EMA span used to derive the smoothing factor.

    Returns:
        A list with one EMA value per input observation (empty for empty input).
    """
    # Iterate over the values positionally. Indexing a pandas Series with data[i]
    # is label-based, which breaks for any index that is not 0..len-1
    # (dates, or an integer index left with gaps after filtering).
    values = list(data)
    if not values:
        return []

    smoothing_factor = 2 / (n + 1)
    ema_values = [values[0]]  # Seed the EMA with the first observation
    for value in values[1:]:
        ema_values.append(value * smoothing_factor + ema_values[-1] * (1 - smoothing_factor))

    return ema_values

In [None]:
# Average True Range (ATR)
def Calculate_ATR(df, period=14):
    """Add an 'ATR' column to ``df`` in place and return the same DataFrame.

    The true range of a row is the largest of High-Low, |High - previous Price|
    and |Low - previous Price|; ATR is its rolling mean over ``period`` rows
    (``min_periods=1``, so the first rows are averaged over what is available).

    Parameters:
        df: DataFrame with 'High', 'Low' and 'Price' columns.
        period: rolling window length.

    Returns:
        ``df`` itself, with the 'ATR' column added.
    """
    previous_close = df['Price'].shift(1)
    range_columns = ['High-Low', 'High-PrevClose', 'Low-PrevClose']

    # The three candidate ranges are staged as temporary columns on df
    df[range_columns[0]] = df['High'] - df['Low']
    df[range_columns[1]] = (df['High'] - previous_close).abs()
    df[range_columns[2]] = (df['Low'] - previous_close).abs()

    df['TR'] = df[range_columns].max(axis=1)
    df['ATR'] = df['TR'].rolling(window=period, min_periods=1).mean()

    # Remove the temporary columns again, leaving only 'ATR' behind
    df.drop(columns=range_columns + ['TR'], inplace=True)
    return df


In [None]:
# Calculate On Balance Volume (OBV)
def Get_Obv(df):
    """Add an 'OBV' (On Balance Volume) column to ``df`` in place.

    The day's volume is added when 'Price' rose, subtracted when it fell and
    ignored when it is unchanged. The first row has no previous price and
    therefore contributes 0. Missing volumes count as 0.

    Only the 'OBV' column is written; NaN values in other columns of ``df``
    are left untouched.

    Parameters:
        df: DataFrame with 'Price' and 'Volume' columns.

    Returns:
        ``df`` itself, with the 'OBV' column added.
    """
    price_change = df['Price'].diff()

    # +1 / 0 / -1 per row; the leading NaN (no previous price) becomes 0
    direction = np.sign(price_change).fillna(0)

    df['OBV'] = (direction * df['Volume'].fillna(0)).cumsum()
    return df

In [None]:
def FillGaps(StartingDate, EndingDate, StartVal, EndVal):
    """Return the per-business-day step that interpolates StartVal to EndVal.

    The step is sized so that adding it once for every business day strictly
    between the two dates, plus once more, lands on ``EndVal``.

    Parameters:
        StartingDate: date of the first observation.
        EndingDate: date of the next observation.
        StartVal: value observed on ``StartingDate``.
        EndVal: value observed on ``EndingDate``.

    Returns:
        The increment per business day, or 0 when either value is NaN
        (there is nothing to interpolate towards, so the series is held flat).
    """
    import math

    SVal = float(StartVal)
    EVal = float(EndVal)

    # A missing end point used to return StartVal itself as the "increment",
    # which made callers that accumulate the step multiply the value instead
    # of holding it. A step of 0 is the neutral choice.
    if math.isnan(SVal) or math.isnan(EVal):
        return 0

    # Business days strictly between the two observations
    Business_Days = pd.date_range(
        start=StartingDate + timedelta(days=1),
        end=EndingDate - timedelta(days=1),
        freq='B',
    )

    return (EVal - SVal) / (len(Business_Days) + 1)

In [None]:
# Function to fetch MacroEconomics data from FRED
def Fetch_Series(APIKey, IndName, Series_ID):
    """Download one FRED series and fill it out to business-day frequency.

    The series is requested from the module-level ``MinDate`` up to today.
    Business days between two consecutive observations are filled with evenly
    spaced values (step size from ``FillGaps``). Business days after the last
    observation, up to and including today, continue with the step of the last
    interval.

    Parameters:
        APIKey: client object exposing ``get_series(series_id, start, end)``
            (despite the name, this is the client, not the key string).
        IndName: column name to give the series in the result.
        Series_ID: FRED series identifier.

    Returns:
        DataFrame with columns 'Date' and ``IndName``, sorted by date. If the
        download or processing fails, the error is printed and whatever was
        built so far is returned (an empty frame with both columns when the
        download itself failed).
    """
    Today = pd.Timestamp.today().normalize()

    # Defined up front so the function can always return a DataFrame, even if
    # the request below raises.
    Df = pd.DataFrame(columns=['Date', IndName])
    try:
        Data = APIKey.get_series(Series_ID, MinDate, Today)
        Df = pd.DataFrame({IndName: Data})
        Df.reset_index(inplace=True)
        Df.rename(columns={'index': 'Date'}, inplace=True)
        Df.sort_values(by=['Date'], ascending=[True], inplace=True)

        Dates = Df['Date'].tolist()
        Values = Df[IndName].tolist()

        # Filled rows are collected in a list and appended in one go; growing the
        # DataFrame one row at a time copies it on every insertion.
        New_Rows = []
        IncVal = 0   # Step of the most recent interval; 0 when there is none

        # MacroEconomic data is maintained on Monthly and Quarterly Bases.
        # Fill the daily gaps between consecutive observations with calibrated increments.
        for StartDate, EndDate, StartVal, EndVal in zip(Dates, Dates[1:], Values, Values[1:]):
            IncVal = FillGaps(StartDate, EndDate, StartVal, EndVal)
            Business_Days = pd.date_range(
                start=StartDate + timedelta(days=1),
                end=EndDate - timedelta(days=1),
                freq='B',
            )
            Val = StartVal
            for BDay in Business_Days:
                Val = Val + IncVal
                New_Rows.append((BDay, Val))

        # Extend past the last observation up to today, continuing with the last step
        if Dates and Dates[-1] < Today:
            Val = Values[-1]
            for BDay in pd.date_range(start=Dates[-1] + timedelta(days=1), end=Today, freq='B'):
                Val = Val + IncVal
                New_Rows.append((BDay, Val))

        if New_Rows:
            Filled = pd.DataFrame(New_Rows, columns=['Date', IndName])
            Df = pd.concat([Df, Filled], ignore_index=True)
            Df.sort_values(by=['Date'], ascending=[True], inplace=True)
            Df.reset_index(drop=True, inplace=True)
    except Exception as e:
        print(f"Error fetching {IndName}: {e}")
    return Df

In [None]:
def TestTheModel(Model):
    """Score a fitted model on the train and test splits and print a verdict.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Parameters:
        Model: fitted estimator exposing ``predict``.

    Returns:
        Tuple ``(r2_Train, r2_Test, Comment)`` where ``Comment`` is the
        verdict text that was printed.
    """
    # R² of the predictions on each split
    r2_Train = r2_score(y_train, Model.predict(X_train))
    r2_Test = r2_score(y_test, Model.predict(X_test))

    print(f"R² error on Training Data: {r2_Train:.5f}")
    print(f"R² error on Test Data: {r2_Test:.5f}")

    Shortfall = 1.00 - r2_Train     # Distance of the training score from a perfect 1.0
    Gap = r2_Train - r2_Test        # How much worse the test score is than the training score

    # Anything outside the acceptance band is rejected outright
    if not (r2_Train >= r2_Test and Shortfall < 0.1 and Gap < 0.2):
        Comment = 'Bad Model.'
        print(f'{BOLD+RED+BLINK+BgYELLOW}{Comment}{RESET}')
        return r2_Train, r2_Test, Comment

    # Inside the band: grade from the strictest tier down to the loosest
    Detail = 'R² error on Training Data is better than R² error on Test Data.'
    Fore, Back = GREEN, BgGREEN
    if r2_Test == 1.00:
        Comment = 'Excellent Model!'
        Detail = 'R² error on Training Data equals R² error on Test Data.'
    elif r2_Train > r2_Test and Shortfall < 0.0001 and Gap < 0.001:
        Comment = 'Very Good Model!'
    elif r2_Train > r2_Test and Shortfall < 0.001 and Gap < 0.01:
        Comment = 'Good Model!'
    else:
        Comment = 'Fairly Good Model.'
        Fore, Back = BLUE, BgBLUE

    print(f'{BOLD+Fore+BgWHITE}{Comment}{RESET}\n{BOLD+ITALIC+NEGATIVE+Back}{Detail}{RESET}')
    return r2_Train, r2_Test, Comment

In [None]:
def Plot_TrainTest(CrncyPair, Y_Train, Y_Test):
    """Plot the training and test target series on one chart.

    Parameters:
        CrncyPair: currency pair name used in the chart title.
        Y_Train: Series of training targets (its index is used as the x axis).
        Y_Test: Series of test targets (its index is used as the x axis).
    """
    plt.figure(figsize=(10, 6))

    # One line per split, drawn in the same order as the legend
    for Targets, Legend, Colour in (
        (Y_Train, 'Training Data', 'blue'),
        (Y_Test, 'Test Data', 'orange'),
    ):
        plt.plot(Targets.index, Targets.values, label=Legend, color=Colour)

    plt.title(f'The Train/Test data for {CrncyPair} Forecasting')
    plt.xlabel('Date')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.show()

In [None]:
def Rpt_Evaluate_Plot(CrncyPair, Model_Name, Model_Code, R2_Train, R2_Test, Comment, Y_Train, Y_Test, Y_Pred, MaxDate, Future_Preds):
    """Print error metrics, plot the forecast against actuals and log the model.

    Reads the module-level ``ActualValues`` for the plot and appends one row to
    the module-level ``Models_Compare`` DataFrame. ``Y_Train`` is accepted but
    not used.

    Parameters:
        CrncyPair: currency pair name used in the chart title.
        Model_Name: display name of the model.
        Model_Code: short code of the model.
        R2_Train, R2_Test, Comment: results returned by ``TestTheModel``.
        Y_Train: training targets (unused).
        Y_Test: test targets the metrics are computed against.
        Y_Pred: predictions for the test split.
        MaxDate: last date in the data; the forecast axis starts the day after.
        Future_Preds: forecast values to plot.
    """
    # Error metrics of the test-set predictions
    mae = mean_absolute_error(Y_Test, Y_Pred)
    mse = mean_squared_error(Y_Test, Y_Pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(Y_Test, Y_Pred)

    print(f"Mean Absolute Error: {mae:.5f}")
    print(f"Mean Squared Error: {mse:.5f}")
    print(f'Root Mean Squared Error: {rmse:.5f}')
    print(f"R-squared (R2) score: {r2:.5f}")
    print('Future Predictions', Future_Preds)

    # Business days following MaxDate, one per forecast value
    Future_Dates = pd.date_range(start=MaxDate + timedelta(days=1), periods=len(Future_Preds), freq="B")

    # Actual vs predicted: lines first, then red markers on every point
    Curves = (
        (ActualValues, 'Actual Data', 'green'),
        (Future_Preds, 'Predicted Data', 'blue'),
    )
    plt.figure(figsize=(12, 6))
    for Points, Legend, Colour in Curves:
        plt.plot(Future_Dates, Points, label=Legend, color=Colour)
    for Points, _, _ in Curves:
        plt.scatter(Future_Dates, Points, s=10, color='red')
    plt.title(f'{Model_Name}: Actual vs Predicted values for {CrncyPair} Forecasting')
    plt.xlabel('Date')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.show()

    # Record this model's metrics as a new row of the comparison table
    Models_Compare.loc[len(Models_Compare.index)] = [Model_Name, Model_Code, mae, mse, rmse, R2_Train, R2_Test, Comment]

## Load the Currency Pair into DataFrame

In [None]:
# Silence every warning for the rest of the session
warnings.filterwarnings("ignore")

# Placeholders so both names exist before the data-loading cell runs
TDSData, Investing = [], []

# Today's date at midnight
TDate = pd.Timestamp.today().normalize()

In [None]:
# A currency pair is written as two 3-letter codes back to back:
# the first three characters are the Major currency, the rest the Minor one.
CrncyPair = 'USDCAD'
Major, Minor = CrncyPair[:3], CrncyPair[3:]

In [None]:
%%time
# Root of the GitHub folder that holds the pre-downloaded price history
MyGitHubURL = 'https://raw.githubusercontent.com/Akubue4U/PyProjects/main/MyBDAThesis/'

# DATA SOURCE 01 (Investing Data stored in GitHub)
# ================================================
# Daily history downloaded from the link below and saved as .csv in the repository.
# https://www.investing.com/currencies/usd-cad-historical-data
DataFolder = 'InvestingData/'
Base_URL = MyGitHubURL + DataFolder
InvCrncyPair = Major +'_'+ Minor
InvFileName = InvCrncyPair + '_Historical_Data'
File_URL = f'{Base_URL}{InvFileName}.csv'

# NOTE: the former requests.head(Base_URL) probes were dropped. Their result was never
# inspected, and because they ran outside the try blocks a network error there stopped
# the cell before the fallback source could be tried. pd.read_csv raises on its own when
# a file cannot be fetched.
SuccessCode = [200, 400]    # Not used; kept only so the name stays defined

try:
    InvestingData = pd.read_csv(File_URL)
    # Drop the 7th column of the export and treat missing volume as 0
    InvestingData.drop(InvestingData.columns[[6]], axis=1, inplace=True)
    InvestingData['Vol.'] = InvestingData['Vol.'].fillna(0)

    # Convert Date string column to Date type and use it as the index
    InvestingData['Date'] = pd.to_datetime(InvestingData['Date'], format='%m/%d/%Y')
    InvestingData.set_index('Date', inplace=True)

    # Re-arrange the columns to align with Source 02 below
    InvestingData = InvestingData[['Open', 'High', 'Low', 'Price', 'Vol.']]
    InvestingData.rename(columns={"Vol.": "Volume"}, inplace=True)

    # Some values are formatted in thousands and therefore contain commas.
    # Remove the commas and convert to float for the affected columns.
    for Col in ['Price', 'Open', 'High', 'Low', 'Volume']:
        if pd.api.types.is_object_dtype(InvestingData[Col]):
            InvestingData[Col] = InvestingData[Col].str.replace(',', '').astype(float)

    # Sort by date to align with Source 02 below
    InvestingData.sort_values(by=['Date'], ascending=[True], inplace=True)
    DataSource = InvestingData.copy()
    print("Data Source in use is Investing.Com")
except Exception as InvError:
    # Say why the primary source was skipped instead of switching silently
    print(f"Investing.Com data could not be loaded [{File_URL}]: \n{InvError}")

    # DATA SOURCE 02 (Tick Data Suite (TDS) stored in GitHub)
    # ======================================================
    # One-minute bars, one .csv file per year
    DataFolder = 'TDS-Data/'
    Base_URL = MyGitHubURL + DataFolder
    Symbol = Major + Minor +'_'

    try:
        # Read each yearly file (over 7,903,117 M1 rows in total) into its own DataFrame
        Df_List = []
        for Year in range(2003, 2025):
            File_URL = f'{Base_URL}{Symbol}{Year}.csv'
            Df_List.append(pd.read_csv(File_URL))

        # Concatenate all DataFrames into one
        CombinedDf = pd.concat(Df_List, ignore_index=True)
        # Merge Date string and Time string, and convert to DateTime type
        CombinedDf['DateTime'] = pd.to_datetime(CombinedDf['Date'] + " " + CombinedDf['Time'], format='%Y.%m.%d %H:%M:%S')
        CombinedDf.drop(columns=['Date', 'Time'], inplace=True)
        CombinedDf.set_index('DateTime', inplace=True)

        # Stage 02
        # Resample to one-day bars (weekends included; empty days are dropped below)
        TDSData = CombinedDf.resample('1D').agg({
            'Open': 'first',
            'High': 'max',
            'Low': 'min',
            'Close': 'last',
            'TickVol': 'sum',
        })
        TDSData['TickVol'] = TDSData['TickVol'].round(2)

        # Use the same column and index names as Source 01
        TDSData.rename(columns={'Close': 'Price', 'TickVol': 'Volume'}, inplace=True)
        TDSData.index.name = 'Date'
        TDSData.dropna(inplace=True)

        # Sort by date to align with Source 01 above
        TDSData.sort_values(by=['Date'], ascending=[True], inplace=True)
        DataSource = TDSData.copy()
        print("Data Source in use is TDS.")
    except Exception as TdsError:
        print(f"Error accessing GitHub path [{Base_URL}]: \n{TdsError}")
        # Without any price data the rest of the notebook cannot run; stop here with a
        # clear message rather than failing later on an undefined (or stale) DataSource.
        raise RuntimeError(f'No price data could be loaded for {CrncyPair} from either source.') from TdsError

# Earliest date in the price data; Fetch_Series uses it as the start of the FRED request
MinDate = DataSource.index.min()

In [None]:
# Free the memory held by objects that are no longer needed
del TDSData, Investing

In [None]:
# Show the 7 most recent rows, newest first
DataSource.tail(7).sort_values(by=['Date'], ascending=[False])

In [None]:
# Number of rows before the indicator columns are added
print(len(DataSource))

#### Adding the Statistical Data to the DataSource

In [None]:
# Feature Engineering: derive the technical indicators from the 'Price' column
Prices = DataSource['Price']

# Simple moving averages over 20 and 50 rows
for Window in (20, 50):
    DataSource[f'SMA_{Window}'] = Prices.rolling(window=Window).mean()

# Momentum: 14-row RSI
DataSource['RSI'] = Calculate_RSI(Prices, window=14)

# Exponential moving averages with spans 12 and 26
for Span in (12, 26):
    DataSource[f'EMA_{Span}'] = Calculate_EMA(Prices, Span)

# MACD line from the `ta` package with its default settings
DataSource['MACD'] = MACD(Prices).macd()

# The rolling indicators leave NaN in their first rows; discard those rows
DataSource.dropna(inplace=True)

In [None]:
# Number of rows left after dropping the rows with missing indicator values
print(len(DataSource))

#### Get the MacroEconomics Data

In [None]:
# FRED client. The key is read from the FRED_API_KEY environment variable when it is set;
# the literal below is only a fallback so existing runs keep working.
# NOTE(review): a key committed to source control should be treated as exposed —
# rotate it and remove the fallback once the environment variable is in place.
MyAPIKey = Fred(api_key=os.environ.get('FRED_API_KEY', 'fb871b8abb01c472840cdd407ffd8dca'))

# This is the list of Indices we want to retrieve for USA and Canada.
# Keys are the column names used in this notebook; values are the FRED series IDs.
MacroEconomic_Series_USA = {
    'CPI': 'CPIAUCSL',  # Consumer Price Index (USA)
    'IntRate': 'FEDFUNDS',  # Federal Funds Rate (USA)
    'UnEmployRate': 'UNRATE',  # Unemployment Rate (USA)
    'Imports': 'IMP0004',  # Imports (USA)
    'Exports': 'EXP0004'  # Exports (USA)
}

MacroEconomic_Series_Canada = {
    'CPI': 'CANCPIALLMINMEI',  # Consumer Price Index (Canada)
    'IntRate': 'IR3TIB01CAM156N',  # Interest Rate (Canada)
    'UnEmployRate': 'LRUNTTTTCAM156S',  # Unemployment Rate (Canada)
    'Imports': 'XTIMVA01CAM667S',  # Imports (Canada)
    'Exports': 'XTEXVA01CAM667S'  # Exports (Canada)
}

In [None]:
%%time
# Download every macroeconomic indicator and join them per country on 'Date'

# --- USA ---
USA_Data = []
for IndName, Series_ID in MacroEconomic_Series_USA.items():
    Data = Fetch_Series(MyAPIKey, IndName, Series_ID)
    # The first series starts the table; later ones are joined onto it by date
    USA_Data = Data.copy() if len(USA_Data) == 0 else pd.merge(USA_Data, Data, on='Date')
USA_Data.rename(
    columns={
        "CPI": "USA_CPIs",
        "IntRate": "USA_IntRates",
        "UnEmployRate": "USA_UnEmployRates",
        "Imports": "USA_Imports",
        "Exports": "USA_Exports",
    },
    inplace=True,
)
#USA_Data = USA_Data.drop_duplicates()


# --- Canada ---
Canada_Data = []
for IndName, Series_ID in MacroEconomic_Series_Canada.items():
    Data = Fetch_Series(MyAPIKey, IndName, Series_ID)
    Canada_Data = Data.copy() if len(Canada_Data) == 0 else pd.merge(Canada_Data, Data, on='Date')
Canada_Data.rename(
    columns={
        "CPI": "CAN_CPIs",
        "IntRate": "CAN_IntRates",
        "UnEmployRate": "CAN_UnEmployRates",
        "Imports": "CAN_Imports",
        "Exports": "CAN_Exports",
    },
    inplace=True,
)
#Canada_Data = Canada_Data.drop_duplicates()

# Combine both countries into one table keyed by 'Date'
MacroEcons = pd.merge(USA_Data, Canada_Data, on='Date')

# Free the memory.
#del USA_Data, Canada_Data

#### Merging the MacroEconomics Data with the DataSource

In [None]:
%%time
# Attach the macroeconomic columns to every price row (left join on 'Date'),
# replace values that found no match with zero, and index the result by 'Date'.
Complete_Data = (
    pd.merge(DataSource, MacroEcons, how='left', on=['Date'])
    .fillna(0)
    .set_index('Date')
)

# Free the memory.
#del DataSource, MacroEcons

## The Models

In [None]:
# Empty table that collects one row of performance metrics per model for easy comparison
Cols = [
    'ModelName', 'ModelCode',
    'MAE', 'MSE', 'RMSE',
    'R2_Train', 'R2_Test',
    'Comment',
]
Models_Compare = pd.DataFrame(columns=Cols)

In [None]:
# Parameters shared by every model below

# Forecast horizon, in rows of the daily data
PredDays = 5

# Hold back 15% of the data for testing; the remaining 85% is used for training
TestPercent = 0.15
TrainPercent = 1.0 - TestPercent

#### Creating Label column for the Forecast

In [None]:
# The target ('Label') of each row is the price PredDays rows later
Complete_Data['Label'] = Complete_Data['Price'].shift(-PredDays)

# The last PredDays rows have no future price to learn from; drop them
Complete_Data.dropna(inplace=True)
MaxDate = Complete_Data.index.max()

# Targets of the last PredDays remaining rows, used as the "actual" curve in the plots
ActualValues = Complete_Data['Label'].iloc[-PredDays:].values

In [None]:
# Show the 8 most recent rows of the merged data, newest first
Complete_Data.tail(8).sort_values(by=['Date'], ascending=[False])

#### Split the Data, Scale, and Plot Train/Test

In [None]:
# Chronological train/test split (85/15): the data is a time series, so the rows are
# cut at a position instead of being shuffled.
Complete_Data.dropna(inplace=True)
Split_Position = int(len(Complete_Data) * TrainPercent)
Train_Data = Complete_Data.iloc[:Split_Position]
Test_Data = Complete_Data.iloc[Split_Position:]

In [None]:
# Min-max scaling to the range [0, 1].
# The scaler learns its minima and maxima from the training rows only ...
Scaler = MinMaxScaler(feature_range=(0, 1))
Scaler.fit(Train_Data)

# ... and the same fitted scaler is then applied to both splits
Train_Data_Scaled, Test_Data_Scaled = (
    Scaler.transform(Train_Data),
    Scaler.transform(Test_Data),
)

In [None]:
# Wrap the scaled arrays in DataFrames again (same index and column names as the
# unscaled splits) and discard any row that contains a missing value.
Train_Data_Scaled_Df = pd.DataFrame(
    Train_Data_Scaled, index=Train_Data.index, columns=Train_Data.columns
).dropna()
Test_Data_Scaled_Df = pd.DataFrame(
    Test_Data_Scaled, index=Test_Data.index, columns=Test_Data.columns
).dropna()

In [None]:
# Build the feature matrices and target vectors.
# The scaled frames still contain the 'Label' column (the value to be predicted).
# It must not be part of the features, otherwise every model simply reads the
# answer back from its input and the reported scores are meaningless.
Feature_Cols = [Col for Col in Train_Data_Scaled_Df.columns if Col != 'Label']

X_train, y_train = Train_Data_Scaled_Df[Feature_Cols], Train_Data['Label']
X_test, y_test = Test_Data_Scaled_Df[Feature_Cols], Test_Data['Label']

In [None]:
# Plot the unscaled training and test targets on one chart
Plot_TrainTest(CrncyPair, y_train, y_test)

#### Linear Regression Model

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Linear Regression with the library's default settings
model = LinearRegression()

# Fit on the training split (X_train / y_train) prepared in the earlier cells
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on both splits; TestTheModel prints the R² values and a verdict
R2_Train, R2_Test, Comment = TestTheModel(model)

In [None]:
%%time
# Predict the whole test split; the last PredDays predictions are kept as this model's forecast
y_pred = model.predict(X_test)
LNR = future_predictions = y_pred[-PredDays:]

# Print the error metrics, plot forecast vs. actual values and log the model in Models_Compare
ModelCode, ModelName = 'LNR', 'Linear Regression'
Rpt_Evaluate_Plot(
    CrncyPair, ModelName, ModelCode, R2_Train, R2_Test, Comment,
    y_train, y_test, y_pred, MaxDate, future_predictions,
)

# Release the per-model objects so the next model starts from a clean slate
del model, y_pred, future_predictions

#### Random Forest Regressor Model

In [None]:
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingClassifier

In [None]:
# Train the Random Forest model.
# A fixed random_state makes the forest (and therefore its scores and forecast)
# reproducible from run to run, in line with the seeded XGBoost model.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on both splits; TestTheModel prints the R² values and a verdict
R2_Train, R2_Test, Comment  = TestTheModel(model)

In [None]:
%%time
# Predict the whole test split; the last PredDays predictions are kept as this model's forecast
y_pred = model.predict(X_test)
RFR = future_predictions = y_pred[-PredDays:]

# Print the error metrics, plot forecast vs. actual values and log the model in Models_Compare
ModelCode, ModelName = 'RFR', 'Random Forest Regressor'
Rpt_Evaluate_Plot(
    CrncyPair, ModelName, ModelCode, R2_Train, R2_Test, Comment,
    y_train, y_test, y_pred, MaxDate, future_predictions,
)

# Release the per-model objects so the next model starts from a clean slate
del model, y_pred, future_predictions

#### XGBoost Model

In [None]:
import xgboost as xgb

In [None]:
# XGBoost settings
params = dict(
    objective='reg:squarederror',   # Squared-error loss for regression
    max_depth=6,                    # Maximum depth of each tree
    learning_rate=0.1,              # Learning rate {0.1}
    n_estimators=100,               # Number of boosting rounds (trees)
    seed=42,                        # Random seed for reproducibility {42}
)

# Build the regressor from those settings and fit it on the training split
xgb_model = xgb.XGBRegressor(**params)
xgb_model.fit(X_train, y_train)

In [None]:
# Score the fitted model on both splits; TestTheModel prints the R² values and a verdict
R2_Train, R2_Test, Comment = TestTheModel(xgb_model)

In [None]:
%%time
# Predict the whole test split; the last PredDays predictions are kept as this model's forecast
y_pred = xgb_model.predict(X_test)
XGB = future_predictions = y_pred[-PredDays:]

# Print the error metrics, plot forecast vs. actual values and log the model in Models_Compare
ModelCode, ModelName = 'XGB', 'XGBoost Regression'
Rpt_Evaluate_Plot(
    CrncyPair, ModelName, ModelCode, R2_Train, R2_Test, Comment,
    y_train, y_test, y_pred, MaxDate, future_predictions,
)

# Release the per-model objects so the next model starts from a clean slate
del xgb_model, y_pred, future_predictions

#### Ridge Regression Model

In [None]:
from sklearn.linear_model import Ridge

In [None]:
# Ridge Regression: create the model with the chosen regularization strength
# and fit it on the training split
alpha = 0.1  # Regularization strength (hyperparameter to be tuned)
model = Ridge(alpha=alpha)
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on both splits; TestTheModel prints the R² values and a verdict
R2_Train, R2_Test, Comment = TestTheModel(model)

In [None]:
%%time
# Predict the whole test split; the last PredDays predictions are kept as this model's forecast
y_pred = model.predict(X_test)
RIR = future_predictions = y_pred[-PredDays:]

# Print the error metrics, plot forecast vs. actual values and log the model in Models_Compare
ModelCode, ModelName = 'RIR', 'Ridge Regression'
Rpt_Evaluate_Plot(
    CrncyPair, ModelName, ModelCode, R2_Train, R2_Test, Comment,
    y_train, y_test, y_pred, MaxDate, future_predictions,
)

# Release the per-model objects so the next model starts from a clean slate
del model, y_pred, future_predictions

#### Lasso Regression Model

In [None]:
from sklearn.linear_model import Lasso

In [None]:
# Lasso Regression: create the model with the chosen regularization strength
# and fit it on the training split
alpha = 0.1  # Regularization strength (hyperparameter to be tuned)
model = Lasso(alpha=alpha)
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on both splits; TestTheModel prints the R² values and a verdict
R2_Train, R2_Test, Comment = TestTheModel(model)

In [None]:
%%time
# Predict the whole test split; the last PredDays predictions are kept as this model's forecast
y_pred = model.predict(X_test)
LAR = future_predictions = y_pred[-PredDays:]

# Print the error metrics, plot forecast vs. actual values and log the model in Models_Compare
ModelCode, ModelName = 'LAR', 'Lasso Regression'
Rpt_Evaluate_Plot(
    CrncyPair, ModelName, ModelCode, R2_Train, R2_Test, Comment,
    y_train, y_test, y_pred, MaxDate, future_predictions,
)

# Release the per-model objects so the next model starts from a clean slate
del model, y_pred, future_predictions

#### Support Vector Regression (SVR) Model

In [None]:
from sklearn.svm import SVR

In [None]:
# Bind the estimator class under a private alias. A later cell stores the SVR forecast
# in a variable that is also called `SVR`, which hides the imported class and made a
# re-run of this cell fail.
from sklearn.svm import SVR as _SVRegressor

# Support Vector Regression with an RBF kernel, fitted on the training split
svr_model = _SVRegressor(kernel='rbf', C=1.0, epsilon=0.2)
svr_model.fit(X_train, y_train)

In [None]:
# Score the fitted model on both splits; TestTheModel prints the R² values and a verdict
R2_Train, R2_Test, Comment = TestTheModel(svr_model)

In [None]:
%%time
# Predict the whole test split; the last PredDays predictions are kept as this model's forecast.
# NOTE: the forecast is stored under the name `SVR`, which from here on replaces the
# estimator class of the same name imported from sklearn.svm.
y_pred = svr_model.predict(X_test)
SVR = future_predictions = y_pred[-PredDays:]

# Print the error metrics, plot forecast vs. actual values and log the model in Models_Compare
ModelCode, ModelName = 'SVR', 'Support Vector Regression'
Rpt_Evaluate_Plot(
    CrncyPair, ModelName, ModelCode, R2_Train, R2_Test, Comment,
    y_train, y_test, y_pred, MaxDate, future_predictions,
)

# Release the per-model objects so the next model starts from a clean slate
del svr_model, y_pred, future_predictions

#### Bayesian Ridge Regression Model

In [None]:
from sklearn.linear_model import BayesianRidge

In [None]:
# Bayesian Ridge Regression with the library's default settings, fitted on the training split
Baye_model = BayesianRidge()
Baye_model.fit(X_train, y_train)

In [None]:
# Score the fitted model on both splits; TestTheModel prints the R² values and a verdict
R2_Train, R2_Test, Comment = TestTheModel(Baye_model)

In [None]:
%%time
# Predict the whole test split; the last PredDays predictions are kept as this model's forecast
y_pred = Baye_model.predict(X_test)
BRR = future_predictions = y_pred[-PredDays:]

# Print the error metrics, plot forecast vs. actual values and log the model in Models_Compare
ModelCode, ModelName = 'BRR', 'Bayesian Ridge Regression'
Rpt_Evaluate_Plot(
    CrncyPair, ModelName, ModelCode, R2_Train, R2_Test, Comment,
    y_train, y_test, y_pred, MaxDate, future_predictions,
)

# Release the per-model objects so the next model starts from a clean slate
del Baye_model, y_pred, future_predictions

#### AdaBoost Regression Model

In [None]:
from sklearn.ensemble import AdaBoostRegressor

In [None]:
# AdaBoost Regression, fitted on the training split.
# A fixed random_state makes the ensemble (and therefore its scores and forecast)
# reproducible from run to run, in line with the seeded XGBoost model.
AdaB_model = AdaBoostRegressor(n_estimators=100, random_state=42)
AdaB_model.fit(X_train, y_train)

In [None]:
# Score the fitted model on both splits; TestTheModel prints the R² values and a verdict
R2_Train, R2_Test, Comment = TestTheModel(AdaB_model)

In [None]:
%%time
# Predict the whole test split; the last PredDays predictions are kept as this model's forecast
y_pred = AdaB_model.predict(X_test)
ABR = future_predictions = y_pred[-PredDays:]

# Print the error metrics, plot forecast vs. actual values and log the model in Models_Compare
ModelCode, ModelName = 'ABR', 'AdaBoost Regression'
Rpt_Evaluate_Plot(
    CrncyPair, ModelName, ModelCode, R2_Train, R2_Test, Comment,
    y_train, y_test, y_pred, MaxDate, future_predictions,
)

# Release the per-model objects so the next model starts from a clean slate
del AdaB_model, y_pred, future_predictions

## Summary of the model results

In [None]:
# Display the full comparison table (one row per model)
Models_Compare

In [None]:
# Same table without the 'ModelCode' column
Models_Compare[['ModelName','MAE','MSE','RMSE','R2_Train','R2_Test', 'Comment']]

In [None]:
# One chart with the actual values and the forecast of every model

# Business days following MaxDate, one per forecast value
Future_Dates = pd.date_range(start=MaxDate + timedelta(days=1), periods=PredDays, freq="B")

# (values, legend label, line colour) for every curve, in drawing order
Curves = [
    (ActualValues, 'Actual Data', 'green'),
    (LNR, 'Linear Regression', 'blue'),
    (RFR, 'Random Forest Regressor', 'red'),
    (XGB, 'XGBoost Regression', 'yellow'),
    (RIR, 'Ridge Regression', 'orange'),
    #(LAR, 'Lasso Regression', 'black'),
    #(SVR, 'Support Vector Regression', 'magenta'),
    (BRR, 'Bayesian Ridge Regression', 'cyan'),
    (ABR, 'AdaBoost Regression', 'purple'),
]

plt.figure(figsize=(12, 6))
# Lines first ...
for Points, Legend, Colour in Curves:
    plt.plot(Future_Dates, Points, label=Legend, color=Colour)
# ... then a red marker on every data point
for Points, _, _ in Curves:
    plt.scatter(Future_Dates, Points, s=10, color='red')
plt.title('Actual vs All Predicted Models for ' + CrncyPair + ' Forecasting')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.show()

In [None]:
# Free the Memory
#del FxPair_Data, X, y, X_train, X_test, y_train, y_test