In [None]:
import subprocess
import sys

# Third-party packages this notebook needs beyond the base environment.
libraries_to_install = ['mlxtend', 'ipynb']

for library in libraries_to_install:
    # Run pip through the interpreter that is executing this kernel so the
    # packages land in the kernel's own environment; a bare "pip" is resolved
    # through PATH and may belong to a different Python installation.
    # check=True raises CalledProcessError if an install fails.
    subprocess.run([sys.executable, "-m", "pip", "install", library], check=True)

In [None]:
from sklearn.linear_model import LogisticRegression
from joblib import dump, load, Parallel, delayed
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
import pandas as pd
from mlxtend.evaluate import PredefinedHoldoutSplit
import numpy as np
import glob, os
import random
random.seed(42)
from joblib import dump, load
from ipynb.fs.full.Data_Preparation import getFilenames
from ipynb.fs.full.Data_Preparation import getProcessedData

In [None]:
# Currency-pair tickers. The position of each ticker in this list is the
# index that fx_dic stores for it.
fx_tic = [
    'EURNZD',
    'USDCHF',
    'EURCHF',
    'USDEUR',
    'USDGBP',
    'EURNOK',
    'EURSEK',
    'USDNZD',
    'USDCAD',
    'EURDKK',
    'USDSEK',
    'USDJPY',
    'USDDKK',
    'EURJPY',
    'EURCAD',
    'EURGBP',
    'USDNOK',
]

# Map each ticker to its position in fx_tic. Deriving the mapping from the
# list (instead of typing the indices by hand) keeps the two from drifting
# apart when tickers are added, removed or reordered.
fx_dic = {ticker: index for index, ticker in enumerate(fx_tic)}

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [None]:
def sffs_with_lda(stock_name, k_features=20):
    """Select features for one currency pair with floating forward selection.

    A Linear Discriminant Analysis classifier is used as the base estimator
    of mlxtend's SequentialFeatureSelector, configured for Sequential Forward
    Floating Selection (forward=True, floating=True).

    Parameters
    ----------
    stock_name : str
        Ticker that must be a key of ``fx_dic`` (e.g. ``'EURNZD'``); an
        unknown ticker raises ``KeyError``.
    k_features : int, default 20
        Number of features to select; passed straight to the selector.

    Returns
    -------
    tuple
        ``(selected_indices, score, selector)``: the selected feature indices
        as a list (from ``k_feature_idx_``), the score of that subset
        (``k_score_``), and the fitted selector object.

    Notes
    -----
    Only the training split is passed to the selector; the validation and
    test splits returned by ``getProcessedData`` are not used here.
    NOTE(review): with ``cv=0`` there is no cross-validation, so the reported
    accuracy is presumably measured on the training data itself -- confirm
    that this is the intended selection criterion.
    """
    # Look up the data file by the ticker's index and load its splits.
    # Presumably getFilenames returns files in the same order as fx_tic --
    # verify against Data_Preparation.
    data_file = getFilenames(fx_tic)[fx_dic[stock_name]]
    X_train, _, _, _, y_train, _, _, _ = getProcessedData(data_file)

    selector = SFS(
        # The estimator is handed over unfitted: the selector clones it by
        # default, so fitting it on all features beforehand is discarded work.
        LinearDiscriminantAnalysis(),
        # Number of features to select
        k_features=k_features,
        # Forward selection ...
        forward=True,
        # ... with the floating step that may drop previously added features
        floating=True,
        # Use accuracy as the metric
        scoring='accuracy',
        # No cross-validation
        cv=0,
        # Use all available CPU cores
        n_jobs=-1,
    )

    # Run the selection on the training data.
    selector = selector.fit(X_train, y_train)

    return list(selector.k_feature_idx_), selector.k_score_, selector

In [None]:
# Run the LDA-based feature selection for every ticker and report, one item
# per line: the ticker, the selected feature indices, and the subset's score.
for ele in fx_tic:
    res = sffs_with_lda(ele)
    # res is (feature indices, score, selector); only the first two are shown.
    print(ele, *res[:2], sep="\n")

EURNZD
[8, 9, 10, 11, 16, 18, 21, 23, 26, 28, 31, 35, 36, 38, 46, 50, 51, 52, 53, 55]
0.573237885462555
USDCHF
[2, 5, 7, 8, 10, 11, 17, 25, 27, 35, 41, 43, 47, 48, 51, 52, 53, 54, 55, 57]
0.554210236653825
EURCHF
[4, 11, 12, 14, 17, 18, 19, 26, 29, 32, 33, 34, 39, 40, 45, 47, 49, 50, 51, 52]
0.5610432852386238
USDEUR
[0, 6, 11, 12, 16, 22, 23, 24, 25, 28, 30, 31, 33, 38, 44, 45, 48, 49, 54, 59]
0.5599334073251943
USDGBP
[1, 2, 5, 13, 16, 18, 23, 24, 25, 31, 35, 36, 38, 41, 42, 46, 48, 51, 52, 54]
0.5686707115278544
EURNOK
[2, 8, 10, 11, 13, 17, 18, 21, 23, 24, 25, 40, 41, 42, 43, 47, 53, 56, 57, 58]
0.5443951165371809
EURSEK
[2, 4, 8, 10, 11, 13, 15, 20, 22, 23, 33, 39, 40, 41, 44, 48, 51, 54, 55, 59]
0.5554938956714761
USDNZD
[5, 6, 8, 10, 13, 14, 17, 20, 21, 23, 24, 25, 33, 37, 40, 43, 51, 52, 53, 57]
0.5637065637065637
USDCAD
[5, 8, 10, 11, 17, 18, 20, 23, 24, 25, 31, 33, 35, 40, 42, 52, 54, 55, 56, 57]
0.5679691799669785
EURDKK
[1, 6, 8, 10, 17, 21, 26, 30, 31, 35, 37, 43, 44, 45, 