In [477]:
# Install libraries the first time
# ! pip install -q ipynb yfinance pandas pathlib numpy

In [478]:
from fastai.tabular.all import *
import yfinance as yf
import pandas as pd
import pathlib
from datetime import *
import numpy as np
import matplotlib.pyplot as plt
import random

### Variables

In [495]:
modelName = 'stockScreenerV2.0'
trainingData = 'stockData.csv'
getNewData = True 
trainNewModel = True
predictionTarget = None # 'ALL' for all tickers decending, 'None' for no prediction

# Training parameters
trainingSize = 10 # Number of stocks to get from the test tickers
timeFrame = '2y'  # Options: 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max
yNames = ['Future Year Change']
catNames = ['Industry']
contNames = [
    'Open',
    'Close', 
    'Volume', 
    'Dividends', 
    'Stock Splits', 
    'EV/EBIT', 
    'ROIC'
]
epochs = 3

# Test parameters
testSize = 200 # Number of stocks to test. 'ALL' for all non-training stocks

# Folder- and file paths
dataFolder = Path.cwd().parent / 'TrainingData'
dataName = 'stockData.csv'
testFolder = Path.cwd().parent / 'TestData'
trainingFolder = Path.cwd().parent / 'TrainingData'
modelFolder = Path.cwd().parent.parent / 'TrainedModels' / 'stockScreener'
testFolder = Path.cwd().parent / 'TestData'

# Getting historic stock data

### Ticker cleaning (remove duplicates)

In [480]:
# Load the data from the CSV file
df = pd.read_csv(testFolder / 'tickers.csv')
    
# Remove duplicate rows
df = df.drop_duplicates().reset_index(drop=True)
    
# Save the cleaned data to a new file
df.to_csv(testFolder / 'tickers.csv', index=False)

### Functions for Data Collection and Processing

In [481]:
def get_industry(ticker_symbol):
    try:
        return yf.Ticker(ticker_symbol).info.get('industry', 'Unknown')
    except Exception as e:
        print(f"Error fetching industry for {ticker_symbol}: {e}")
        return 'Error'

In [482]:
def calculate_future_year_change(ticker_symbol, timeframe, buffer=1):
    valid_periods = ['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']
    try:
        if timeframe == 'max':
            future_change = yf.Ticker(ticker_symbol).history(period='max')
            future_change['Future Year Change'] = (future_change['Close'].shift(-252) / future_change['Close'] - 1)
            future_change = future_change.dropna(subset=['Future Year Change'])
        else:
            extended_timeframe = valid_periods[valid_periods.index(timeframe) + buffer]
            future_change = yf.Ticker(ticker_symbol).history(period=extended_timeframe)
            future_change['Future Year Change'] = (future_change['Close'].shift(-252) / future_change['Close'] - 1)

            end_date = future_change.index[-1] - pd.DateOffset(years=1)
            start_date = end_date - pd.DateOffset(years=int(timeframe[:-1]))
            future_change = future_change.loc[start_date:end_date].dropna(subset=['Future Year Change'])
        return future_change
    except Exception as e:
        print(f"Error processing {ticker_symbol}: {e}")
        return pd.DataFrame()

In [483]:
def get_static_ev_data(ticker_symbol):
    try:
        info = yf.Ticker(ticker_symbol).info
        return info.get('totalDebt', 0), info.get('totalCash', 0), info.get('sharesOutstanding', None)
    except Exception as e:
        print(f"Error fetching EV components for {ticker_symbol}: {e}")
        return None, None, None

In [484]:
def calculate_approx_ebit(ticker_symbol):
    try:
        info = yf.Ticker(ticker_symbol).info
        revenue = info.get('totalRevenue', None)
        operating_income = info.get('operatingIncome', None)
        return operating_income if operating_income else (revenue * 0.15 if revenue else None)
    except Exception as e:
        print(f"Error calculating EBIT for {ticker_symbol}: {e}")
        return None

In [485]:
def calculate_daily_ev_ebit(hist_data):
    hist_data['EV/EBIT'] = None
    for symbol in hist_data['Ticker'].unique():
        ticker_data = hist_data[hist_data['Ticker'] == symbol].copy()
        total_debt, cash, shares_outstanding = get_static_ev_data(symbol)
        ebit = calculate_approx_ebit(symbol)

        if shares_outstanding and ebit and ebit != 0:
            ticker_data['EV'] = (ticker_data['Close'] * shares_outstanding) + total_debt - cash
            ticker_data['EV/EBIT'] = ticker_data['EV'] / ebit
            hist_data.loc[ticker_data.index, 'EV/EBIT'] = ticker_data['EV/EBIT']
    return hist_data.drop(columns=['EV'], errors='ignore')

In [486]:
def calculate_daily_roic(hist_data):
    for symbol in hist_data['Ticker'].unique():
        ticker_data = hist_data[hist_data['Ticker'] == symbol].copy()
        total_debt, cash, shares_outstanding = get_static_ev_data(symbol)
        ebit = calculate_approx_ebit(symbol)

        if shares_outstanding and ebit:
            tax_rate = 0.21
            nopat = ebit * (1 - tax_rate)
            invested_capital = total_debt + (ticker_data['Close'] * shares_outstanding) - cash
            ticker_data['ROIC'] = np.where(invested_capital != 0, nopat / invested_capital, None)
            hist_data.loc[ticker_data.index, 'ROIC'] = ticker_data['ROIC']
    return hist_data

In [487]:
def getTickerData(ticker_symbol):
    try:
        ticker = yf.Ticker(ticker_symbol)
        hist = ticker.history(period='1d')
        total_debt, cash, shares_outstanding = get_static_ev_data(ticker_symbol)
        ebit = calculate_approx_ebit(ticker_symbol)
        ev = (hist['Close'].iloc[-1] * shares_outstanding) + total_debt - cash
        ev_ebit = ev / ebit if ebit else None
        market_cap = hist['Close'].iloc[-1] * shares_outstanding
        tax_rate = 0.21
        nopat = ebit * (1 - tax_rate) if ebit else None
        invested_capital = total_debt + market_cap - cash
        roic = nopat / invested_capital if nopat and invested_capital else None
        industry = get_industry(ticker_symbol)

        return pd.DataFrame([{
            'Open': hist['Open'].iloc[-1],
            'High': hist['High'].iloc[-1],
            'Low': hist['Low'].iloc[-1],
            'Close': hist['Close'].iloc[-1],
            'Volume': hist['Volume'].iloc[-1],
            'Dividends': hist.get('Dividends', pd.Series([0.0])).iloc[-1],
            'Stock Splits': hist.get('Stock Splits', pd.Series([0.0])).iloc[-1],
            'EV/EBIT': ev_ebit,
            'Market Cap': market_cap,
            'ROIC': roic,
            'Industry': industry
        }])
    except Exception as e:
        print(f"Error fetching data for {ticker_symbol}: {e}")
        return pd.DataFrame()

In [488]:
def getTickerDataFrom1YrAgo(ticker_symbol):
    try:
        # Fetch ticker data
        ticker = yf.Ticker(ticker_symbol)

        # Define the date range: one year ago to today
        today = datetime.today()
        one_year_ago = today - timedelta(days=365)

        # Fetch historical data for one year ago
        hist = ticker.history(start=(one_year_ago - timedelta(days=30)).strftime('%Y-%m-%d'), 
                              end=(one_year_ago + timedelta(days=0)).strftime('%Y-%m-%d'))
        if hist.empty:
            raise ValueError(f"No historical data available for {ticker_symbol} around {one_year_ago.strftime('%Y-%m-%d')}.")

        # Extract the closest data point to one year ago
        row = hist.iloc[0]  # Get the first available entry within the date range

        # Price today
        price_today = ticker.history(period='1d')['Close'].iloc[-1]

        # Calculate future price change (from one year ago to today)
        price_change_future = ((price_today - row['Close']) / row['Close']) if row['Close'] else None

        # Collect additional data
        total_debt, cash, shares_outstanding = get_static_ev_data(ticker_symbol)
        ebit = calculate_approx_ebit(ticker_symbol)
        ev = (row['Close'] * shares_outstanding) + total_debt - cash if shares_outstanding else None
        ev_ebit = ev / ebit if ebit else None
        market_cap = row['Close'] * shares_outstanding if shares_outstanding else None
        tax_rate = 0.21
        nopat = ebit * (1 - tax_rate) if ebit else None
        invested_capital = total_debt + market_cap - cash if market_cap and total_debt and cash else None
        roic = nopat / invested_capital if nopat and invested_capital else None
        industry = get_industry(ticker_symbol)

        # Return as a DataFrame
        return pd.DataFrame([{
            'Date': row.name,
            'Open': row['Open'],
            'High': row['High'],
            'Low': row['Low'],
            'Close': row['Close'],
            'Volume': row['Volume'],
            'Dividends': row.get('Dividends', 0.0),
            'Stock Splits': row.get('Stock Splits', 0.0),
            'Future Year Change': price_change_future,
            'Industry': industry,
            'EV/EBIT': ev_ebit,
            'ROIC': roic
        }])

    except Exception as e:
        print(f"Error fetching data for {ticker_symbol}: {e}")
        return pd.DataFrame()


In [489]:
if getNewData == True:
    hist_data = pd.DataFrame()
    symbols = pd.read_csv(testFolder / 'tickers.csv')['Ticker']
    trainingTickers = np.random.choice(symbols, size=trainingSize, replace=False).tolist()
    for trainingTicker in trainingTickers:
        try:
            future_change = calculate_future_year_change(trainingTicker, timeFrame)
            future_change['Ticker'] = trainingTicker
            future_change['Industry'] = get_industry(trainingTicker)
            hist_data = pd.concat([hist_data, future_change])
        except Exception as e:
            print(f"Error processing {trainingTicker}: {e}")

    hist_data.reset_index(inplace=True)
    hist_data = calculate_daily_ev_ebit(hist_data)
    hist_data = calculate_daily_roic(hist_data)
    hist_data.to_csv(dataFolder / trainingData, index=False)

trainingRows = pd.read_csv(dataFolder / trainingData)
print(len(trainingRows))

$FCH-C: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")


Error processing FCH-C: index -1 is out of bounds for axis 0 with size 0


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/FCH-C?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=FCH-C&crumb=vSmwn1lwQ8s
  hist_data = pd.concat([hist_data, future_change])
$SWJ: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")
  hist_data = pd.concat([hist_data, future_change])


Error processing SWJ: index -1 is out of bounds for axis 0 with size 0


$ARI-A: possibly delisted; no price data found  (period=5y) (Yahoo error = "No data found, symbol may be delisted")


Error processing ARI-A: index -1 is out of bounds for axis 0 with size 0


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ARI-A?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=ARI-A&crumb=vSmwn1lwQ8s
  hist_data = pd.concat([hist_data, future_change])
$SQNM: possibly delisted; no price data found  (period=5y)


Error processing SQNM: index -1 is out of bounds for axis 0 with size 0


  hist_data = pd.concat([hist_data, future_change])


3000


Then we can have a look at how the data is structured:

In [490]:
dataPath = Path()
df = pd.read_csv(trainingFolder/trainingData)
df.head()

Unnamed: 0,index,Ticker,Industry,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains,Future Year Change,EV/EBIT,ROIC
0,2022-01-21 00:00:00-05:00,EUFN,Unknown,17.84636,17.88083,17.682634,17.760189,1681400.0,0.0,0.0,0.0,-0.011868,,
1,2022-01-24 00:00:00-05:00,EUFN,Unknown,17.260387,17.458584,16.950165,17.441349,2376200.0,0.0,0.0,0.0,0.013927,,
2,2022-01-25 00:00:00-05:00,EUFN,Unknown,17.363796,17.682635,17.22592,17.57061,1410000.0,0.0,0.0,0.0,0.017213,,
3,2022-01-26 00:00:00-05:00,EUFN,Unknown,17.889447,17.958385,17.570607,17.674015,3323000.0,0.0,0.0,0.0,0.007193,,
4,2022-01-27 00:00:00-05:00,EUFN,Unknown,17.915301,17.992855,17.579226,17.691252,1972400.0,0.0,0.0,0.0,-0.000395,,


Some of the columns are continuous (like age) and we will treat them as float numbers we can feed our model directly. Others are categorical (like workclass or education) and we will convert them to a unique index that we will feed to embedding layers. We can specify our categorical and continuous column names, as well as the name of the dependent variable in TabularDataLoaders factory methods:

In [496]:
dls = TabularDataLoaders.from_csv(trainingFolder / trainingData, path=dataPath, 
    y_names=yNames,
    cat_names=catNames,
    cont_names=contNames,
    procs = [Categorify, FillMissing, Normalize])

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  to[n].fillna(self.na_dict[n], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  to[n].fillna(self.na_dict[n], inplace=True)


The last part is the list of pre-processors we apply to our data:

* Categorify is going to take every categorical variable and make a map from integer to unique categories, then replace the values by the corresponding index.
* FillMissing will fill the missing values in the continuous variables by the median of existing values (you can choose a specific value if you prefer)
* Normalize will normalize the continuous variables (subtract the mean and divide by the std)

To further expose what’s going on below the surface, let’s rewrite this utilizing fastai’s TabularPandas class. We will need to make one adjustment, which is defining how we want to split our data. By default the factory method above used a random 80/20 split, so we will do the same:

In [497]:
splits = EndSplitter (valid_pct=0.2, valid_last=True)(range_of(df))

In [498]:
to = TabularPandas(df, procs=[Categorify, FillMissing, Normalize],
    y_names=yNames,
    cat_names = catNames,
    cont_names = contNames,
    splits=splits)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  to[n].fillna(self.na_dict[n], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  to[n].fillna(self.na_dict[n], inplace=True)


Once we build our TabularPandas object, our data is completely preprocessed as seen below:

In [499]:
to.xs.iloc[:1]

Unnamed: 0,index,EV/EBIT_na,ROIC_na,Open,Close,Volume,Dividends,Stock Splits,EV/EBIT,ROIC
0,1,2,2,-0.508579,-0.51,1.204792,-0.035072,0.0,-0.499398,0.368375


Now we can build our DataLoaders again:

In [500]:
dls = to.dataloaders(bs=64)

The show_batch method works like for every other application:

In [501]:
dls.show_batch()

Unnamed: 0,index,EV/EBIT_na,ROIC_na,Open,Close,Volume,Dividends,Stock Splits,EV/EBIT,ROIC,Future Year Change
0,2023-04-13 00:00:00-04:00,False,False,4.45,4.450002,2199.981391,1.762562e-11,0.0,4.768,0.165688,-0.377528
1,2022-02-03 00:00:00-05:00,False,False,145.039994,142.510002,469400.00141,1.762562e-11,0.0,58.325641,0.013545,0.572802
2,2022-08-22 00:00:00-04:00,False,False,32.84,32.080001,147124.997336,1.762562e-11,0.0,5.136837,0.153791,0.029676
3,2023-12-19 00:00:00-05:00,False,False,33.68,34.220001,322000.006916,1.762562e-11,0.0,8.652992,0.091298,-0.254237
4,2022-03-08 00:00:00-05:00,False,False,17.270002,17.49,74699.993663,1.762562e-11,0.0,4.908867,0.160933,-0.189251
5,2023-03-28 00:00:00-04:00,False,False,4.12,4.119998,2600.008322,1.762562e-11,0.0,4.487694,0.176037,-0.313107
6,2023-08-24 00:00:00-04:00,False,False,3.709999,3.76,1900.003778,1.762562e-11,0.0,4.181904,0.188909,-0.260638
7,2022-02-28 00:00:00-05:00,False,False,34.52,35.231999,260124.996688,1.762562e-11,0.0,5.477122,0.144236,-0.133969
8,2022-05-13 00:00:00-04:00,False,False,33.576,33.895999,194750.006328,1.762562e-11,0.0,5.332889,0.148137,-0.175596
9,2022-10-26 00:00:00-04:00,False,False,30.568002,30.616,134875.002997,1.762562e-11,0.0,4.978785,0.158673,0.006794


We can define a model using the tabular_learner method. When we define our model, fastai will try to infer the loss function based on our y_names earlier.

Note: Sometimes with tabular data, your y’s may be encoded (such as 0 and 1). In such a case you should explicitly pass y_block = CategoryBlock in your constructor so fastai won’t presume you are doing regression.

And we can train that model with the fit_one_cycle method (the fine_tune method won’t be useful here since we don’t have a pretrained model).

We can then have a look at some training predictions:

In [502]:
if trainNewModel == True:
    learn = tabular_learner(dls, metrics=[rmse, mae])

    print(f"Training {modelName} for {epochs} epochs")
    learn.fit_one_cycle(epochs)

    learn.show_results(max_n=15)

    learn.export(modelFolder / f'{modelName}.pkl')

Training stockScreenerV2.0 for 3 epochs


epoch,train_loss,valid_loss,_rmse,mae,time
0,0.164666,0.331723,0.575954,0.546322,00:00
1,0.110369,0.268406,0.51808,0.388339,00:00
2,0.081473,0.293869,0.542096,0.379748,00:00


Unnamed: 0,index,EV/EBIT_na,ROIC_na,Open,Close,Volume,Dividends,Stock Splits,EV/EBIT,ROIC,Future Year Change,Future Year Change_pred
0,248.0,1.0,1.0,-0.735732,-0.734971,-0.46961,-0.035072,0.0,-0.32285,-0.825311,-0.409302,-0.353596
1,463.0,1.0,1.0,-0.749292,-0.749815,-0.439063,-0.035072,0.0,-0.551052,1.361729,-0.3125,0.393161
2,363.0,1.0,1.0,-0.741069,-0.741024,-0.52367,-0.035072,0.0,-0.415903,-0.399069,-0.439306,-0.214906
3,171.0,1.0,1.0,-0.742512,-0.743618,-0.395605,-0.035072,0.0,-0.455783,-0.105512,-0.283871,-0.133416
4,39.0,1.0,1.0,-0.724336,-0.722,-0.187971,-0.035072,0.0,-0.123451,-1.254191,-0.55082,-0.387987
5,376.0,1.0,1.0,-0.741069,-0.741312,-0.453654,-0.035072,0.0,-0.420334,-0.370996,-0.397661,-0.201124
6,71.0,1.0,1.0,-0.734289,-0.73425,-0.408726,-0.035072,0.0,-0.311772,-0.861498,-0.377273,-0.397314
7,43.0,1.0,1.0,-0.720152,-0.721424,-0.33808,-0.035072,0.0,-0.114588,-1.266166,-0.550162,-0.405838
8,421.0,1.0,1.0,-0.720873,-0.720991,-0.557681,-0.035072,0.0,-0.55027,1.34057,-0.169872,0.669314
9,292.0,1.0,1.0,-0.74583,-0.745203,-0.238252,-0.035072,0.0,-0.480154,0.133105,1.0,-0.075873


## Evaluation

In [503]:
def evaluate_model(learn, testTickers, model_name, model_folder, cont_names, cat_names):
    """
    Evaluate a fastai model on a list of test tickers and log the results.
    
    Args:
        learn: fastai Learner object
        testTickers (list): List of ticker symbols to test on
        model_name (str): Name of the model for logging
        model_folder (Path): Path to save evaluation results
        cont_names (list): List of continuous feature names
        cat_names (list): List of categorical feature names
    """
    all_predictions = []
    all_actuals = []
    test_data_list = []
    
    # Collect test data for all tickers
    for ticker in testTickers:
        try:
            # Get test data
            test_data = getTickerDataFrom1YrAgo(ticker)
            if test_data.empty:
                print(f"Skipping {ticker} due to missing data")
                continue
            
            test_data_list.append(test_data)
            
        except Exception as e:
            print(f"Error fetching data for {ticker}: {e}")
            continue
    
    if not test_data_list:
        print("No valid test data collected")
        return None, None, None
    
    # Combine all test data
    combined_test_data = pd.concat(test_data_list, ignore_index=True)
    
    # Create fastai test dataloader
    test_dl = learn.dls.test_dl(combined_test_data)
    
    # Get predictions
    preds, targs = learn.get_preds(dl=test_dl)
    
    # Convert to numpy arrays
    predictions = preds.numpy()
    actuals = targs.numpy()
    
    # Create DataFrame for analysis
    results_df = pd.DataFrame({
        'Predicted': predictions.flatten(),
        'Actual': actuals.flatten()
    })

    # Calculate residuals
    results_df['Residual'] = results_df['Actual'] - results_df['Predicted']

    # Define outlier threshold (2 standard deviations)
    outlier_threshold = 2 * results_df['Residual'].std()

    # Filter outliers
    filtered_df = results_df[abs(results_df['Residual']) <= outlier_threshold]

    # Calculate metrics
    mae = np.mean(np.abs(filtered_df['Residual']))
    rmse = np.sqrt(np.mean(filtered_df['Residual']**2))
    r2 = 1 - (np.sum(filtered_df['Residual']**2) / 
              np.sum((filtered_df['Actual'] - filtered_df['Actual'].mean())**2))

    # Log results
    log_evaluation(model_name, mae, rmse, r2, model_folder, testTickers)
    
    # Create visualizations
    plot_results(filtered_df, model_name, model_folder)
    
    return mae, rmse, r2

def log_evaluation(model_name, mae, rmse, r2, model_folder, testTickers):
    """Log evaluation metrics to CSV file"""
    log_file = model_folder / "modelEvaluations.csv"
    
    new_entry_df = pd.DataFrame([{
        "Model Name": modelName,
        "Timestamp": datetime.now().strftime('%Y-%m-%d %H:%M'),
        "MAE": f'{mae:.3f}',
        "RMSE": f'{rmse:.3f}',
        "R2": f'{r2:.3f}',
        "Epochs": epochs,
        "Training Size": trainingSize,
        "Test Size": len(testTickers),
        "Cat Names": catNames,
        "Cont Names": contNames,
    }])
    
    try:
        log_df = pd.read_csv(log_file)
        log_df = pd.concat([log_df, new_entry_df], ignore_index=True)
    except FileNotFoundError:
        log_df = new_entry_df
        
    log_df.to_csv(log_file, index=False)
    print(f"Logged evaluation results to {log_file}")

def plot_results(filtered_df, model_name, model_folder):
    """Create and save visualization plots"""
    plt.figure(figsize=(12, 8))
    
    # Scatter plot
    plt.subplot(2, 1, 1)
    actuals = filtered_df['Actual']
    predictions = filtered_df['Predicted']
    plt.scatter(actuals, predictions, alpha=0.7, label='Predictions')
    
    # Perfect prediction line
    min_val = min(actuals.min(), predictions.min())
    max_val = max(actuals.max(), predictions.max())
    plt.plot([min_val, max_val], [min_val, max_val], 
             color='red', linestyle='--', label='Perfect Prediction')
    
    plt.title(f'Predicted vs. Actual Returns - {model_name}', fontsize=14)
    plt.xlabel('Actual Returns', fontsize=12)
    plt.ylabel('Predicted Returns', fontsize=12)
    plt.legend()
    plt.grid(alpha=0.5)
    
    # Residual plot
    plt.subplot(2, 1, 2)
    plt.scatter(predictions, filtered_df['Residual'], alpha=0.7)
    plt.axhline(y=0, color='r', linestyle='--')
    plt.title('Residual Plot', fontsize=14)
    plt.xlabel('Predicted Returns', fontsize=12)
    plt.ylabel('Residual', fontsize=12)
    plt.grid(alpha=0.5)
    
    plt.tight_layout()
    
    # Show plot
    plt.show()

In [504]:
if trainNewModel == True:
    # Evaluate model
    nonTrainingTickers = list(set(symbols) - set(trainingTickers))
    
    if not nonTrainingTickers:
        raise ValueError("No tickers available for testing. Ensure nonTrainingTickers is populated correctly.")
    
    if testSize == 'ALL':
        testTickers = nonTrainingTickers
    else:
        testTickers = np.random.choice(nonTrainingTickers, size=min(testSize, len(nonTrainingTickers)), replace=False).tolist()
    
    print(f"Amount of test tickers: {len(testTickers)}, Expected: {testSize}")
    print(testTickers)
    
    mae, rmse, r2 = evaluate_model(
        learn=learn,  # Your fastai learner
        testTickers=testTickers,
        model_name=modelName,
        model_folder=modelFolder,
        cont_names=contNames,
        cat_names=catNames
    )
    
    if mae is not None and rmse is not None and r2 is not None:
        print(f"Evaluation Results:")
        print(f"MAE: {mae:.3f}")
        print(f"RMSE: {rmse:.3f}")
        print(f"R2: {r2:.3f}")
    else:
        print("Evaluation failed. Metrics are None.")

Amount of test tickers: 200, Expected: 200
['TVE', 'AIZ', 'GRX', 'BVN', 'APC', 'MBLX', 'NDSN', 'AFT', 'SENEB', 'SHBI', 'CCC', 'OKE', 'NWSA', 'EDT', 'AUO', 'OPHC', 'CEM', 'CPA', 'ADRD', 'OLN', 'FULL', 'MRH', 'RYL', 'EEFT', 'KELYB', 'JPZ', 'GB', 'CAE', 'WERN', 'HPY', 'ROLL', 'AGD', 'STNG', 'RFI', 'CLV', 'TSU', 'PCG', 'STP', 'CBNJ', 'AFH', 'RIT', 'TS', 'SPNS', 'SGMO', 'PHK', 'DDD', 'NBHC', 'SBY', 'TXT', 'AXS', 'AAXJ', 'ARCC', 'C', 'ASBCW', 'PBI-B', 'WIRE', 'GLRE', 'DTT', 'ARIA', 'SBR', 'SSD', 'PEO', 'DATE', 'STAG', 'APU', 'XUE', 'CMLP', 'JNS', 'PAYX', 'TRIP', 'EXEL', 'CNSI', 'FOE', 'IRF', 'AVGO', 'HTS-A', 'KMM', 'DRE', 'LMNX', 'HSH', 'TOWN', 'SGU', 'MFNC', 'NMT-C', 'SMSI', 'TDI', 'NTWK', 'ABAX', 'HSIC', 'TXRH', 'TYL', 'C-P', 'C-Q', 'PBNY', 'PMX', 'GKNT', 'DSL', 'RNDY', 'OVAS', 'PFBI', 'ESD', 'BSMX', 'NCT', 'MSBF', 'NTAP', 'JZL', 'FWLT', 'PAAS', 'HDNG', 'PWRD', 'MYD', 'GGG', 'KBR', 'ARII', 'WBS', 'MTOR', 'CFR-A', 'WSCI', 'GOODN', 'KONG', 'BYFC', 'SUPN', 'LKFN', 'HPF', 'BIOD', 'MHI', 'MICT'

$APC: possibly delisted; no timezone found
$MBLX: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for APC: No historical data available for APC around 2024-01-22.
Skipping APC due to missing data
Error fetching data for MBLX: No historical data available for MBLX around 2024-01-22.
Skipping MBLX due to missing data


$AFT: possibly delisted; no timezone found


Error fetching data for AFT: No historical data available for AFT around 2024-01-22.
Skipping AFT due to missing data


$SENEB: possibly delisted; no price data found  (period=1d)


Error fetching data for SENEB: single positional indexer is out-of-bounds
Skipping SENEB due to missing data


$CCC: possibly delisted; no timezone found


Error fetching data for CCC: No historical data available for CCC around 2024-01-22.
Skipping CCC due to missing data


$EDT: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for EDT: No historical data available for EDT around 2024-01-22.
Skipping EDT due to missing data


$AUO: possibly delisted; no timezone found


Error fetching data for AUO: No historical data available for AUO around 2024-01-22.
Skipping AUO due to missing data


$CEM: possibly delisted; no timezone found


Error fetching data for CEM: No historical data available for CEM around 2024-01-22.
Skipping CEM due to missing data


$ADRD: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for ADRD: No historical data available for ADRD around 2024-01-22.
Skipping ADRD due to missing data


$FULL: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for FULL: No historical data available for FULL around 2024-01-22.
Skipping FULL due to missing data


$MRH: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)
$RYL: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for MRH: No historical data available for MRH around 2024-01-22.
Skipping MRH due to missing data
Error fetching data for RYL: No historical data available for RYL around 2024-01-22.
Skipping RYL due to missing data


$KELYB: possibly delisted; no price data found  (period=1d)
$JPZ: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for KELYB: single positional indexer is out-of-bounds
Skipping KELYB due to missing data
Error fetching data for JPZ: No historical data available for JPZ around 2024-01-22.
Skipping JPZ due to missing data


$HPY: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for HPY: No historical data available for HPY around 2024-01-22.
Skipping HPY due to missing data


$ROLL: possibly delisted; no timezone found


Error fetching data for ROLL: No historical data available for ROLL around 2024-01-22.
Skipping ROLL due to missing data


$CLV: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for CLV: No historical data available for CLV around 2024-01-22.
Skipping CLV due to missing data


$TSU: possibly delisted; no timezone found


Error fetching data for TSU: No historical data available for TSU around 2024-01-22.
Skipping TSU due to missing data


$STP: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for STP: No historical data available for STP around 2024-01-22.
Skipping STP due to missing data


$CBNJ: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for CBNJ: No historical data available for CBNJ around 2024-01-22.
Skipping CBNJ due to missing data


$AFH: possibly delisted; no timezone found
$RIT: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for AFH: No historical data available for AFH around 2024-01-22.
Skipping AFH due to missing data
Error fetching data for RIT: No historical data available for RIT around 2024-01-22.
Skipping RIT due to missing data


$SBY: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for SBY: No historical data available for SBY around 2024-01-22.
Skipping SBY due to missing data


$ASBCW: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for ASBCW: No historical data available for ASBCW around 2024-01-22.
Skipping ASBCW due to missing data


$PBI-B: possibly delisted; no timezone found


Error fetching data for PBI-B: No historical data available for PBI-B around 2024-01-22.
Skipping PBI-B due to missing data


$WIRE: possibly delisted; no timezone found


Error fetching data for WIRE: No historical data available for WIRE around 2024-01-22.
Skipping WIRE due to missing data


$DTT: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)
$ARIA: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for DTT: No historical data available for DTT around 2024-01-22.
Skipping DTT due to missing data
Error fetching data for ARIA: No historical data available for ARIA around 2024-01-22.
Skipping ARIA due to missing data


$DATE: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for DATE: No historical data available for DATE around 2024-01-22.
Skipping DATE due to missing data


$APU: possibly delisted; no timezone found
$XUE: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for APU: No historical data available for APU around 2024-01-22.
Skipping APU due to missing data
Error fetching data for XUE: No historical data available for XUE around 2024-01-22.
Skipping XUE due to missing data


$CMLP: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for CMLP: No historical data available for CMLP around 2024-01-22.
Skipping CMLP due to missing data


$JNS: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for JNS: No historical data available for JNS around 2024-01-22.
Skipping JNS due to missing data


$CNSI: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for CNSI: No historical data available for CNSI around 2024-01-22.
Skipping CNSI due to missing data


$FOE: possibly delisted; no timezone found
$IRF: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for FOE: No historical data available for FOE around 2024-01-22.
Skipping FOE due to missing data
Error fetching data for IRF: No historical data available for IRF around 2024-01-22.
Skipping IRF due to missing data


$HTS-A: possibly delisted; no timezone found
$KMM: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for HTS-A: No historical data available for HTS-A around 2024-01-22.
Skipping HTS-A due to missing data
Error fetching data for KMM: No historical data available for KMM around 2024-01-22.
Skipping KMM due to missing data


$DRE: possibly delisted; no timezone found


Error fetching data for DRE: No historical data available for DRE around 2024-01-22.
Skipping DRE due to missing data


$LMNX: possibly delisted; no timezone found
$HSH: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for LMNX: No historical data available for LMNX around 2024-01-22.
Skipping LMNX due to missing data
Error fetching data for HSH: No historical data available for HSH around 2024-01-22.
Skipping HSH due to missing data


$MFNC: possibly delisted; no timezone found


Error fetching data for MFNC: No historical data available for MFNC around 2024-01-22.
Skipping MFNC due to missing data


$NMT-C: possibly delisted; no timezone found


Error fetching data for NMT-C: No historical data available for NMT-C around 2024-01-22.
Skipping NMT-C due to missing data


$TDI: possibly delisted; no price data found  (period=1d)


Error fetching data for TDI: single positional indexer is out-of-bounds
Skipping TDI due to missing data


$ABAX: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for ABAX: No historical data available for ABAX around 2024-01-22.
Skipping ABAX due to missing data


$C-P: possibly delisted; no timezone found


Error fetching data for C-P: No historical data available for C-P around 2024-01-22.
Skipping C-P due to missing data


$C-Q: possibly delisted; no timezone found
$PBNY: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for C-Q: No historical data available for C-Q around 2024-01-22.
Skipping C-Q due to missing data
Error fetching data for PBNY: No historical data available for PBNY around 2024-01-22.
Skipping PBNY due to missing data


$GKNT: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for GKNT: No historical data available for GKNT around 2024-01-22.
Skipping GKNT due to missing data


$RNDY: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)
$OVAS: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for RNDY: No historical data available for RNDY around 2024-01-22.
Skipping RNDY due to missing data
Error fetching data for OVAS: No historical data available for OVAS around 2024-01-22.
Skipping OVAS due to missing data


$PFBI: possibly delisted; no timezone found
$ESD: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for PFBI: No historical data available for PFBI around 2024-01-22.
Skipping PFBI due to missing data
Error fetching data for ESD: No historical data available for ESD around 2024-01-22.
Skipping ESD due to missing data


$BSMX: possibly delisted; no timezone found
$NCT: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for BSMX: No historical data available for BSMX around 2024-01-22.
Skipping BSMX due to missing data
Error fetching data for NCT: No historical data available for NCT around 2024-01-22.
Skipping NCT due to missing data


$MSBF: possibly delisted; no timezone found


Error fetching data for MSBF: No historical data available for MSBF around 2024-01-22.
Skipping MSBF due to missing data


$JZL: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)
$FWLT: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for JZL: No historical data available for JZL around 2024-01-22.
Skipping JZL due to missing data
Error fetching data for FWLT: No historical data available for FWLT around 2024-01-22.
Skipping FWLT due to missing data


$HDNG: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)
$PWRD: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for HDNG: No historical data available for HDNG around 2024-01-22.
Skipping HDNG due to missing data
Error fetching data for PWRD: No historical data available for PWRD around 2024-01-22.
Skipping PWRD due to missing data


$ARII: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for ARII: No historical data available for ARII around 2024-01-22.
Skipping ARII due to missing data


$MTOR: possibly delisted; no timezone found


Error fetching data for MTOR: No historical data available for MTOR around 2024-01-22.
Skipping MTOR due to missing data


$CFR-A: possibly delisted; no timezone found
$WSCI: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for CFR-A: No historical data available for CFR-A around 2024-01-22.
Skipping CFR-A due to missing data
Error fetching data for WSCI: No historical data available for WSCI around 2024-01-22.
Skipping WSCI due to missing data
Error fetching data for GOODN: unsupported operand type(s) for /: 'NoneType' and 'float'
Skipping GOODN due to missing data


$KONG: possibly delisted; no price data found  (period=1d)


Error fetching data for KONG: single positional indexer is out-of-bounds
Skipping KONG due to missing data


$BIOD: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for BIOD: No historical data available for BIOD around 2024-01-22.
Skipping BIOD due to missing data


$MICT: possibly delisted; no timezone found


Error fetching data for MICT: No historical data available for MICT around 2024-01-22.
Skipping MICT due to missing data


$AIF: possibly delisted; no timezone found


Error fetching data for AIF: No historical data available for AIF around 2024-01-22.
Skipping AIF due to missing data


$GCAP: possibly delisted; no timezone found


Error fetching data for GCAP: No historical data available for GCAP around 2024-01-22.
Skipping GCAP due to missing data


$XXIA: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)
$CADX: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for XXIA: No historical data available for XXIA around 2024-01-22.
Skipping XXIA due to missing data
Error fetching data for CADX: No historical data available for CADX around 2024-01-22.
Skipping CADX due to missing data


$STSI: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for STSI: No historical data available for STSI around 2024-01-22.
Skipping STSI due to missing data


$RALY: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for RALY: No historical data available for RALY around 2024-01-22.
Skipping RALY due to missing data


$TMK: possibly delisted; no timezone found


Error fetching data for TMK: No historical data available for TMK around 2024-01-22.
Skipping TMK due to missing data


$NLSN: possibly delisted; no timezone found


Error fetching data for NLSN: No historical data available for NLSN around 2024-01-22.
Skipping NLSN due to missing data


$SCOK: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for SCOK: No historical data available for SCOK around 2024-01-22.
Skipping SCOK due to missing data


$ORIT: possibly delisted; no timezone found


Error fetching data for ORIT: No historical data available for ORIT around 2024-01-22.
Skipping ORIT due to missing data


$ESV: possibly delisted; no timezone found


Error fetching data for ESV: No historical data available for ESV around 2024-01-22.
Skipping ESV due to missing data


$JPM-I: possibly delisted; no timezone found


Error fetching data for JPM-I: No historical data available for JPM-I around 2024-01-22.
Skipping JPM-I due to missing data


$EPR-F: possibly delisted; no timezone found


Error fetching data for EPR-F: No historical data available for EPR-F around 2024-01-22.
Skipping EPR-F due to missing data


$SHLD: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22) (Yahoo error = "Data doesn't exist for startDate = 1703307600, endDate = 1705899600")


Error fetching data for SHLD: No historical data available for SHLD around 2024-01-22.
Skipping SHLD due to missing data


$CST: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for CST: No historical data available for CST around 2024-01-22.
Skipping CST due to missing data


$CVO: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for CVO: No historical data available for CVO around 2024-01-22.
Skipping CVO due to missing data


$MRGE: possibly delisted; no price data found  (period=1d)


Error fetching data for MRGE: single positional indexer is out-of-bounds
Skipping MRGE due to missing data


$AGO-F: possibly delisted; no timezone found


Error fetching data for AGO-F: No historical data available for AGO-F around 2024-01-22.
Skipping AGO-F due to missing data


$CPAH: possibly delisted; no timezone found


Error fetching data for CPAH: No historical data available for CPAH around 2024-01-22.
Skipping CPAH due to missing data


$ECOL: possibly delisted; no timezone found
$NQC: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for ECOL: No historical data available for ECOL around 2024-01-22.
Skipping ECOL due to missing data
Error fetching data for NQC: No historical data available for NQC around 2024-01-22.
Skipping NQC due to missing data


$PROV: possibly delisted; no price data found  (period=1d)
$KERX: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for PROV: single positional indexer is out-of-bounds
Skipping PROV due to missing data
Error fetching data for KERX: No historical data available for KERX around 2024-01-22.
Skipping KERX due to missing data


$SFD: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)
$ZOLT: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for SFD: No historical data available for SFD around 2024-01-22.
Skipping SFD due to missing data
Error fetching data for ZOLT: No historical data available for ZOLT around 2024-01-22.
Skipping ZOLT due to missing data


$SDR: possibly delisted; no timezone found


Error fetching data for SDR: No historical data available for SDR around 2024-01-22.
Skipping SDR due to missing data


$BPI: possibly delisted; no timezone found
$OIBR: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for BPI: No historical data available for BPI around 2024-01-22.
Skipping BPI due to missing data
Error fetching data for OIBR: No historical data available for OIBR around 2024-01-22.
Skipping OIBR due to missing data


$APRI: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)
$HCLP: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for APRI: No historical data available for APRI around 2024-01-22.
Skipping APRI due to missing data
Error fetching data for HCLP: No historical data available for HCLP around 2024-01-22.
Skipping HCLP due to missing data


$RNET: possibly delisted; no timezone found


Error fetching data for RNET: No historical data available for RNET around 2024-01-22.
Skipping RNET due to missing data


$INSY: possibly delisted; no timezone found


Error fetching data for INSY: No historical data available for INSY around 2024-01-22.
Skipping INSY due to missing data


$KIM-J: possibly delisted; no timezone found


Error fetching data for KIM-J: No historical data available for KIM-J around 2024-01-22.
Skipping KIM-J due to missing data


$MOG.B: possibly delisted; no timezone found
$ONFC: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for MOG.B: No historical data available for MOG.B around 2024-01-22.
Skipping MOG.B due to missing data
Error fetching data for ONFC: No historical data available for ONFC around 2024-01-22.
Skipping ONFC due to missing data


$BMS: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)
$CRDS: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for BMS: No historical data available for BMS around 2024-01-22.
Skipping BMS due to missing data
Error fetching data for CRDS: No historical data available for CRDS around 2024-01-22.
Skipping CRDS due to missing data


$KEF: possibly delisted; no price data found  (1d 2023-12-23 -> 2024-01-22)


Error fetching data for KEF: No historical data available for KEF around 2024-01-22.
Skipping KEF due to missing data


  combined_test_data = pd.concat(test_data_list, ignore_index=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  to[n].fillna(self.na_dict[n], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  to[n].fillna(self.na_dict[n], inplace=True)


KeyError: "['index'] not in index"

## Tests

In [452]:
print('Model files in modelFolder:')
for file in modelFolder.glob('*.pkl'):
    print(file.name)

Model files in modelFolder:
stockScreenerV0.0.pkl
stockScreenerV1.0.pkl


In [453]:
evaluations = pd.read_csv(modelFolder / 'modelEvaluations.csv')
bestModel = evaluations.sort_values('MAE', ascending=True).iloc[0]
bestModel.head()

Model Name    stockScreenerV0.0
Timestamp      2025-01-21 17:41
MAE                       0.203
RMSE                      0.247
R2                       -2.154
Name: 1, dtype: object

### Load model

In [454]:
if os.name == 'nt':
    temp = pathlib.PosixPath
    pathlib.PosixPath = pathlib.WindowsPath

importedModel = Path(f"{bestModel['Model Name']}.pkl") # Change this if you want to try other models
learn = load_learner(modelFolder / importedModel)

## Predictions

In [None]:
if predictionTarget != None:
    if predictionTarget == 'ALL':
        adr_df = pd.read_csv(testFolder / 'tickers.csv')
        symbols = adr_df['Ticker'].tolist()
        test_df = [getTickerData(symbol) for symbol in symbols]
        test_df = pd.concat(test_df, ignore_index=True)  # Concatenate list of DataFrames into a single DataFrame
    else:
        test_df = getTickerData(predictionTarget)

    # Ensure test_df is a DataFrame
    if isinstance(test_df, dict):
        test_df = pd.DataFrame([test_df])

    dl = learn.dls.test_dl(test_df)
    test_df.head()

    if predictionTarget == 'ALL':
        prediction = learn.get_preds(dl=dl)
        adr_df = pd.read_csv(testFolder / 'tickers.csv')
        company_dict = dict(zip(adr_df['Ticker'], adr_df['Company']))
        sorted_predictions = sorted(zip(symbols, prediction[0]), key=lambda x: x[1], reverse=True)
        print(f"Got predictions for {len(sorted_predictions)} tickers, expected: {len(symbols)}")
        print(f"Prediction for best performing tickers:")
        for symbol, pred in sorted_predictions:
            company_name = company_dict.get(symbol, 'Unknown')
            print(f"{symbol} ({company_name}): {pred[0].item() * 100:.2f}%")
    else:
        prediction = learn.get_preds(dl=dl)
        company_name = company_dict.get(predictionTarget, 'Unknown')
        print(f"Prediction for {predictionTarget} ({company_name}):")
        print(f"{prediction[0][0][0].item() * 100:.2f}%")
    print("Free money?!")

predictionTarget = 'ALL' # 'ALL' for all tickers decending, 'None' for no prediction