# Stock Price Prediction Model

## Dependencies

### Library Installation (if needed)

In [1]:
#! pip install -Ur requirements.txt

### Importing Required Libraries

In [2]:
import matplotlib.pyplot as plt
import yfinance as yf
import pandas as pd
import numpy as np
import pickle
import gradio as gr
import ast
import warnings
from methods.model_methods import *
from datetime import datetime
from tqdm import tqdm
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.impute import SimpleImputer
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=SyntaxWarning)

  from .autonotebook import tqdm as notebook_tqdm


### Variables

In [3]:
symbol_list = input("Symbols: ('simple', 'filtered' or 'all')") # 'simple' or 'all'. simple are the tickers from the screener notebook and all are all of the tickers in 'filtered_tickers'. You may also an 'Int' to get a % amount of random tickers from 'all'
if symbol_list:
    build_new_dataset = True
else:
    build_new_dataset = False
    symbol_list = 'filtered'
minimum_feature_threshold = 0.6
outlier = 3
tree_amount = input("Tree amount:")
if tree_amount:
    tree_amount = int(tree_amount)
    train_new_model = True
else:
    train_new_model = False
if build_new_dataset or train_new_model:
    debugging = {'True': True, 'False': False}.get(input('Debug? (Bool)'))
else:
    debugging = False

In [4]:
symbols = pd.read_csv('../data/tickers/simple_tickers.csv')['Ticker'].tolist()
if symbol_list == 'filtered':
    symbols = symbols + pd.read_csv('../data/tickers/model_filtered_tickers.csv')['Ticker'].tolist()
elif symbol_list == 'all':
    symbols = symbols + pd.read_csv('../data/tickers/tickers.csv')['Ticker'].tolist()
elif symbol_list.isdigit():
    all_symbols = pd.read_csv('../data/tickers/tickers.csv')['Ticker'].tolist()
    num_symbols = max(1, round(len(all_symbols) * (int(symbol_list) / 100)))  
    symbols = symbols + pd.read_csv("../data/tickers/model_filtered_tickers.csv")["Ticker"].tolist()
    symbols = symbols + np.random.choice(all_symbols, num_symbols, replace=False).tolist()
symbols = pd.Series(symbols).unique()

## Data Preparation

#### Test load

In [5]:
test_stock = Stock("MSFT").get_df_financials()
if debugging:
    display(test_stock)

Unnamed: 0,Ticker,Name,Date,Earn Index,Sector,Industry,3M Future Change,TaxEffectOfUnusualItems,TaxRateForCalcs,NormalizedEBITDA,...,RawMaterials,Receivables,AccountsReceivable,AllowanceForDoubtfulAccountsReceivable,GrossAccountsReceivable,CashCashEquivalentsAndShortTermInvestments,OtherShortTermInvestments,CashAndCashEquivalents,CashEquivalents,CashFinancial
0,MSFT,Microsoft Corporation,2025-03-31,0,Technology,Software - Infrastructure,,69660000.0,0.18,40324000000.0,...,327000000.0,51700000000.0,51700000000.0,-695000000.0,52395000000.0,79612000000.0,50784000000.0,28828000000.0,18148000000.0,10680000000.0
1,MSFT,Microsoft Corporation,2024-12-31,1,Technology,Software - Infrastructure,-0.091444,-203220000.0,0.18,37915000000.0,...,345000000.0,48188000000.0,48188000000.0,-662000000.0,48850000000.0,71551000000.0,54069000000.0,17482000000.0,7835000000.0,9647000000.0
2,MSFT,Microsoft Corporation,2024-09-30,2,Technology,Software - Infrastructure,-0.010736,57190000.0,0.19,37933000000.0,...,488000000.0,44148000000.0,44148000000.0,-647000000.0,44795000000.0,78429000000.0,57589000000.0,20840000000.0,10252000000.0,10588000000.0
3,MSFT,Microsoft Corporation,2024-06-30,3,Technology,Software - Infrastructure,-0.040628,-99918000.0,0.182,133558000000.0,...,394000000.0,56924000000.0,56924000000.0,-830000000.0,57754000000.0,75531000000.0,57216000000.0,18315000000.0,6744000000.0,11571000000.0
5,MSFT,Microsoft Corporation,2023-06-30,5,Technology,Software - Infrastructure,-0.070833,-2850000.0,0.19,105155000000.0,...,709000000.0,48688000000.0,48688000000.0,-650000000.0,49338000000.0,111256000000.0,76552000000.0,34704000000.0,26226000000.0,8478000000.0
6,MSFT,Microsoft Corporation,2022-06-30,6,Technology,Software - Infrastructure,-0.073301,43754000.0,0.131,99905000000.0,...,1144000000.0,44261000000.0,44261000000.0,-633000000.0,44894000000.0,104749000000.0,90818000000.0,13931000000.0,5673000000.0,8258000000.0


#### Download annual financial data

In [6]:
if build_new_dataset:
    df = pd.DataFrame()
    filtered_pd = pd.read_csv("../data/tickers/model_filtered_tickers.csv")
    for symbol in tqdm(symbols, smoothing=0):
        ticker_df = pd.DataFrame()
        ticker = yf.Ticker(symbol)
        stock = Stock(symbol)
        try:
            ticker_df = Stock(symbol).get_df_financials()
            if ticker_df.isna().sum().sum() < round(test_stock.shape[1] * minimum_feature_threshold):
                if symbol not in filtered_pd["Ticker"].tolist():
                    filtered_pd = pd.concat([filtered_pd, pd.DataFrame([{"Ticker": symbol}])])
                imputer = SimpleImputer()
                for column in ticker_df.columns.drop(["Ticker", "Name", "Date", "3M Future Change", "Sector", "Industry"]):
                    if not ticker_df[column].isna().all():
                        ticker_df[column] = imputer.fit_transform(ticker_df[[column]])
            else:
                if symbol in filtered_pd["Ticker"].tolist():
                    filtered_pd = filtered_pd[filtered_pd["Ticker"] != symbol]
                    if debugging:
                        print(f"Removed {symbol} from filtered tickers. Datapoints: {ticker_df.isna().sum().sum()}, Needed: {round(test_stock.shape[1] * minimum_feature_threshold)}")
                continue
            df = pd.concat([df, ticker_df], ignore_index=True)
        except Exception as error:
            if symbol in filtered_pd["Ticker"].tolist():
                filtered_pd = filtered_pd[filtered_pd["Ticker"] != symbol]
                if debugging:
                    print(f"Removed {symbol} from filtered tickers because an exception was raised \n {error}")
            else:
                if debugging:
                    print(f"{symbol}: exception raised: {error}")
            continue
    filtered_pd.to_csv("../data/tickers/model_filtered_tickers.csv", index=False)
    df.to_csv("../data/model_results/earnings_data.csv", index=False)
else:
    df = pd.read_csv("../data/model_results/earnings_data.csv")

  3%|▎         | 27/1010 [02:02<1:14:34,  4.55s/it]

Removed BAVA.CO  from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


  3%|▎         | 32/1010 [02:22<1:12:25,  4.44s/it]

Removed NETC.CO from filtered tickers. Datapoints: 92, Needed: 79


  4%|▍         | 39/1010 [02:56<1:13:22,  4.53s/it]

Removed GOOGL from filtered tickers. Datapoints: 89, Needed: 79


  6%|▌         | 57/1010 [04:15<1:11:09,  4.48s/it]

FOBANK.CO: exception raised: 'shortName'


  7%|▋         | 75/1010 [05:37<1:10:08,  4.50s/it]

Removed HALO from filtered tickers. Datapoints: 97, Needed: 79


  8%|▊         | 79/1010 [05:55<1:09:52,  4.50s/it]

Removed APAM from filtered tickers. Datapoints: 81, Needed: 79


  8%|▊         | 81/1010 [06:04<1:09:40,  4.50s/it]

Removed OMAB from filtered tickers. Datapoints: 83, Needed: 79


  8%|▊         | 83/1010 [06:14<1:09:45,  4.52s/it]

Removed EQNR from filtered tickers. Datapoints: 102, Needed: 79


  9%|▊         | 88/1010 [06:36<1:09:17,  4.51s/it]

Removed PDD from filtered tickers. Datapoints: 84, Needed: 79


  9%|▉         | 94/1010 [07:07<1:09:24,  4.55s/it]

Removed GOOG from filtered tickers. Datapoints: 89, Needed: 79


 10%|▉         | 100/1010 [07:35<1:09:05,  4.56s/it]

Removed IESC from filtered tickers. Datapoints: 123, Needed: 79


 10%|█         | 101/1010 [07:41<1:09:11,  4.57s/it]

Removed UHS from filtered tickers. Datapoints: 83, Needed: 79


 11%|█         | 111/1010 [08:27<1:08:32,  4.57s/it]

Removed KFRC from filtered tickers. Datapoints: 105, Needed: 79


 12%|█▏        | 117/1010 [08:58<1:08:33,  4.61s/it]

Removed PLAB from filtered tickers. Datapoints: 145, Needed: 79


 12%|█▏        | 120/1010 [09:11<1:08:11,  4.60s/it]

Removed DKS from filtered tickers. Datapoints: 83, Needed: 79


 12%|█▏        | 121/1010 [09:15<1:08:03,  4.59s/it]

Removed MPWR from filtered tickers. Datapoints: 114, Needed: 79


 12%|█▏        | 123/1010 [09:26<1:08:07,  4.61s/it]

Removed TNK from filtered tickers. Datapoints: 114, Needed: 79


 12%|█▏        | 125/1010 [09:36<1:08:01,  4.61s/it]

Removed MTH from filtered tickers. Datapoints: 80, Needed: 79


 13%|█▎        | 130/1010 [10:04<1:08:11,  4.65s/it]

Removed EBF from filtered tickers. Datapoints: 79, Needed: 79


 13%|█▎        | 136/1010 [10:33<1:07:53,  4.66s/it]

Removed IT from filtered tickers. Datapoints: 97, Needed: 79


 14%|█▍        | 139/1010 [10:47<1:07:39,  4.66s/it]

Removed NTES from filtered tickers. Datapoints: 185, Needed: 79


 14%|█▍        | 140/1010 [10:53<1:07:40,  4.67s/it]

Removed WEYS from filtered tickers. Datapoints: 80, Needed: 79


 14%|█▍        | 141/1010 [10:58<1:07:37,  4.67s/it]

Removed LOGI from filtered tickers. Datapoints: 91, Needed: 79


 14%|█▍        | 146/1010 [11:28<1:07:52,  4.71s/it]

Removed THC from filtered tickers. Datapoints: 90, Needed: 79


 15%|█▍        | 150/1010 [11:50<1:07:55,  4.74s/it]

Removed TGT from filtered tickers. Datapoints: 111, Needed: 79


 16%|█▋        | 166/1010 [13:07<1:06:42,  4.74s/it]

Removed SPLP from filtered tickers. Datapoints: 127, Needed: 79


 17%|█▋        | 175/1010 [13:51<1:06:06,  4.75s/it]

Removed SFM from filtered tickers. Datapoints: 86, Needed: 79


 18%|█▊        | 186/1010 [14:40<1:05:00,  4.73s/it]

Removed REGN from filtered tickers. Datapoints: 111, Needed: 79


 20%|██        | 204/1010 [16:00<1:03:16,  4.71s/it]

Removed MC.PA from filtered tickers. Datapoints: 113, Needed: 79


 21%|██        | 212/1010 [16:40<1:02:45,  4.72s/it]

Removed XOM from filtered tickers. Datapoints: 79, Needed: 79


 21%|██▏       | 216/1010 [16:58<1:02:25,  4.72s/it]

Removed ALFA.ST from filtered tickers. Datapoints: 173, Needed: 79


 22%|██▏       | 220/1010 [17:15<1:01:59,  4.71s/it]

Removed RACE.MI from filtered tickers. Datapoints: 85, Needed: 79


 23%|██▎       | 237/1010 [18:35<1:00:39,  4.71s/it]

Removed EBAY from filtered tickers. Datapoints: 96, Needed: 79


 24%|██▎       | 238/1010 [18:40<1:00:34,  4.71s/it]

Removed RUSHA from filtered tickers. Datapoints: 102, Needed: 79


 24%|██▍       | 240/1010 [18:50<1:00:26,  4.71s/it]

Removed ABT from filtered tickers. Datapoints: 91, Needed: 79


 24%|██▍       | 242/1010 [18:58<1:00:11,  4.70s/it]

Removed RUSHB from filtered tickers. Datapoints: 102, Needed: 79


 25%|██▍       | 248/1010 [19:30<59:55,  4.72s/it]  

Removed PCAR from filtered tickers. Datapoints: 84, Needed: 79


 25%|██▍       | 249/1010 [19:37<59:58,  4.73s/it]

Removed TXN from filtered tickers. Datapoints: 79, Needed: 79


 28%|██▊       | 279/1010 [21:50<57:14,  4.70s/it]

Removed PLUS from filtered tickers. Datapoints: 131, Needed: 79


 28%|██▊       | 285/1010 [22:17<56:43,  4.69s/it]

Removed DGX from filtered tickers. Datapoints: 88, Needed: 79


 31%|███       | 310/1010 [24:13<54:42,  4.69s/it]

Removed SMCI from filtered tickers. Datapoints: 94, Needed: 79


 31%|███       | 315/1010 [24:39<54:23,  4.70s/it]

Removed CI from filtered tickers. Datapoints: 188, Needed: 79


 31%|███▏      | 317/1010 [24:47<54:11,  4.69s/it]

Removed ADUS from filtered tickers. Datapoints: 81, Needed: 79


 32%|███▏      | 326/1010 [25:31<53:34,  4.70s/it]

Removed CNMD from filtered tickers. Datapoints: 94, Needed: 79


 32%|███▏      | 328/1010 [25:41<53:24,  4.70s/it]

Removed BIDU from filtered tickers. Datapoints: 139, Needed: 79


 34%|███▍      | 344/1010 [27:00<52:16,  4.71s/it]

Removed NATR from filtered tickers. Datapoints: 99, Needed: 79


 34%|███▍      | 346/1010 [27:10<52:09,  4.71s/it]

Removed DOLE from filtered tickers. Datapoints: 110, Needed: 79


 35%|███▍      | 349/1010 [27:24<51:54,  4.71s/it]

Removed GPC from filtered tickers. Datapoints: 90, Needed: 79


 35%|███▍      | 350/1010 [27:29<51:50,  4.71s/it]

Removed GPN from filtered tickers. Datapoints: 119, Needed: 79


 35%|███▌      | 355/1010 [27:52<51:26,  4.71s/it]

Removed NGS from filtered tickers. Datapoints: 96, Needed: 79


 36%|███▌      | 361/1010 [28:22<51:00,  4.72s/it]

Removed L.TO from filtered tickers. Datapoints: 141, Needed: 79


 37%|███▋      | 369/1010 [28:59<50:21,  4.71s/it]HTTP Error 404: 
 37%|███▋      | 370/1010 [29:01<50:11,  4.71s/it]

Removed AMED from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 37%|███▋      | 371/1010 [29:04<50:05,  4.70s/it]

Removed TEL from filtered tickers. Datapoints: 92, Needed: 79


 38%|███▊      | 380/1010 [29:47<49:23,  4.70s/it]

Removed BBSI from filtered tickers. Datapoints: 91, Needed: 79


 38%|███▊      | 381/1010 [29:52<49:18,  4.70s/it]

Removed NWE from filtered tickers. Datapoints: 84, Needed: 79


 38%|███▊      | 384/1010 [30:08<49:08,  4.71s/it]

Removed LH from filtered tickers. Datapoints: 113, Needed: 79


 38%|███▊      | 388/1010 [30:26<48:48,  4.71s/it]

Removed TRMB from filtered tickers. Datapoints: 92, Needed: 79


 39%|███▉      | 394/1010 [30:57<48:23,  4.71s/it]

Removed FLEX from filtered tickers. Datapoints: 89, Needed: 79


 40%|████      | 406/1010 [31:55<47:29,  4.72s/it]

Removed MKC from filtered tickers. Datapoints: 124, Needed: 79


 42%|████▏     | 425/1010 [33:24<45:59,  4.72s/it]

Removed WIHL.ST from filtered tickers. Datapoints: 115, Needed: 79


 42%|████▏     | 429/1010 [33:44<45:42,  4.72s/it]

Removed CW from filtered tickers. Datapoints: 81, Needed: 79


 43%|████▎     | 434/1010 [34:07<45:18,  4.72s/it]

Removed BCO from filtered tickers. Datapoints: 179, Needed: 79


 44%|████▎     | 441/1010 [34:41<44:45,  4.72s/it]

Removed SNX from filtered tickers. Datapoints: 117, Needed: 79


 44%|████▍     | 442/1010 [34:45<44:40,  4.72s/it]

Removed GNK from filtered tickers. Datapoints: 84, Needed: 79


 45%|████▍     | 452/1010 [35:33<43:53,  4.72s/it]

Removed HIMX from filtered tickers. Datapoints: 112, Needed: 79


 45%|████▌     | 456/1010 [35:50<43:33,  4.72s/it]

Removed FME.DE from filtered tickers. Datapoints: 193, Needed: 79


 45%|████▌     | 458/1010 [36:04<43:29,  4.73s/it]

Removed MGEE from filtered tickers. Datapoints: 82, Needed: 79


 46%|████▌     | 465/1010 [36:39<42:57,  4.73s/it]

Removed FMS from filtered tickers. Datapoints: 193, Needed: 79


 47%|████▋     | 471/1010 [37:07<42:28,  4.73s/it]

Removed WBS from filtered tickers. Datapoints: 108, Needed: 79


 49%|████▊     | 492/1010 [38:44<40:47,  4.73s/it]

Removed ISRG from filtered tickers. Datapoints: 79, Needed: 79


 50%|████▉     | 504/1010 [39:40<39:50,  4.72s/it]

Removed III from filtered tickers. Datapoints: 95, Needed: 79


 51%|█████     | 513/1010 [40:21<39:05,  4.72s/it]

Removed EMR from filtered tickers. Datapoints: 158, Needed: 79


 52%|█████▏    | 528/1010 [41:32<37:55,  4.72s/it]

Removed CNH from filtered tickers. Datapoints: 106, Needed: 79


 53%|█████▎    | 535/1010 [42:05<37:22,  4.72s/it]

Removed CDNS from filtered tickers. Datapoints: 132, Needed: 79


 54%|█████▍    | 544/1010 [42:45<36:38,  4.72s/it]

Removed EBTC from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 54%|█████▍    | 546/1010 [42:54<36:27,  4.71s/it]HTTP Error 404: 
 54%|█████▍    | 547/1010 [42:56<36:20,  4.71s/it]

Removed BRKL from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 56%|█████▌    | 567/1010 [44:24<34:41,  4.70s/it]

Removed WSP.TO from filtered tickers. Datapoints: 86, Needed: 79


 57%|█████▋    | 576/1010 [45:05<33:58,  4.70s/it]

Removed SUPN from filtered tickers. Datapoints: 111, Needed: 79


 58%|█████▊    | 585/1010 [45:43<33:12,  4.69s/it]

Removed JNPR from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 59%|█████▉    | 596/1010 [46:31<32:19,  4.68s/it]

Removed APO from filtered tickers. Datapoints: 111, Needed: 79


 59%|█████▉    | 598/1010 [46:40<32:09,  4.68s/it]

Removed ARL from filtered tickers. Datapoints: 97, Needed: 79


 61%|██████    | 618/1010 [48:00<30:27,  4.66s/it]

Removed BP from filtered tickers. Datapoints: 100, Needed: 79


 62%|██████▏   | 628/1010 [48:37<29:34,  4.65s/it]

Removed COTY from filtered tickers. Datapoints: 93, Needed: 79


 66%|██████▌   | 664/1010 [50:59<26:34,  4.61s/it]

Removed FGB from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 67%|██████▋   | 675/1010 [51:40<25:38,  4.59s/it]

Removed GES from filtered tickers. Datapoints: 98, Needed: 79


 68%|██████▊   | 683/1010 [52:10<24:58,  4.58s/it]

Removed GTI from filtered tickers. Datapoints: 119, Needed: 79


 68%|██████▊   | 684/1010 [52:14<24:54,  4.58s/it]

Removed GTN from filtered tickers. Datapoints: 81, Needed: 79


 69%|██████▉   | 701/1010 [53:26<23:33,  4.57s/it]

Removed IPI from filtered tickers. Datapoints: 88, Needed: 79


 70%|███████   | 709/1010 [53:56<22:54,  4.57s/it]

Removed KB from filtered tickers. Datapoints: 93, Needed: 79


 71%|███████   | 714/1010 [54:18<22:30,  4.56s/it]

Removed KRG from filtered tickers. Datapoints: 79, Needed: 79


 72%|███████▏  | 723/1010 [55:01<21:50,  4.57s/it]HTTP Error 404: 
 72%|███████▏  | 724/1010 [55:03<21:45,  4.56s/it]

Removed MAV from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 72%|███████▏  | 728/1010 [55:20<21:26,  4.56s/it]HTTP Error 404: 
 72%|███████▏  | 729/1010 [55:22<21:20,  4.56s/it]

Removed MHI from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 74%|███████▍  | 748/1010 [56:44<19:52,  4.55s/it]

Removed NC from filtered tickers. Datapoints: 98, Needed: 79


 75%|███████▍  | 754/1010 [57:09<19:24,  4.55s/it]

Removed NUS from filtered tickers. Datapoints: 130, Needed: 79


 75%|███████▌  | 759/1010 [57:31<19:01,  4.55s/it]

Removed OMI from filtered tickers. Datapoints: 91, Needed: 79


 76%|███████▌  | 763/1010 [57:50<18:43,  4.55s/it]HTTP Error 404: 
 76%|███████▌  | 764/1010 [57:53<18:38,  4.55s/it]

Removed PCK from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 76%|███████▌  | 767/1010 [58:03<18:23,  4.54s/it]

Removed PDM from filtered tickers. Datapoints: 86, Needed: 79


 77%|███████▋  | 774/1010 [58:32<17:50,  4.54s/it]HTTP Error 404: 
 77%|███████▋  | 775/1010 [58:34<17:45,  4.53s/it]

Removed PMF from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 77%|███████▋  | 777/1010 [58:41<17:36,  4.53s/it]HTTP Error 404: 
 77%|███████▋  | 778/1010 [58:43<17:30,  4.53s/it]

Removed PMX from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


HTTP Error 404: 
 77%|███████▋  | 779/1010 [58:45<17:25,  4.53s/it]

Removed PNF from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 78%|███████▊  | 784/1010 [59:05<17:01,  4.52s/it]HTTP Error 404: 
 78%|███████▊  | 785/1010 [59:06<16:56,  4.52s/it]

Removed PYN from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


HTTP Error 404: 
 78%|███████▊  | 786/1010 [59:08<16:51,  4.51s/it]

Removed PZC from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 78%|███████▊  | 789/1010 [59:21<16:37,  4.51s/it]

Removed RIG from filtered tickers. Datapoints: 79, Needed: 79


 79%|███████▊  | 793/1010 [59:37<16:18,  4.51s/it]

Removed S from filtered tickers. Datapoints: 85, Needed: 79


 79%|███████▉  | 800/1010 [1:00:10<15:47,  4.51s/it]

Removed SOL from filtered tickers. Datapoints: 87, Needed: 79


 80%|███████▉  | 805/1010 [1:00:31<15:24,  4.51s/it]

Removed TCI from filtered tickers. Datapoints: 93, Needed: 79


 80%|████████  | 811/1010 [1:00:59<14:57,  4.51s/it]HTTP Error 404: 
 80%|████████  | 812/1010 [1:01:01<14:52,  4.51s/it]

Removed USM from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 81%|████████  | 814/1010 [1:01:06<14:42,  4.50s/it]

Removed XIN from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 81%|████████  | 816/1010 [1:01:13<14:33,  4.50s/it]

Removed ADI from filtered tickers. Datapoints: 107, Needed: 79


 81%|████████  | 817/1010 [1:01:18<14:28,  4.50s/it]

Removed AEIS from filtered tickers. Datapoints: 145, Needed: 79


 81%|████████▏ | 822/1010 [1:01:40<14:06,  4.50s/it]

Removed ALNY from filtered tickers. Datapoints: 122, Needed: 79


 82%|████████▏ | 825/1010 [1:01:53<13:52,  4.50s/it]

Removed AMRN from filtered tickers. Datapoints: 122, Needed: 79


 82%|████████▏ | 828/1010 [1:02:06<13:39,  4.50s/it]

Removed ATEC from filtered tickers. Datapoints: 84, Needed: 79


 82%|████████▏ | 832/1010 [1:02:28<13:21,  4.50s/it]

Removed BCRX from filtered tickers. Datapoints: 130, Needed: 79


 83%|████████▎ | 834/1010 [1:02:35<13:12,  4.50s/it]

Removed BLDP from filtered tickers. Datapoints: 90, Needed: 79


 84%|████████▎ | 844/1010 [1:03:17<12:26,  4.50s/it]

Removed CIEN from filtered tickers. Datapoints: 98, Needed: 79


 84%|████████▍ | 848/1010 [1:03:32<12:08,  4.50s/it]

Removed CLRO from filtered tickers. Datapoints: 110, Needed: 79


 84%|████████▍ | 850/1010 [1:03:42<11:59,  4.50s/it]

Removed CNTY from filtered tickers. Datapoints: 104, Needed: 79


 84%|████████▍ | 852/1010 [1:03:50<11:50,  4.50s/it]
1 Failed download:
['CRESW']: YFInvalidPeriodError("CRESW: Period 'max' is invalid, must be one of: 1d, 5d")

1 Failed download:
['CRESW']: YFInvalidPeriodError("CRESW: Period 'max' is invalid, must be one of: 1d, 5d")

1 Failed download:
['CRESW']: YFInvalidPeriodError("CRESW: Period 'max' is invalid, must be one of: 1d, 5d")

1 Failed download:
['CRESW']: YFInvalidPeriodError("CRESW: Period 'max' is invalid, must be one of: 1d, 5d")

1 Failed download:
['CRESW']: YFInvalidPeriodError("CRESW: Period 'max' is invalid, must be one of: 1d, 5d")

1 Failed download:
['CRESW']: YFInvalidPeriodError("CRESW: Period 'max' is invalid, must be one of: 1d, 5d")

1 Failed download:
['CRESW']: YFInvalidPeriodError("CRESW: Period 'max' is invalid, must be one of: 1d, 5d")
 84%|████████▍ | 853/1010 [1:03:53<11:45,  4.49s/it]HTTP Error 404: 
 85%|████████▍ | 854/1010 [1:03:55<11:40,  4.49s/it]

Removed CUBA from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 85%|████████▍ | 856/1010 [1:04:03<11:31,  4.49s/it]

Removed CZR from filtered tickers. Datapoints: 93, Needed: 79


 85%|████████▌ | 861/1010 [1:04:26<11:09,  4.49s/it]

Removed EQIX from filtered tickers. Datapoints: 178, Needed: 79


 86%|████████▌ | 867/1010 [1:04:50<10:41,  4.49s/it]

Removed FOLD from filtered tickers. Datapoints: 112, Needed: 79


 87%|████████▋ | 878/1010 [1:05:39<09:52,  4.49s/it]

Removed HIFS from filtered tickers. Datapoints: 95, Needed: 79


 88%|████████▊ | 887/1010 [1:06:22<09:12,  4.49s/it]

Removed INTC from filtered tickers. Datapoints: 126, Needed: 79


 88%|████████▊ | 888/1010 [1:06:27<09:07,  4.49s/it]

Removed INTG from filtered tickers. Datapoints: 112, Needed: 79


 88%|████████▊ | 889/1010 [1:06:31<09:03,  4.49s/it]

Removed IRIX from filtered tickers. Datapoints: 90, Needed: 79


 88%|████████▊ | 892/1010 [1:06:43<08:49,  4.49s/it]HTTP Error 404: 
 88%|████████▊ | 893/1010 [1:06:45<08:44,  4.49s/it]

Removed KIRK from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 91%|█████████ | 920/1010 [1:08:33<06:42,  4.47s/it]

Removed PACB from filtered tickers. Datapoints: 80, Needed: 79


 91%|█████████▏| 924/1010 [1:08:50<06:24,  4.47s/it]

Removed PETS from filtered tickers. Datapoints: 84, Needed: 79


 92%|█████████▏| 925/1010 [1:08:54<06:19,  4.47s/it]

Removed PLCE from filtered tickers. Datapoints: 82, Needed: 79


 92%|█████████▏| 926/1010 [1:08:59<06:15,  4.47s/it]

Removed PRKR from filtered tickers. Datapoints: 98, Needed: 79


 93%|█████████▎| 939/1010 [1:09:53<05:17,  4.47s/it]

Removed RGLS from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 94%|█████████▎| 945/1010 [1:10:14<04:49,  4.46s/it]

Removed SGRP from filtered tickers. Datapoints: 92, Needed: 79


 94%|█████████▎| 946/1010 [1:10:18<04:45,  4.46s/it]

Removed SLAB from filtered tickers. Datapoints: 151, Needed: 79


 94%|█████████▍| 948/1010 [1:10:25<04:36,  4.46s/it]

Removed SMTC from filtered tickers. Datapoints: 94, Needed: 79


 94%|█████████▍| 950/1010 [1:10:33<04:27,  4.46s/it]

Removed SNPS from filtered tickers. Datapoints: 121, Needed: 79


 94%|█████████▍| 952/1010 [1:10:40<04:18,  4.45s/it]

Removed SOHO from filtered tickers. Datapoints: 95, Needed: 79


 95%|█████████▌| 961/1010 [1:11:10<03:37,  4.44s/it]

Removed TGTX from filtered tickers. Datapoints: 93, Needed: 79


 95%|█████████▌| 962/1010 [1:11:12<03:33,  4.44s/it]

Removed THRD from filtered tickers because an exception was raised 
 "['Ticker', 'Name', 'Date', '3M Future Change', 'Sector', 'Industry'] not found in axis"


 96%|█████████▌| 969/1010 [1:11:54<03:02,  4.45s/it]

Removed UHAL from filtered tickers. Datapoints: 98, Needed: 79


 97%|█████████▋| 982/1010 [1:12:48<02:04,  4.45s/it]

Removed INVE-B.ST from filtered tickers. Datapoints: 86, Needed: 79


 97%|█████████▋| 984/1010 [1:12:57<01:55,  4.45s/it]

Removed GLEN.L from filtered tickers. Datapoints: 138, Needed: 79


100%|█████████▉| 1009/1010 [1:14:51<00:04,  4.45s/it]

Removed CSU.TO from filtered tickers. Datapoints: 88, Needed: 79


100%|██████████| 1010/1010 [1:14:56<00:00,  4.45s/it]


### Short visualisation

In [7]:
if debugging:
    display(df)

Unnamed: 0,Ticker,Name,Date,Earn Index,Sector,Industry,3M Future Change,TaxEffectOfUnusualItems,TaxRateForCalcs,NormalizedEBITDA,...,LiabilitiesHeldforSaleNonCurrent,DuetoRelatedPartiesNonCurrent,OccupancyAndEquipment,ProfessionalExpenseAndContractServicesExpense,PolicyholderBenefitsGross,PolicyholderBenefitsCeded,DepletionIncomeStatement,OtherNonInterestExpense,SecuritiesAmortization,ExciseTaxes
0,BAESY,BAE Systems PLC,2024-12-31,1.0,Industrials,Aerospace & Defense,0.451540,-2.520000e+06,0.180000,3.986000e+09,...,,,,,,,,,,
1,BAESY,BAE Systems PLC,2023-12-31,3.0,Industrials,Aerospace & Defense,0.216240,-5.567000e+07,0.190000,3.740000e+09,...,,,,,,,,,,
2,BAESY,BAE Systems PLC,2022-12-31,4.0,Industrials,Aerospace & Defense,0.171293,7.744344e+07,0.158371,2.570000e+09,...,,,,,,,,,,
3,BAESY,BAE Systems PLC,2021-12-31,5.0,Industrials,Aerospace & Defense,0.257805,2.160000e+07,0.180000,3.005000e+09,...,,,,,,,,,,
4,LLY,Eli Lilly and Company,2025-03-31,0.0,Healthcare,Drug Manufacturers - General,,-3.541868e+08,0.202000,5.916000e+09,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4572,FM.TO,FIRST QUANTUM MINERALS LTD,2024-09-30,2.0,Basic Materials,Copper,0.010846,-4.050000e+06,0.150000,5.230000e+08,...,,,,,,,,,,
4573,FM.TO,FIRST QUANTUM MINERALS LTD,2024-06-30,3.0,Basic Materials,Copper,0.054535,-1.095000e+07,0.150000,3.560000e+08,...,,,,,,,,,,
4574,FM.TO,FIRST QUANTUM MINERALS LTD,2023-12-31,5.0,Basic Materials,Copper,0.317647,-4.064000e+08,0.400000,2.292000e+09,...,,,,,,,,,,
4575,FM.TO,FIRST QUANTUM MINERALS LTD,2022-12-31,6.0,Basic Materials,Copper,0.098268,4.048000e+07,0.220000,3.099000e+09,...,,,,,,,,,,


### Impution and encoding

In [8]:
imputer = SimpleImputer()
scaler = StandardScaler()
for column in df.columns.drop(["Ticker", "Name", "Date", "Earn Index", "3M Future Change", "Sector", "Industry"]):
    df[column] = imputer.fit_transform(df[[column]])
    scaler.fit(df[[column]])
    df[column] = scaler.transform(df[[column]])

le = LabelEncoder()
for column in ["Ticker", "Sector", "Industry"]:
    df[column] = df[column].astype(str)
    le.fit(df[column])
    df[column] = le.transform(df[column])

### Splitting

In [10]:
pred_data = df.loc[df["Earn Index"] == 0].copy()
test_data  = df.loc[df["Earn Index"] == 1].copy()
train_data = df.loc[~df["Earn Index"].isin([0, 1])].copy()

pred_data.reset_index(drop=True, inplace=True)
test_data.reset_index(drop=True, inplace=True)
train_data.reset_index(drop=True, inplace=True)

if debugging:
    print('Prediction Data:')
    display(pred_data)
    print("Test Data:")
    display(test_data)
    print('Training Data:')
    display(train_data)

Prediction Data:


Unnamed: 0,Ticker,Name,Date,Earn Index,Sector,Industry,3M Future Change,TaxEffectOfUnusualItems,TaxRateForCalcs,NormalizedEBITDA,...,LiabilitiesHeldforSaleNonCurrent,DuetoRelatedPartiesNonCurrent,OccupancyAndEquipment,ProfessionalExpenseAndContractServicesExpense,PolicyholderBenefitsGross,PolicyholderBenefitsCeded,DepletionIncomeStatement,OtherNonInterestExpense,SecuritiesAmortization,ExciseTaxes
0,464,Eli Lilly and Company,2025-03-31,0.0,6,30,,-0.407855,0.186254,0.237679,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
1,185,Danske Bank A/S,2025-03-31,0.0,5,11,,-0.003093,0.621869,0.000000,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
2,435,Jyske Bank A/S,2025-06-30,0.0,5,11,,-0.003093,0.822176,0.000000,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3,560,"Netflix, Inc.",2025-06-30,0.0,1,37,,-0.003093,-0.488949,0.369670,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
4,738,Scandinavian Tobacco Group A/S,2025-03-31,0.0,3,107,,-0.021440,0.491184,-0.169067,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
530,123,CWLTH BANK FPO [CBA],2025-06-30,0.0,5,10,,-0.192778,1.297072,0.000000,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
531,654,ProSiebenSat.1 Media SE N,2023-12-31,0.0,1,16,,-0.011504,-0.009773,-0.179712,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
532,441,Kinnevik AB ser. B,2025-06-30,0.0,5,6,,-0.003093,-2.013602,0.000000,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
533,811,WASTE CONNECTIONS INC,2025-06-30,0.0,7,116,,-0.004263,0.752553,-0.136507,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0


Test Data:


Unnamed: 0,Ticker,Name,Date,Earn Index,Sector,Industry,3M Future Change,TaxEffectOfUnusualItems,TaxRateForCalcs,NormalizedEBITDA,...,LiabilitiesHeldforSaleNonCurrent,DuetoRelatedPartiesNonCurrent,OccupancyAndEquipment,ProfessionalExpenseAndContractServicesExpense,PolicyholderBenefitsGross,PolicyholderBenefitsCeded,DepletionIncomeStatement,OtherNonInterestExpense,SecuritiesAmortization,ExciseTaxes
0,61,BAE Systems PLC,2024-12-31,1.0,7,1,0.451540,-0.005973,-0.053334,0.096969,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
1,464,Eli Lilly and Company,2024-12-31,1.0,6,30,0.044842,-0.793256,-0.216690,1.222071,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
2,185,Danske Bank A/S,2024-12-31,1.0,5,11,0.176085,-0.011424,0.632759,0.000000,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3,118,Carlsberg B A/S,2024-12-31,1.0,3,12,0.309414,-0.116230,0.142692,0.994522,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
4,435,Jyske Bank A/S,2025-03-31,1.0,5,11,0.163339,-0.003093,0.807005,0.000000,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
668,441,Kinnevik AB ser. B,2025-03-31,1.0,5,6,0.187961,-0.003093,-2.013602,0.000000,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
669,472,LONDON STOCK EXCHANGE GROUP PLC,2024-12-31,1.0,5,40,0.027027,-0.048421,0.905019,0.114539,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
670,304,FRESNILLO PLC ORD USD0.50,2024-12-31,1.0,0,74,0.533387,-0.055384,1.253511,-0.077882,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
671,811,WASTE CONNECTIONS INC,2025-03-31,1.0,7,116,-0.091643,-0.004771,0.469404,-0.143314,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0


Training Data:


Unnamed: 0,Ticker,Name,Date,Earn Index,Sector,Industry,3M Future Change,TaxEffectOfUnusualItems,TaxRateForCalcs,NormalizedEBITDA,...,LiabilitiesHeldforSaleNonCurrent,DuetoRelatedPartiesNonCurrent,OccupancyAndEquipment,ProfessionalExpenseAndContractServicesExpense,PolicyholderBenefitsGross,PolicyholderBenefitsCeded,DepletionIncomeStatement,OtherNonInterestExpense,SecuritiesAmortization,ExciseTaxes
0,61,BAE Systems PLC,2023-12-31,3.0,7,1,0.216240,-0.066712,0.055569,0.079034,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
1,61,BAE Systems PLC,2022-12-31,4.0,7,1,0.171293,0.085409,-0.288882,-0.006267,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
2,61,BAE Systems PLC,2021-12-31,5.0,7,1,0.257805,0.021591,-0.053334,0.025448,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3,464,Eli Lilly and Company,2024-09-30,2.0,6,30,-0.125086,-1.245866,2.222754,0.174068,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
4,464,Eli Lilly and Company,2024-06-30,3.0,6,30,-0.029119,-0.136669,-0.310010,0.160864,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3364,292,FIRST QUANTUM MINERALS LTD,2024-09-30,2.0,0,26,0.010846,-0.007722,-0.380046,-0.155507,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3365,292,FIRST QUANTUM MINERALS LTD,2024-06-30,3.0,0,26,0.054535,-0.015607,-0.380046,-0.167682,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3366,292,FIRST QUANTUM MINERALS LTD,2023-12-31,5.0,0,26,0.317647,-0.467524,2.342548,-0.026535,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3367,292,FIRST QUANTUM MINERALS LTD,2022-12-31,6.0,0,26,0.098268,0.043167,0.382281,0.032301,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0


### Labeling

In [11]:
training_columns = train_data.columns.drop(["Name", "Sector", "Industry", "Date", '3M Future Change'])
label_columns = ['3M Future Change']
X_pred = pred_data[training_columns]
X_test = test_data[training_columns]
y_test = test_data[label_columns]
X_train = train_data[training_columns]
y_train = train_data[label_columns]
if debugging:
    print("X_pred:")
    display(X_pred)
    print("X_test:")
    display(X_test)
    print("y_test:")
    display(y_test)
    print("X_train:")
    display(X_train)
    print("y_train:")
    display(y_train)

X_pred:


Unnamed: 0,Ticker,Earn Index,TaxEffectOfUnusualItems,TaxRateForCalcs,NormalizedEBITDA,TotalUnusualItems,TotalUnusualItemsExcludingGoodwill,NetIncomeFromContinuingOperationNetMinorityInterest,ReconciledDepreciation,ReconciledCostOfRevenue,...,LiabilitiesHeldforSaleNonCurrent,DuetoRelatedPartiesNonCurrent,OccupancyAndEquipment,ProfessionalExpenseAndContractServicesExpense,PolicyholderBenefitsGross,PolicyholderBenefitsCeded,DepletionIncomeStatement,OtherNonInterestExpense,SecuritiesAmortization,ExciseTaxes
0,464,0.0,-0.407855,0.186254,0.237679,-6.916848e-01,-6.893223e-01,0.059503,-0.067045,-2.132019e-01,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
1,185,0.0,-0.003093,0.621869,0.000000,-1.060253e+00,-1.057767e+00,0.193615,0.919742,-6.452061e-17,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
2,435,0.0,-0.003093,0.822176,0.000000,-8.434582e-03,-6.301482e-03,-0.006634,-0.153817,-6.452061e-17,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3,560,0.0,-0.003093,-0.488949,0.369670,-1.485529e-18,7.425151e-18,0.075882,0.980186,-3.396913e-03,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
4,738,0.0,-0.021440,0.491184,-0.169067,-2.031789e-02,-1.818080e-02,-0.061640,-0.173764,-2.887858e-01,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
530,123,0.0,-0.192778,1.297072,0.000000,-2.102115e-01,-2.080107e-01,0.389391,0.216889,-6.452061e-17,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
531,654,0.0,-0.011504,-0.009773,-0.179712,-7.637044e-03,-6.301482e-03,-0.069492,-0.138636,-1.734344e-01,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
532,441,0.0,-0.003093,-2.013602,0.000000,-1.485529e-18,7.425151e-18,-0.035938,0.000000,-6.452061e-17,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
533,811,0.0,-0.004263,0.752553,-0.136507,5.909126e-03,8.037410e-03,-0.050958,-0.114148,-2.694462e-01,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0


X_test:


Unnamed: 0,Ticker,Earn Index,TaxEffectOfUnusualItems,TaxRateForCalcs,NormalizedEBITDA,TotalUnusualItems,TotalUnusualItemsExcludingGoodwill,NetIncomeFromContinuingOperationNetMinorityInterest,ReconciledDepreciation,ReconciledCostOfRevenue,...,LiabilitiesHeldforSaleNonCurrent,DuetoRelatedPartiesNonCurrent,OccupancyAndEquipment,ProfessionalExpenseAndContractServicesExpense,PolicyholderBenefitsGross,PolicyholderBenefitsCeded,DepletionIncomeStatement,OtherNonInterestExpense,SecuritiesAmortization,ExciseTaxes
0,61,1.0,-0.005973,-0.053334,0.096969,1.933403e-03,4.063022e-03,0.023564,0.121860,2.509640e-01,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
1,464,1.0,-0.793256,-0.216690,1.222071,-1.663524e+00,-1.660835e+00,0.409836,0.328799,2.058585e-01,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
2,185,1.0,-0.011424,0.632759,0.000000,-4.446895e-03,-2.315134e-03,0.993181,0.103947,-6.452061e-17,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3,118,1.0,-0.116230,0.142692,0.994522,-1.918682e-01,-1.896735e-01,0.242872,1.119212,2.385201e+00,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
4,435,1.0,-0.003093,0.807005,0.000000,-8.434582e-03,-6.301482e-03,-0.007753,-0.149566,-6.452061e-17,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
668,441,1.0,-0.003093,-2.013602,0.000000,-1.485529e-18,7.425151e-18,-0.199815,0.000000,-6.452061e-17,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
669,472,1.0,-0.048421,0.905019,0.114539,-5.150159e-02,-4.935404e-02,-0.033298,0.535981,-2.843206e-01,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
670,304,1.0,-0.055384,1.253511,-0.077882,-5.330602e-02,-5.115786e-02,-0.057640,-0.019055,-2.114488e-01,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
671,811,1.0,-0.004771,0.469404,-0.143314,4.948094e-03,7.076701e-03,-0.053140,-0.119524,-2.763073e-01,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0


y_test:


Unnamed: 0,3M Future Change
0,0.451540
1,0.044842
2,0.176085
3,0.309414
4,0.163339
...,...
668,0.187961
669,0.027027
670,0.533387
671,-0.091643


X_train:


Unnamed: 0,Ticker,Earn Index,TaxEffectOfUnusualItems,TaxRateForCalcs,NormalizedEBITDA,TotalUnusualItems,TotalUnusualItemsExcludingGoodwill,NetIncomeFromContinuingOperationNetMinorityInterest,ReconciledDepreciation,ReconciledCostOfRevenue,...,LiabilitiesHeldforSaleNonCurrent,DuetoRelatedPartiesNonCurrent,OccupancyAndEquipment,ProfessionalExpenseAndContractServicesExpense,PolicyholderBenefitsGross,PolicyholderBenefitsCeded,DepletionIncomeStatement,OtherNonInterestExpense,SecuritiesAmortization,ExciseTaxes
0,61,3.0,-0.066712,0.055569,0.079034,-0.109323,-0.107156,0.019135,0.029866,0.168966,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
1,61,4.0,0.085409,-0.288882,-0.006267,0.202514,0.204576,0.007235,0.024401,0.116263,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
2,61,5.0,0.021591,-0.053334,0.025448,0.055368,0.057480,0.014706,0.005274,0.107265,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3,464,2.0,-1.245866,2.222754,0.174068,-1.107281,-1.104779,-0.020535,-0.065831,-0.216815,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
4,464,3.0,-0.136669,-0.310010,0.160864,-0.290444,-0.288216,0.068795,-0.081740,-0.216855,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3364,292,2.0,-0.007722,-0.380046,-0.155507,-0.003251,-0.001119,-0.059113,-0.159282,-0.308000,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3365,292,3.0,-0.015607,-0.380046,-0.167682,-0.021594,-0.019456,-0.066002,-0.162621,-0.302926,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3366,292,5.0,-0.467524,2.342548,-0.026535,-0.397633,-0.395369,-0.106625,0.132789,-0.014310,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0
3367,292,6.0,0.043167,0.382281,0.032301,0.080890,0.082993,-0.017685,0.165883,0.003415,...,-4.759382e-16,-1.324493e-15,6.922852e-17,0.0,1.138282e-15,0.0,0.0,-8.378651e-17,3.917480e-15,0.0


y_train:


Unnamed: 0,3M Future Change
0,0.216240
1,0.171293
2,0.257805
3,-0.125086
4,-0.029119
...,...
3364,0.010846
3365,0.054535
3366,0.317647
3367,0.098268


## Model Training

In [12]:
if train_new_model:
    model = RandomForestRegressor(
        n_estimators=tree_amount,
        verbose=True
    )
    model.fit(X_train, y_train.values.ravel())

## Testing and benchmarking

In [13]:
if train_new_model:
    y_test_pred = model.predict(X_test)

    for i, target in enumerate(['3M Future Change']):
        y_test_actual = y_test[target].values
        if getattr(y_test_pred, "ndim", 1) == 1:
            y_test_pred_target = y_test_pred
        else:
            y_test_pred_target = y_test_pred[:, i]

        plt.figure(figsize=(11, 6))
        plt.scatter(y_test_actual, y_test_pred_target, alpha=0.7, color='blue', label='Predictions')
        plt.plot([y_test_actual.min(), y_test_actual.max()], [y_test_actual.min(), y_test_actual.max()], 
            color='red', linestyle='--', label='Perfect Fit')
        plt.title(f'Predicted vs Actual Values ({target})')
        plt.xlabel('Actual Values')
        plt.ylabel('Predicted Values')
        plt.legend()
        plt.grid(True)
        plt.show()

        mae = mean_absolute_error(y_test_actual, y_test_pred_target)
        mse = mean_squared_error(y_test_actual, y_test_pred_target)
        r2 = r2_score(y_test_actual, y_test_pred_target)

        print(f'{target} - R²: {r2:.4f}')
        print(f'{target} - MSE: {mse:.4f}')
        print(f'{target} - MAE: {mae:.4f}')

    # Overall metrics (single target)
    overall_pred = y_test_pred if getattr(y_test_pred, "ndim", 1) == 1 else y_test_pred[:, 0]
    mae = mean_absolute_error(y_test['3M Future Change'].values, overall_pred)
    mse = mean_squared_error(y_test['3M Future Change'].values, overall_pred)
    r2 = r2_score(y_test['3M Future Change'].values, overall_pred)

    print('\nOverall Scores:')
    print(f'Mean - R²: {r2:.4f}')
    print(f'Mean - MSE: {mse:.4f}')
    print(f'Mean - MAE: {mae:.4f}')

### Log test results

In [14]:
if train_new_model:
    test_results = pd.DataFrame({
        'R²': r2,
        'MSE': mse,
        'MAE': mae,
        'symbol_list': symbol_list,
        "iterations": iterations,
        'hidden_layer_sizes': [model.model.hidden_layer_sizes],
        'max_iter': model.model.max_iter,
        'n_iter_no_change': model.model.n_iter_no_change,
        'learning_rate': model.model.learning_rate,
        'learning_rate_init': model.model.learning_rate_init,
        'batch_size': model.model.batch_size,
        'tol': model.model.tol,
        'alpha': model.model.alpha,
        'shuffle': model.model.shuffle,
    })
    test_results.to_csv('../data/model_results/test_results.csv', mode='a', index=False)

    # save model as new best if results are better than the current one
    best_r2 = pd.read_csv('../data/model_results/best_model_results.csv').loc[0, 'R²']
    if r2 > best_r2:
        print(f'Old best R²: {best_r2}')
        print(f'New best R²: {r2}')
        print('Saving new best model...')
        test_results.to_csv('../data/model_results/best_model_results.csv', mode='w', index=False)
        with open('../models/best_model.pkl','wb') as f:
            pickle.dump(model,f)
        

## Predictions on latest data

In [15]:
best_r2 = pd.read_csv('../data/model_results/best_model_results.csv').loc[0, 'R²']

with open('../models/best_model.pkl', 'rb') as f:
    model = pickle.load(f)
    print(f'Best model R²: {best_r2}')


FileNotFoundError: [Errno 2] No such file or directory: '../models/best_model.pkl'

In [None]:
df_raw = pd.read_csv('../data/model_results/earnings_data.csv')

results = []
for i in range(len(X_pred)):
    y_pred = model.predict(X_pred.iloc[[i]])[0]
    y_pred_3m, y_pred_6m, y_pred_9m, y_pred_1y = y_pred
    avg = (y_pred_3m + y_pred_6m + y_pred_9m + y_pred_1y) / 4
    results.append({
        'Ticker': df_raw.loc[i*4, 'Ticker'],
        'Name': df_raw.loc[i*4, 'Name'],
        'mean (%)': avg * 100,
        '3m (%)': y_pred_3m * 100,
        '6m (%)': y_pred_6m * 100,
        '9m (%)': y_pred_9m * 100,
        '1y (%)': y_pred_1y * 100
    })

results_df = pd.DataFrame(results)
results_df

ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- ExciseTaxes


In [None]:
def predict_ticker(ticker_str):
    try:
        ticker_str = str(ticker_str).upper()
        ticker_str = ticker_str.replace("'", "")
        ticker_str = ticker_str.replace('"', "")
        row = results_df[results_df['Ticker'] == ticker_str]
        if row.empty:
            return (f"Not enough data for this stock at this moment \n Try another", "", "", "", "")
        row = row.iloc[0]
        return (
            f"{row['3m (%)']:.2f}",
            f"{row['6m (%)']:.2f}",
            f"{row['9m (%)']:.2f}",
            f"{row['1y (%)']:.2f}",
            f"{row['mean (%)']:.2f}",
        )
    except Exception as e:
        return (f"Error: {e}", "", "", "", "")

In [None]:
iface = gr.Interface(
    fn=predict_ticker,
    inputs=gr.Textbox(label="Ticker e.g. 'TSLA' or 'NVDA'"),
    outputs=[
        gr.Textbox(label="3 Month Change Prediction (%)"),
        gr.Textbox(label="6 Month Change Prediction (%)"),
        gr.Textbox(label="9 Month Change Prediction (%)"),
        gr.Textbox(label="1 Year Change Prediction (%)"),
        gr.Textbox(label="Mean Change Prediction (%)"),
    ],
    title="Stock Price Prediction Model",
    description=" "
)

iface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://d6f7567b4876962a6e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


