# Stock Prediction Model

## Dependencies

### Library Installation (if needed)

In [110]:
#! pip install yfinance pandas numpy matplotlib lightgbm torch tensorflow keras scikit-learn tqdm gradio

### Importing Required Libraries

In [111]:
import pickle
from datetime import datetime
from pathlib import Path

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.impute import SimpleImputer
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tqdm import tqdm

### Variables

In [112]:
# Notebook-wide switches; set these before running the cells below.
print_errors = True  # print the exception message for every ticker that fails during the dataset build
build_new_dataset = False  # True: rebuild ../data/earnings_data.csv from yfinance; False: load the saved CSV
train_new_model = False  # True: run the grid search, evaluation and result-logging cells; False: skip them
symbol_list = 'advanced' # 'simple' or 'advanced'. 'simple' reads the tickers in simple_screener_results.csv; 'advanced' reads filtered_tickers.csv plus simple_screener_results.csv
feature_amount = 3000 # deprecated; only referenced by the commented-out column-removal cell
verticle_jobs = 1 #'-1' for max; consumed by the GridSearchCV call in the training cell

## Data Preparation

In [113]:
# Pick the ticker source files for the configured universe. An unknown value
# fails here with a clear message instead of leaving `symbols` undefined.
if symbol_list == 'simple':
    ticker_files = ['../data/simple_screener_results.csv']
elif symbol_list == 'advanced':
    ticker_files = ['../data/filtered_tickers.csv', '../data/simple_screener_results.csv']
else:
    raise ValueError(f"symbol_list must be 'simple' or 'advanced', got {symbol_list!r}")

# Concatenate the 'Ticker' columns in file order, then drop duplicates while
# keeping first-seen order (pandas `unique` is order preserving).
symbols = []
for ticker_file in ticker_files:
    symbols += pd.read_csv(ticker_file)['Ticker'].tolist()

symbols = pd.Series(symbols).unique()
symbols

array(['A', 'AA', 'AAT', ..., 'SYDB.CO', 'UBER', 'GRLA'], dtype=object)

#### Download annual financial data

In [114]:
# Identifier, target and categorical columns; everything else is treated as a
# numeric statement line item and is mean-imputed per ticker.
non_feature_columns = ['Ticker', 'Date', '3M Future Change', '6M Future Change', '1Y Future Change', 'Sector', 'Industry']
# Target column name -> look-ahead in weeks from the earning date.
future_horizons = (('3M Future Change', 13), ('6M Future Change', 26), ('1Y Future Change', 52))


def _info_field(ticker, field):
    """Return ``ticker.info[field]``, or 'Unknown' when the lookup fails for any reason."""
    try:
        return ticker.info[field]
    except Exception:
        return 'Unknown'


def _statement_values(statement, date):
    """Return ``{line item: value}`` taken from the ``date`` column of a statement frame.

    Raises KeyError when ``date`` is not a column of ``statement``; the caller
    treats that as a failure of the whole ticker.
    """
    column = statement[date]
    return {item: column[item] for item in column.keys().tolist()}


def _future_changes(price_data, symbol, earning_date):
    """Return the forward close-to-close changes for one earning date, or None.

    Tries the earning date itself and then up to five earlier calendar days.
    The first offset at which the base close and all three future closes exist
    in ``price_data`` is used for every horizon. Returns None when no offset
    works.
    """
    for day_offset in range(0, -6, -1):
        try:
            base_close = float(price_data.loc[earning_date + pd.Timedelta(days=day_offset), ('Close', symbol)])
            return {
                label: float(price_data.loc[earning_date + pd.Timedelta(days=day_offset, weeks=weeks), ('Close', symbol)]) / base_close - 1
                for label, weeks in future_horizons
            }
        except Exception:
            # Missing date (e.g. non-trading day or outside the download window): try one day earlier.
            continue
    return None


if build_new_dataset:
    ticker_frames = []
    for symbol in tqdm(symbols):
        ticker = yf.Ticker(symbol)
        try:
            # Latest earning data: most recent quarterly column, targets unknown (NaN).
            quarterly_cash_flow = ticker.quarterly_cash_flow
            latest_earning_date = quarterly_cash_flow.columns.tolist()[0]

            # Sector/industry do not depend on the date, so look them up once per ticker.
            sector = _info_field(ticker, 'sector')
            industry = _info_field(ticker, 'industry')

            latest_data = {'Ticker': symbol, 'Date': latest_earning_date}
            latest_data.update({label: np.nan for label, _ in future_horizons})
            latest_data['Sector'] = sector
            latest_data['Industry'] = industry
            latest_data.update(_statement_values(quarterly_cash_flow, latest_earning_date))
            latest_data.update(_statement_values(ticker.quarterly_balance_sheet, latest_earning_date))
            latest_data.update(_statement_values(ticker.quarterly_income_stmt, latest_earning_date))
            rows = [latest_data]

            # Annual data
            cash_flow = ticker.cash_flow
            earning_dates = cash_flow.columns.tolist()
            price_data = yf.download(symbol, period='10y', rounding=False, progress=False)
            balance_sheet = ticker.balance_sheet
            income_statement = ticker.income_stmt
            for earning_date in earning_dates:
                changes = _future_changes(price_data, symbol, earning_date)

                earning_date_data = {'Ticker': symbol, 'Date': earning_date}
                earning_date_data.update(changes or {})
                earning_date_data['Sector'] = sector
                earning_date_data['Industry'] = industry
                # Statements are read even when no price was found, so a date missing
                # from any statement still aborts the whole ticker (KeyError below).
                earning_date_data.update(_statement_values(cash_flow, earning_date))
                earning_date_data.update(_statement_values(balance_sheet, earning_date))
                earning_date_data.update(_statement_values(income_statement, earning_date))

                if changes is not None:
                    rows.append(earning_date_data)

            # Keep only tickers with exactly one latest row plus four priced annual rows;
            # later cells rely on this fixed five-row layout.
            if len(rows) != 5:
                continue

            ticker_df = pd.DataFrame(rows)

            # Fill gaps with the ticker's own column mean; columns that are entirely
            # missing for this ticker are left as NaN.
            imputer = SimpleImputer()
            for column in ticker_df.columns.drop(non_feature_columns):
                if not ticker_df[column].isna().all():
                    ticker_df[column] = imputer.fit_transform(ticker_df[[column]])

            ticker_frames.append(ticker_df)
        except Exception as error:
            if print_errors:
                print(f'Error for {symbol}: {error}')

    # Single concat at the end instead of growing the frame once per ticker.
    df = pd.concat(ticker_frames, ignore_index=True) if ticker_frames else pd.DataFrame()
    df.to_csv('../data/earnings_data.csv', index=False)
else:
    df = pd.read_csv('../data/earnings_data.csv')

### Short visualisation

In [115]:
# Show the assembled earnings table as this cell's rich output.
df

Unnamed: 0,Ticker,Date,3M Future Change,6M Future Change,1Y Future Change,Sector,Industry,Free Cash Flow,Repurchase Of Capital Stock,Repayment Of Debt,...,Paymentsto Suppliersfor Goodsand Services,Classesof Cash Receiptsfrom Operating Activities,Other Cash Receiptsfrom Operating Activities,Receiptsfrom Customers,Excise Taxes,Current Deferred Taxes Assets,Change In Dividend Payable,Depletion,Net Income Extraordinary,Net Income From Tax Loss Carryforward
0,AA,2025-03-31,,,,Basic Materials,Aluminum,-1.800000e+07,-2.166667e+08,-9.460000e+08,...,,,,,,,,,,
1,AA,2023-12-31,-0.018451,0.130405,0.123098,Basic Materials,Aluminum,-4.400000e+08,0.000000e+00,-7.200000e+07,...,,,,,,,,,,
2,AA,2022-12-31,-0.062309,-0.250328,-0.243343,Basic Materials,Aluminum,3.420000e+08,-5.000000e+08,-1.000000e+06,...,,,,,,,,,,
3,AA,2021-12-31,0.522660,-0.246283,-0.231174,Basic Materials,Aluminum,5.300000e+08,-1.500000e+08,-1.294000e+09,...,,,,,,,,,,
4,AA,2020-12-31,0.398265,0.603471,1.574374,Basic Materials,Aluminum,1.035000e+08,-2.166667e+08,-5.782500e+08,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,UBER,2025-03-31,,,,Technology,Software - Application,2.250000e+09,-1.785000e+09,-4.700000e+07,...,,,,,,,,,,
4916,UBER,2023-12-31,0.219354,0.114032,-0.025024,Technology,Software - Application,3.362000e+09,0.000000e+00,-2.871000e+09,...,,,,,,,,,,
4917,UBER,2022-12-31,0.281844,0.745653,1.489689,Technology,Software - Application,3.900000e+08,0.000000e+00,-2.640000e+08,...,,,,,,,,,,
4918,UBER,2021-12-31,-0.141903,-0.491057,-0.410208,Technology,Software - Application,-7.430000e+08,-4.462500e+08,-8.420000e+08,...,,,,,,,,,,


### Removal of low datapoint columns

In [116]:
# columns_to_remove = [col for col in df.columns if df[col].notna().sum() < feature_amount]

# df = df.drop(columns=columns_to_remove)
# print(columns_to_remove)
# df

### Imputation and encoding

In [117]:
# Numeric feature columns: everything except identifiers, targets and categoricals.
feature_columns = df.columns.drop(
    ['Ticker', 'Date', '3M Future Change', '6M Future Change', '1Y Future Change', 'Sector', 'Industry']
)

# Each feature column is handled on its own: remaining gaps are filled with the
# column mean over all rows, then the column is standardised.
# NOTE(review): both steps are fitted on every row of df, i.e. before the
# train/test split performed in a later cell.
imputer = SimpleImputer()
scaler = StandardScaler()
for feature in feature_columns:
    df[feature] = imputer.fit_transform(df[[feature]])
    df[feature] = scaler.fit_transform(df[[feature]])

# Categorical columns become integer codes. The same encoder object is re-fitted
# for each column, so after the loop `le` only holds the 'Industry' classes.
le = LabelEncoder()
for categorical in ('Ticker', 'Sector', 'Industry'):
    df[categorical] = le.fit_transform(df[categorical].astype(str))

### Splitting

In [118]:
# The dataset is laid out in groups of five consecutive rows per ticker: the
# latest (unlabelled) row first, followed by four annual rows with known targets.
# Trailing rows that do not form a complete group are ignored.
rows_per_ticker = 5
n_complete_groups = len(df) // rows_per_ticker
positions = np.arange(n_complete_groups * rows_per_ticker)
is_latest_row = positions % rows_per_ticker == 0

# One positional selection per subset instead of growing frames row by row.
pred_data = df.iloc[positions[is_latest_row]]
print('Prediction Data:')
display(pred_data)

train_data = df.iloc[positions[~is_latest_row]]
print('Train and Test Data:')
display(train_data)

Prediction Data:


Unnamed: 0,Ticker,Date,3M Future Change,6M Future Change,1Y Future Change,Sector,Industry,Free Cash Flow,Repurchase Of Capital Stock,Repayment Of Debt,...,Paymentsto Suppliersfor Goodsand Services,Classesof Cash Receiptsfrom Operating Activities,Other Cash Receiptsfrom Operating Activities,Receiptsfrom Customers,Excise Taxes,Current Deferred Taxes Assets,Change In Dividend Payable,Depletion,Net Income Extraordinary,Net Income From Tax Loss Carryforward
0,0,2025-03-31,,,,0,5,-0.036500,0.080057,0.054585,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
5,2,2025-03-31,,,,8,94,-0.036458,0.000000,0.054774,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
10,3,2025-01-31,,,,7,120,-0.036617,0.095185,0.054759,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
15,4,2025-03-31,,,,8,98,-0.036314,0.092014,0.054026,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
20,5,2025-03-31,,,,6,74,-0.035447,0.075152,0.054568,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4895,350,2025-03-31,,,,4,82,-0.029817,0.054321,0.054762,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
4900,137,2024-12-31,,,,2,67,0.042095,-4.268509,0.050812,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
4905,547,2025-03-31,,,,2,67,-0.059289,-0.150089,0.047552,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
4910,535,2024-12-31,,,,1,42,-0.035956,0.000000,0.054417,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0


Train and Test Data:


Unnamed: 0,Ticker,Date,3M Future Change,6M Future Change,1Y Future Change,Sector,Industry,Free Cash Flow,Repurchase Of Capital Stock,Repayment Of Debt,...,Paymentsto Suppliersfor Goodsand Services,Classesof Cash Receiptsfrom Operating Activities,Other Cash Receiptsfrom Operating Activities,Receiptsfrom Customers,Excise Taxes,Current Deferred Taxes Assets,Change In Dividend Payable,Depletion,Net Income Extraordinary,Net Income From Tax Loss Carryforward
1,0,2023-12-31,-0.018451,0.130405,0.123098,0,5,-0.036968,0.096835,0.054851,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
2,0,2022-12-31,-0.062309,-0.250328,-0.243343,0,5,-0.036102,0.058116,0.054873,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
3,0,2021-12-31,0.522660,-0.246283,-0.231174,0,5,-0.035893,0.085219,0.054479,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
4,0,2020-12-31,0.398265,0.603471,1.574374,0,5,-0.036366,0.080057,0.054697,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
6,2,2023-12-31,-0.031142,-0.031311,0.220023,8,94,-0.036363,0.000000,0.054816,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4914,535,2020-12-31,-0.048055,-0.134439,-0.731121,1,42,-0.037304,0.000000,0.052411,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
4916,967,2023-12-31,0.219354,0.114032,-0.025024,9,117,-0.032755,0.096835,0.053998,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
4917,967,2022-12-31,0.281844,0.745653,1.489689,9,117,-0.036048,0.096835,0.054793,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
4918,967,2021-12-31,-0.141903,-0.491057,-0.410208,9,117,-0.037304,0.062278,0.054616,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0


### Labeling

In [119]:
# The three forward-return columns are the regression targets; 'Date' is
# dropped from the inputs as well.
target_columns = ['3M Future Change', '6M Future Change', '1Y Future Change']
excluded_columns = ['Date'] + target_columns

X_pred = pred_data.drop(excluded_columns, axis=1)
X_labelled = train_data.drop(excluded_columns, axis=1)
y_labelled = train_data[target_columns]

# A fixed random_state makes the 85/15 split reproducible, so scores from
# different runs are measured on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X_labelled,
    y_labelled,
    test_size=0.15,
    shuffle=True,
    random_state=42,
)

display(X_train)
display(y_train)
display(X_test)
display(y_test)

Unnamed: 0,Ticker,Sector,Industry,Free Cash Flow,Repurchase Of Capital Stock,Repayment Of Debt,Issuance Of Debt,Capital Expenditure,End Cash Position,Beginning Cash Position,...,Paymentsto Suppliersfor Goodsand Services,Classesof Cash Receiptsfrom Operating Activities,Other Cash Receiptsfrom Operating Activities,Receiptsfrom Customers,Excise Taxes,Current Deferred Taxes Assets,Change In Dividend Payable,Depletion,Net Income Extraordinary,Net Income From Tax Loss Carryforward
4379,751,7,122,-0.036503,0.096254,0.054870,-0.054246,5.286399e-02,-0.072217,-0.072135,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.000000,-1.420274e-15,0.0,0.0
4414,767,9,115,-0.036363,0.087864,0.054873,-0.054253,5.285753e-02,-0.072189,-0.072068,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.000000,-1.420274e-15,0.0,0.0
3963,473,2,68,-0.035721,0.096835,0.054501,-0.054211,5.275466e-02,-0.071531,-0.071079,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.000000,-1.420274e-15,0.0,0.0
3614,316,2,48,-0.035856,0.088582,0.054861,-0.054251,5.285365e-02,-0.071482,-0.071685,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.000000,-1.420274e-15,0.0,0.0
2927,922,7,120,-0.036572,0.000000,0.054769,-0.054209,5.286106e-02,-0.072215,-0.072127,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.000000,-1.420274e-15,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2444,761,5,13,-0.029306,-0.062358,0.052818,-0.049827,7.516115e-18,-0.048873,-0.040014,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.000000,-1.420274e-15,0.0,0.0
1807,573,4,80,-0.036125,0.096452,0.054750,-0.054253,5.208803e-02,-0.072124,-0.072044,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.000000,-1.420274e-15,0.0,0.0
838,241,5,8,-0.036214,0.095085,0.000000,0.000000,5.288273e-02,-0.072125,-0.072145,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,44.590994,-1.420274e-15,0.0,0.0
1098,335,7,120,-0.036045,0.075302,0.000000,0.000000,5.283174e-02,-0.071379,-0.071359,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.000000,-1.420274e-15,0.0,0.0


Unnamed: 0,3M Future Change,6M Future Change,1Y Future Change
4379,0.080040,0.051724,-0.290447
4414,0.044708,0.011322,0.138717
3963,-0.163950,-0.188359,-0.379287
3614,0.162369,0.126778,-0.148387
2927,0.043810,0.580952,0.257143
...,...,...,...
2444,0.199210,0.304439,0.385154
1807,0.169811,-0.058176,0.055031
838,-0.045413,-0.286643,-0.279960
1098,0.089421,-0.105598,-0.243949


Unnamed: 0,Ticker,Sector,Industry,Free Cash Flow,Repurchase Of Capital Stock,Repayment Of Debt,Issuance Of Debt,Capital Expenditure,End Cash Position,Beginning Cash Position,...,Paymentsto Suppliersfor Goodsand Services,Classesof Cash Receiptsfrom Operating Activities,Other Cash Receiptsfrom Operating Activities,Receiptsfrom Customers,Excise Taxes,Current Deferred Taxes Assets,Change In Dividend Payable,Depletion,Net Income Extraordinary,Net Income From Tax Loss Carryforward
3416,229,5,46,-0.032738,0.000000,0.054868,-0.054253,5.281012e-02,-0.006025,0.032309,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
3442,247,5,13,-0.035330,0.096512,0.054644,-0.053775,5.285870e-02,-0.071335,-0.070085,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
884,254,0,18,-0.036437,0.000000,0.054767,-0.054136,5.273816e-02,-0.072167,-0.072037,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
4123,586,2,49,-0.036444,0.096835,0.054830,-0.054223,5.288146e-02,-0.072237,-0.072149,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
1059,326,6,33,-0.030565,0.075965,0.054582,-0.051739,5.190368e-02,-0.069333,-0.069016,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1146,347,2,111,-0.035426,0.061314,0.054867,-0.054253,5.229991e-02,-0.071960,-0.071819,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
2036,660,11,136,-0.034538,0.089711,0.051647,-0.048808,4.347915e-02,-0.069824,-0.069571,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
2774,880,0,116,-0.036443,0.096756,0.054873,-0.054212,5.283396e-02,-0.072172,-0.072127,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0
458,153,8,101,-0.036349,0.096835,0.054846,-0.054229,7.516115e-18,-0.072246,-0.072156,...,0.0,0.0,6.504749e-16,-1.077312e-15,6.160338e-16,0.0,0.0,-1.420274e-15,0.0,0.0


Unnamed: 0,3M Future Change,6M Future Change,1Y Future Change
3416,0.025604,-0.063183,0.157666
3442,-0.282530,-0.308901,-0.058557
884,0.069705,0.001882,-0.040057
4123,-0.184375,-0.295057,-0.517559
1059,0.014205,0.227532,0.483223
...,...,...,...
1146,-0.000729,-0.000415,-0.047551
2036,0.056948,0.227084,0.220540
2774,0.120381,-0.240835,-0.236464
458,0.031041,-0.073135,-0.193752


## Model Training

In [120]:
if train_new_model:
    base_model = MLPRegressor()

    # Only the network shape and the early-stopping patience are searched;
    # every other hyperparameter is pinned to a single value.
    param_grid = {
        'shuffle': [True],
        'solver': ['adam'],
        'learning_rate': ['adaptive'],
        'tol': [0.0001],
        'max_iter': [200],
        'alpha': [0.0001],
        'hidden_layer_sizes': [(500, 500, 500), (500, 500, 500, 500), (1000, 1000, 1000), (1000, 1000, 1000, 1000)],
        'learning_rate_init': [0.001],
        'n_iter_no_change': [6, 9]
    }

    grid_search = GridSearchCV(
        estimator=base_model,
        param_grid=param_grid,
        scoring='r2',
        cv=3,
        # The config value is documented as "'-1' for max", which is the n_jobs
        # convention, so it controls parallelism here rather than verbosity.
        n_jobs=verticle_jobs,
        verbose=1,
    )
    grid_search.fit(X_train, y_train)

    best_params = grid_search.best_params_
    # With the default refit=True, best_estimator_ has already been retrained on
    # all of X_train with the best parameters, so no further fit is needed.
    model = grid_search.best_estimator_
    print('Best Parameters:')
    print(best_params)

## Testing and benchmarking

In [121]:
if train_new_model:
    y_test_pred = model.predict(X_test)

    # One scatter plot and one set of scores per prediction horizon.
    target_names = ['3M Future Change', '6M Future Change', '1Y Future Change']
    for column_idx, target in enumerate(target_names):
        actual = y_test[target]
        predicted = y_test_pred[:, column_idx]
        # End points of the y = x reference line over the observed range.
        diagonal = [actual.min(), actual.max()]

        fig, ax = plt.subplots(figsize=(11, 6))
        ax.scatter(actual, predicted, alpha=0.7, color='blue', label='Predictions')
        ax.plot(diagonal, diagonal, color='red', linestyle='--', label='Perfect Fit')
        ax.set_title(f'Predicted vs Actual Values ({target})')
        ax.set_xlabel('Actual Values')
        ax.set_ylabel('Predicted Values')
        ax.legend()
        ax.grid(True)
        plt.show()

        print(f'{target} - R²: {r2_score(actual, predicted):.4f}')
        print(f'{target} - MSE: {mean_squared_error(actual, predicted):.4f}')
        print(f'{target} - MAE: {mean_absolute_error(actual, predicted):.4f}')

    # Scores over all three targets together; the logging cell reads mae, mse and r2.
    mae = mean_absolute_error(y_test, y_test_pred)
    mse = mean_squared_error(y_test, y_test_pred)
    r2 = r2_score(y_test, y_test_pred)

    print('\nOverall Scores:')
    print(f'Mean - R²: {r2:.4f}')
    print(f'Mean - MSE: {mse:.4f}')
    print(f'Mean - MAE: {mae:.4f}')

### Log test results

In [122]:
if train_new_model:
    # One-row summary of this run: overall scores plus the hyperparameters used.
    test_results = pd.DataFrame({
        'R²': r2,
        'MSE': mse,
        'MAE': mae,
        'symbol_list': symbol_list,
        'hidden_layer_sizes': [model.hidden_layer_sizes],
        'max_iter': model.max_iter,
        'n_iter_no_change': model.n_iter_no_change,
        'learning_rate': model.learning_rate,
        'learning_rate_init': model.learning_rate_init,
        'batch_size': model.batch_size,
        'tol': model.tol,
        'alpha': model.alpha,
        'shuffle': model.shuffle,
    })

    # Append to the running log. The header is written only when the log is
    # created (or empty), so the CSV does not get a header line per run.
    results_log_path = Path('../data/test_results.csv')
    log_needs_header = not results_log_path.exists() or results_log_path.stat().st_size == 0
    test_results.to_csv(results_log_path, mode='a', index=False, header=log_needs_header)

    # save model as new best if results are better than the current one
    best_results_path = Path('../models/best_model_results.csv')
    if best_results_path.exists():
        best_r2 = pd.read_csv(best_results_path).loc[0, 'R²']
    else:
        # First run: nothing stored yet, so any score counts as an improvement.
        best_r2 = float('-inf')

    if r2 > best_r2:
        print(f'Old best R²: {best_r2}')
        print(f'New best R²: {r2}')
        print('Saving new best model...')
        test_results.to_csv(best_results_path, mode='w', index=False)
        with open('../models/best_model.pkl','wb') as f:
            pickle.dump(model,f)
        

## Predictions on latest data

In [123]:
# Overall R² stored alongside the best model (first row of the results file).
best_results = pd.read_csv('../models/best_model_results.csv')
best_r2 = best_results.loc[0, 'R²']

# NOTE: unpickling executes code from the file; only load model files this
# notebook produced itself.
with open('../models/best_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

print(f'Best model R²: {best_r2}')


Best model R²: -0.0893079795170797


In [124]:
# The in-memory frame holds label-encoded tickers, so the original symbols are
# read back from the saved dataset and aligned through the row index.
df_raw = pd.read_csv('../data/earnings_data.csv')

# Predict every row in one call instead of one model.predict call per row.
# Columns of the result follow the training target order: 3M, 6M, 1Y.
if len(X_pred):
    predictions = np.asarray(model.predict(X_pred))
else:
    predictions = np.empty((0, 3))
y_pred_3m, y_pred_6m, y_pred_1y = predictions.T

results_df = pd.DataFrame({
    'Ticker': df_raw.loc[X_pred.index, 'Ticker'].to_numpy(),
    'mean (%)': (y_pred_3m + y_pred_6m + y_pred_1y) / 3 * 100,
    '3m (%)': y_pred_3m * 100,
    '6m (%)': y_pred_6m * 100,
    '1y (%)': y_pred_1y * 100,
})
results_df

Unnamed: 0,Ticker,mean (%),3m (%),6m (%),1y (%)
0,AA,17.282441,19.936060,4.394326,27.516937
1,AAT,5.880127,-7.759504,8.239136,17.160750
2,ABM,7.328053,-9.512793,9.929937,21.567015
3,ABR,5.005326,-8.874480,6.322241,17.568216
4,ABT,4.912593,-4.421287,5.239655,13.919410
...,...,...,...,...,...
979,EQNR,7.802442,2.323224,10.336088,10.748015
980,BABA,2.789531,-9.315250,6.400422,11.283420
981,JD,3.075465,3.360969,-0.431078,6.296503
982,IQ,10.234500,7.562893,11.631753,11.508856


In [125]:
def predict_ticker(ticker_str):
    """Look up the precomputed predictions for one ticker symbol.

    The input is upper-cased, single and double quotes are removed, and
    surrounding whitespace is stripped before it is matched against the
    'Ticker' column of ``results_df``.

    Returns a 4-tuple of strings ``(3m, 6m, 1y, mean)``, each formatted with two
    decimals. When the ticker is unknown, or any error occurs, the first element
    carries the message and the other three are empty strings.
    """
    try:
        symbol = str(ticker_str).upper().replace("'", "").replace('"', "").strip()
        matches = results_df[results_df['Ticker'] == symbol]
        if matches.empty:
            return ("The author was stupid and forgot to cover this obvious, famous stock. Try another.", "", "", "")
        row = matches.iloc[0]
        return (
            f"{row['3m (%)']:.2f}",
            f"{row['6m (%)']:.2f}",
            f"{row['1y (%)']:.2f}",
            f"{row['mean (%)']:.2f}",
        )
    except Exception as e:
        # Report the failure in the first output box rather than raising.
        return (f"Error: {e}", "", "", "")

In [126]:
# Labels of the four output boxes, in the order predict_ticker returns its values.
output_labels = [
    "3 Month Change Prediction (%)",
    "6 Month Change Prediction (%)",
    "1 Year Change Prediction (%)",
    "Mean Change Prediction (%)",
]

iface = gr.Interface(
    fn=predict_ticker,
    inputs=gr.Textbox(label="Ticker (e.g. 'TSLA' or 'MATAS.CO')"),
    outputs=[gr.Textbox(label=box_label) for box_label in output_labels],
    title="Stock Price Prediction Model",
    description=" ",
)

# share=True also requests a public gradio.live URL in addition to the local one.
iface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7868
* Running on public URL: https://901cad062fc3f6446d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


