# Stock Price Modeling from Fundamentals

In [235]:
# Todo :
# - Create labels from stock price data before and after earnings. 
#   This should correlate the change in stock price with the change in fundamentals
# - Create validation and test sets
#   4th most recent quarter's price for validation.
#   3 most recent quarters' price for test
# - Start modeling. Try some baselines first (e.g. mean estimates, repeating previous periods)
#   Try some additional models (e.g. ARIMA, XGBoost trees)

In [236]:
from pathlib import Path
import pandas as pd

prices = pd.read_csv(Path("preprocessed") / "KMB_prices.csv")
financials = pd.read_csv(Path("preprocessed") / "KMB_financials.csv")

prices.shape, financials.shape

((6283, 6), (448, 318))

In [237]:
prices["target"] = prices["Close"] - prices["Open"]
prices = prices[["Date", "target"]] # backward looking
priced = prices.sort_values(by="Date")
prices.head(3)

Unnamed: 0,Date,target
0,2001.142077,-0.561141
1,2001.144809,0.08313
2,2001.147541,-0.062352


In [238]:
financials["period_end"] = financials["period_end"] # + 90/366
financials.head(3)

Unnamed: 0,period_end,filing_date,AccountsPayableCurrent,AccountsPayableTradeCurrent,AccountsReceivableNetCurrent,AccruedAdvertisingCurrent,AccruedIncomeTaxesNoncurrent,AccruedLiabilitiesCurrent,AccruedSalariesCurrent,AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment,...,UnrecordedUnconditionalPurchaseObligationBalanceOnThirdAnniversary,UnrecordedUnconditionalPurchaseObligationDueAfterFiveYears,VariableLeaseCost,WeightedAverageNumberOfDilutedSharesOutstanding,WeightedAverageNumberOfSharesIssuedBasic,WeightedAverageNumberOfSharesOutstandingBasic,0,1,2,3
0,2006.997268,2010.150273,,,,,,,,,...,,,,,,,1,0,0,0
1,2007.997268,2009.598361,,,,,,,,,...,,,,,,,0,1,0,0
2,2007.997268,2009.846995,,,,,,,,,...,,,,,,,0,0,1,0


In [239]:
merged = prices.merge(financials, left_on="Date", right_on="period_end").sort_values(by="period_end")
merged.head(3)

Unnamed: 0,Date,target,period_end,filing_date,AccountsPayableCurrent,AccountsPayableTradeCurrent,AccountsReceivableNetCurrent,AccruedAdvertisingCurrent,AccruedIncomeTaxesNoncurrent,AccruedLiabilitiesCurrent,...,UnrecordedUnconditionalPurchaseObligationBalanceOnThirdAnniversary,UnrecordedUnconditionalPurchaseObligationDueAfterFiveYears,VariableLeaseCost,WeightedAverageNumberOfDilutedSharesOutstanding,WeightedAverageNumberOfSharesIssuedBasic,WeightedAverageNumberOfSharesOutstandingBasic,0,1,2,3
0,2007.997268,-0.162019,2007.997268,2009.598361,,,,,,,...,,,,,,,0,1,0,0
1,2007.997268,-0.162019,2007.997268,2009.846995,,,,,,,...,,,,,,,0,0,1,0
2,2007.997268,-0.162019,2007.997268,2010.150273,,,,,,,...,,,,,,,1,0,0,0


In [240]:
dates = pd.read_csv(Path("preprocessed") / "KMB_dates.csv")
dates.head(3)

Unnamed: 0,filing_date,report_date
0,2026.117486,2025.997268
1,2026.10929,2026.10929
2,2026.101093,2025.245902


In [241]:
merged = merged.merge(dates, left_on="Date", right_on="report_date", suffixes=("", "_drop"))
merged.shape

(202, 322)

In [242]:
X = merged.drop(columns=["target", "Date", "filing_date", "report_date"]) # filing date can reflect some arbitrary date in the future.
X = X.rename(columns={"filing_date_drop": "filing_date"})
y = merged["target"]

X.shape, y.shape

((202, 318), (202,))

In [243]:
X.head()

Unnamed: 0,period_end,AccountsPayableCurrent,AccountsPayableTradeCurrent,AccountsReceivableNetCurrent,AccruedAdvertisingCurrent,AccruedIncomeTaxesNoncurrent,AccruedLiabilitiesCurrent,AccruedSalariesCurrent,AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment,AccumulatedOtherComprehensiveIncomeLossCumulativeChangesInNetGainLossFromCashFlowHedgesEffectNetOfTax,...,UnrecordedUnconditionalPurchaseObligationDueAfterFiveYears,VariableLeaseCost,WeightedAverageNumberOfDilutedSharesOutstanding,WeightedAverageNumberOfSharesIssuedBasic,WeightedAverageNumberOfSharesOutstandingBasic,0,1,2,3,filing_date
0,2016.748634,2454000000.0,,2222000000.0,,,1731000000.0,,9729000000.0,,...,,,361950000.0,,359600000.0,0,1,0,0,2016.814208
1,2016.748634,,,,,,,,,,...,,,361950000.0,,359600000.0,0,1,0,0,2016.814208
2,2017.087432,,,,,,,,,,...,,,,,,1,0,0,0,2017.090164
3,2017.245902,2571000000.0,,2224000000.0,,,1620000000.0,,9940000000.0,,...,,,358600000.0,,356000000.0,0,1,0,0,2017.311475
4,2017.245902,,,,,,,,,,...,,,358600000.0,,356000000.0,0,1,0,0,2017.311475


In [244]:
mask = X["period_end"] >= 2025
X_train, X_test = X.loc[~mask, :], X.loc[mask, :]
y_train, y_test = y.loc[~mask], y.loc[mask]

X_train.shape, y_train.shape, X_test.shape, y_test.shape 

((158, 318), (158,), (44, 318), (44,))

In [245]:
# test 4 different events for validation, 6 events for test 
X_val, X_test = X_test.loc[:197,:], X_test.loc[197:,:]
y_val, y_test = y_test.loc[:197], y_test.loc[197:]

X_val.shape, y_val.shape, X_test.shape, y_test.shape,

((40, 318), (40,), (5, 318), (5,))

# Prediction Task  
  
Predict the day-of-earnings price movement (i.e. close price minus open price)

## Baseline

In [246]:
mean_pred = y_train.mean()
max_pred = y_train.max()
med_pred = y_train.median()
recent_pred = y_train.iloc[-1]

mean_pred, max_pred, med_pred, recent_pred,

(np.float64(0.4280892710839123),
 np.float64(1.4560127780106598),
 np.float64(0.444744236984036),
 np.float64(-0.9807497356786712))

# CONTINUE HERE - baseline modeling, then some more advanced models.

In [None]:
import numpy as np
mean_train_loss = np.abs()