# Stock Price Modeling from Fundamentals

In [60]:
# Todo :
# - Create labels from stock price data before and after earnings. 
#   This should correlate the change in stock price with the change in fundamentals
# - Create validation and test sets
#   4th most recent quarter's price for validation.
#   3 most recent quarters' price for test
# - Start modeling. Try some baselines first (e.g. mean estimates, repeating previous periods)
#   Try some additional models (e.g. ARIMA, XGBoost trees)

In [61]:
from pathlib import Path
import pandas as pd

prices = pd.read_csv(Path("preprocessed") / "KMB_prices.csv")
financials = pd.read_csv(Path("preprocessed") / "KMB_financials.csv")

prices.shape, financials.shape

((3017, 6), (448, 318))

In [62]:
prices["target"] = prices["Close"] - prices["Open"]
prices = prices[["Date", "target"]] # backward looking
prices.head(3)

Unnamed: 0,Date,target
0,2014.13388,-0.166648
1,2014.136612,-0.416627
2,2014.139344,0.544822


In [63]:
financials.head(3)

Unnamed: 0,period_end,filing_date,AccountsPayableCurrent,AccountsPayableTradeCurrent,AccountsReceivableNetCurrent,AccruedAdvertisingCurrent,AccruedIncomeTaxesNoncurrent,AccruedLiabilitiesCurrent,AccruedSalariesCurrent,AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment,...,UnrecordedUnconditionalPurchaseObligationBalanceOnThirdAnniversary,UnrecordedUnconditionalPurchaseObligationDueAfterFiveYears,VariableLeaseCost,WeightedAverageNumberOfDilutedSharesOutstanding,WeightedAverageNumberOfSharesIssuedBasic,WeightedAverageNumberOfSharesOutstandingBasic,0,1,2,3
0,2006.997268,2010.150273,,,,,,,,,...,,,,,,,1,0,0,0
1,2007.997268,2009.598361,,,,,,,,,...,,,,,,,0,1,0,0
2,2007.997268,2009.846995,,,,,,,,,...,,,,,,,0,0,1,0


In [64]:
merged = prices.merge(financials, left_on="Date", right_on="period_end").sort_values(by="period_end")
X = merged.drop(columns=["target", "Date", "filing_date"]) # filing date can reflect some arbitrary date in the future.
y = merged["target"]

X.shape, y.shape

((206, 317), (206,))

In [65]:
X.head()

Unnamed: 0,period_end,AccountsPayableCurrent,AccountsPayableTradeCurrent,AccountsReceivableNetCurrent,AccruedAdvertisingCurrent,AccruedIncomeTaxesNoncurrent,AccruedLiabilitiesCurrent,AccruedSalariesCurrent,AccumulatedDepreciationDepletionAndAmortizationPropertyPlantAndEquipment,AccumulatedOtherComprehensiveIncomeLossCumulativeChangesInNetGainLossFromCashFlowHedgesEffectNetOfTax,...,UnrecordedUnconditionalPurchaseObligationBalanceOnThirdAnniversary,UnrecordedUnconditionalPurchaseObligationDueAfterFiveYears,VariableLeaseCost,WeightedAverageNumberOfDilutedSharesOutstanding,WeightedAverageNumberOfSharesIssuedBasic,WeightedAverageNumberOfSharesOutstandingBasic,0,1,2,3
0,2014.245902,2553000000.0,,2618000000.0,,,1955000000.0,,9874000000.0,-38000000.0,...,,,,382100000.0,,379000000.0,0,1,0,0
1,2014.245902,,,,,,,,,-38000000.0,...,,,,382100000.0,,379000000.0,0,1,0,0
2,2014.284153,,,,,,,,,,...,,,,,,,0,1,0,0
3,2014.494536,2660000000.0,,2595000000.0,,,1978000000.0,,9942000000.0,-45000000.0,...,,,,379350000.0,,376600000.0,0,1,0,0
4,2014.494536,,,,,,,,,,...,,,,,,,1,0,0,0


In [66]:
mask = X["period_end"] >= 2025
X_train, X_test = X.loc[~mask, :], X.loc[mask, :]
y_train, y_test = y.loc[~mask], y.loc[mask]

X_train.shape, y_train.shape, X_test.shape, y_test.shape 

((185, 317), (185,), (21, 317), (21,))

In [67]:
X_test["period_end"]

185    2025.000000
186    2025.000000
187    2025.000000
188    2025.000000
189    2025.000000
190    2025.000000
191    2025.000000
192    2025.084699
193    2025.245902
194    2025.245902
195    2025.245902
196    2025.286885
197    2025.494536
198    2025.494536
199    2025.494536
200    2025.562842
201    2025.745902
202    2025.745902
203    2025.808743
204    2025.997268
205    2026.081967
Name: period_end, dtype: float64

In [None]:
# test 4 different events for validation, 6 events for test 
X_val, X_test = X_test.loc[:197,:], X_test.loc[197:,:]
y_val, y_test = y_test.loc[:197], y_test.loc[197:]

X_val.shape, y_val.shape, X_test.shape, y_test.shape,

((13, 317), (13,), (9, 317), (9,))