In [321]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
sns.set_style('darkgrid')
sns.set_palette('viridis')

from datetime import datetime

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression


In [260]:
# Read the quarterly S&P CSV from the working directory; .iloc[::-1] flips the row order
# (in the displayed frame the rows then run from 1988 at the top to 2020 at the bottom).
Quarterly = pd.read_csv("SP_Quarterly.csv").iloc[::-1]

In [261]:
# Bare last expression: the notebook renders the loaded frame as the cell output.
Quarterly

Unnamed: 0,DATE,OPERATING_EARNINGS_PER_SHARE,AS_REPORTED_EARNINGS_PER_SHARE,CASH_DIVIDENDS_PER_SHARE,SALES_PER_SHARE,BOOK_VAL_PER_SHARE,CAPEX_PER_SHARE,PRICE,DIVISOR
132,,,,,,,,,
131,31/03/1988,$5.48,$5.53,$2.24,,,,258.89,6977.40
130,30/06/1988,$6.05,$6.22,$2.50,,,,273.50,6956.73
129,30/09/1988,$6.22,$6.38,$2.46,,,,271.91,6930.89
128,31/12/1988,$6.37,$5.62,$2.54,,,,277.72,6829.56
...,...,...,...,...,...,...,...,...,...
4,31/12/2019,$39.18,$35.53,$15.21,$369.23,$914.49,$23.23,3230.78,8282.73
3,31/03/2020,$19.50,$11.88,$15.32,$332.59,$885.59,$19.82,2584.59,8289.27
2,30/06/2020,$26.79,$17.83,$14.35,$315.61,$901.97,$17.17,3100.29,8269.11
1,30/09/2020,$37.90,$32.98,$13.97,$346.71,$920.34,$17.94,3363.00,8286.67


## Data tidying
We're going to be using the price and cash-dividend columns, so we need to drop null values for those columns. We also need to convert the prices to numeric values, and it will help to have the dates as datetime objects.

In [262]:
currency_cols = ['OPERATING_EARNINGS_PER_SHARE',
       'AS_REPORTED_EARNINGS_PER_SHARE', 'CASH_DIVIDENDS_PER_SHARE',
       'SALES_PER_SHARE', 'BOOK_VAL_PER_SHARE', 'CAPEX_PER_SHARE']


def _to_number(column):
    """Parse a column of money/number text (e.g. ' $1,234.50') into floats.

    Parameters
    ----------
    column : pd.Series
        Raw column as read from the CSV; may already be numeric.

    Returns
    -------
    pd.Series
        Numeric version of ``column``. Missing entries stay missing.

    Raises
    ------
    ValueError
        Raised by ``pd.to_numeric`` when an entry is still not a number after
        the dollar sign, thousands separators and whitespace are removed.
    """
    # Plain text clean-up replaces the previous eval() of CSV content: eval runs
    # arbitrary code, turns "1,234" into a tuple, fails on "nan", and the old
    # fixed-offset slicing dropped any sign written in front of the "$".
    text = column.astype(str).str.replace(r"[$,\s]", "", regex=True)
    # Mask with the original null positions so NaN/None never reach the parser as text.
    return pd.to_numeric(text.where(column.notna()))


numeric_cols = currency_cols + ['PRICE', 'DIVISOR']

# assign() builds a new frame instead of writing into the reversed view of the raw
# data, and the cell can be re-run safely (already-numeric columns parse unchanged).
Quarterly = Quarterly.assign(**{col: _to_number(Quarterly[col]) for col in numeric_cols})

# The analysis uses both dividends and price, so rows missing either are dropped.
Quarterly = Quarterly.dropna(subset=['CASH_DIVIDENDS_PER_SHARE', 'PRICE'])



def to_dt_obj(date):
    """Convert a 'day/month/year' string into a ``datetime`` at midnight.

    Every '/'-separated field is parsed with ``int``; the first three are used
    as day, month and year respectively.
    """
    fields = [int(piece) for piece in date.split('/')]
    return datetime(year=fields[2], month=fields[1], day=fields[0])


# Replace each 'dd/mm/yyyy' string in DATE with the datetime built by to_dt_obj.
Quarterly["DATE"] = Quarterly["DATE"].apply(to_dt_obj)


We can check the data types to confirm they are what we want.

In [263]:
# Show the per-column dtypes as the cell output to verify the conversions above.
Quarterly.dtypes

DATE                              datetime64[ns]
OPERATING_EARNINGS_PER_SHARE             float64
AS_REPORTED_EARNINGS_PER_SHARE           float64
CASH_DIVIDENDS_PER_SHARE                 float64
SALES_PER_SHARE                          float64
BOOK_VAL_PER_SHARE                       float64
CAPEX_PER_SHARE                          float64
PRICE                                    float64
DIVISOR                                  float64
dtype: object

## Approximate Dividend Growth

In [264]:
def range_slider(fig, buttons=None):
    """Give ``fig`` a date x-axis with range-selector buttons and a range slider.

    Parameters
    ----------
    fig : object with an ``update_layout`` method
        The figure to modify; the layout is updated through that method.
    buttons : iterable of dict, optional
        Range-selector button specifications. Any falsy value (``None`` or an
        empty collection) selects the built-in 1w / 2w / 1m / all set.

    Returns
    -------
    None
    """
    selector_buttons = buttons or [
        {"count": 7, "label": "1w", "step": "day", "stepmode": "todate"},
        {"count": 14, "label": "2w", "step": "day", "stepmode": "todate"},
        {"count": 1, "label": "1m", "step": "month", "stepmode": "backward"},
        {"step": "all"},
    ]

    xaxis_settings = {
        # list(...) hands the layout its own copy of the button sequence.
        "rangeselector": {"buttons": list(selector_buttons)},
        "rangeslider": {"visible": True},
        "type": "date",
    }
    fig.update_layout(xaxis=xaxis_settings)
    
# Year-based range-selector buttons for the quarterly charts: last year, last five years, everything.
buttons = [
    {"count": 1, "label": "1y", "step": "year", "stepmode": "backward"},
    {"count": 5, "label": "5y", "step": "year", "stepmode": "backward"},
    {"step": "all"},
]

In [265]:
# Mean of dividends over a 10-row rolling window; the first 9 rows have no full window and are NaN.
Quarterly["ROLLING_AVERAGE_DIVIDENDS PER SHARE"] = (
    Quarterly["CASH_DIVIDENDS_PER_SHARE"].rolling(10).mean()
)

In [266]:

# Line chart of the raw dividend column and its rolling-average column against DATE,
# with the year-based selector buttons and a range slider attached by range_slider.
plot = px.line(
    Quarterly,
    x='DATE',
    y=["CASH_DIVIDENDS_PER_SHARE", "ROLLING_AVERAGE_DIVIDENDS PER SHARE"],
    labels=dict(value="Divident per share ($)"),
    title="Variation in S&P 500 Dividends since 1988",
)
range_slider(plot, buttons=buttons)
plot.show()

Approximating the dividend growth is difficult. Suppose we approximated the slope beginning in 2010: we'd get a different result than if we started in 1990. The problem here is working out whether something fundamentally changed in 2007 that altered the rate of dividend growth, or whether this is just a deviation that will converge back to a constant historic mean. We also see a substantial slump at the very top of the curve in 2020. If we're valuing the market now, and we care more about more immediate rewards (the time value of money), then maybe we should pay more attention to current trends.

### Predicting Growth

We can use a polynomial regressor to try to predict the growth of S&P 500 dividends.

In [324]:
# Feature matrix for the regression. `date` was never defined in the notebook, so this
# cell failed on a fresh kernel. Time is expressed as fractional years since the first
# retained quarter and shaped (n_rows, 1), which is the 2-D layout scikit-learn expects.
# Small magnitudes keep the cubic terms well conditioned; a polynomial fit is unchanged
# (up to floating-point error) by any linear rescaling of the time axis.
# NOTE(review): the original definition of `date` is not recoverable from the notebook —
# confirm this matches what was intended.
date = ((Quarterly.DATE - Quarterly.DATE.min()).dt.days / 365.25).to_numpy().reshape(-1, 1)

# Cubic polynomial regression of dividends per share on time.
degree = 3
polyreg = make_pipeline(PolynomialFeatures(degree), LinearRegression())
polyreg.fit(date, Quarterly.CASH_DIVIDENDS_PER_SHARE)

Pipeline(steps=[('polynomialfeatures', PolynomialFeatures(degree=3)),
                ('linearregression', LinearRegression())])

In [330]:
# Store the fitted curve next to the data: predictions are made on `date`, the same
# feature matrix that was passed to polyreg.fit.
Quarterly['POLY_REG_DIVS'] = polyreg.predict(date)

In [332]:
# Same chart as before with the polynomial-regression column added as a third series.
plot = px.line(
    Quarterly,
    x='DATE',
    y=[
        "CASH_DIVIDENDS_PER_SHARE",
        "ROLLING_AVERAGE_DIVIDENDS PER SHARE",
        'POLY_REG_DIVS',
    ],
    labels=dict(value="Divident per share ($)"),
    title="Variation in S&P 500 Dividends since 1988",
)
range_slider(plot, buttons=buttons)
plot.show()