## Calculate yearly stock price returns

In [2]:
import pandas as pd

In [3]:
stocks = pd.read_csv("./data/stock_data.csv",
                index_col="Unnamed: 0",
                parse_dates=True)

stocks.head()

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003-01-02,7.4,21.11,29.22,909.03
2003-01-03,7.45,21.14,29.24,908.59
2003-01-06,7.45,21.52,29.96,929.01
2003-01-07,7.43,21.93,28.95,922.93
2003-01-08,7.28,21.31,28.83,909.93


In [4]:
stocks

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003-01-02,7.40,21.11,29.22,909.03
2003-01-03,7.45,21.14,29.24,908.59
2003-01-06,7.45,21.52,29.96,929.01
2003-01-07,7.43,21.93,28.95,922.93
2003-01-08,7.28,21.31,28.83,909.93
...,...,...,...,...
2011-10-10,388.81,26.94,76.28,1194.89
2011-10-11,400.29,27.00,76.27,1195.54
2011-10-12,402.19,26.96,77.16,1207.25
2011-10-13,408.43,27.18,76.37,1203.66


In [5]:
stocks.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2214 entries, 2003-01-02 to 2011-10-14
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AAPL    2214 non-null   float64
 1   MSFT    2214 non-null   float64
 2   XOM     2214 non-null   float64
 3   SPX     2214 non-null   float64
dtypes: float64(4)
memory usage: 86.5 KB


In [6]:
stocks.index

DatetimeIndex(['2003-01-02', '2003-01-03', '2003-01-06', '2003-01-07',
               '2003-01-08', '2003-01-09', '2003-01-10', '2003-01-13',
               '2003-01-14', '2003-01-15',
               ...
               '2011-10-03', '2011-10-04', '2011-10-05', '2011-10-06',
               '2011-10-07', '2011-10-10', '2011-10-11', '2011-10-12',
               '2011-10-13', '2011-10-14'],
              dtype='datetime64[ns]', length=2214, freq=None)

## Get the dailyreturn

In [7]:
dailyreturn = stocks.pct_change()

In [8]:
dailyreturn.head()

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003-01-02,,,,
2003-01-03,0.006757,0.001421,0.000684,-0.000484
2003-01-06,0.0,0.017975,0.024624,0.022474
2003-01-07,-0.002685,0.019052,-0.033712,-0.006545
2003-01-08,-0.020188,-0.028272,-0.004145,-0.014086


In [9]:
dailyreturn.groupby(dailyreturn.index.year).sum()*100

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003,43.584872,6.898664,18.771676,21.55207
2004,118.442218,10.185021,25.943358,9.226193
2005,87.945329,0.095162,13.837245,3.483644
2006,23.832396,16.859259,34.845801,13.26972
2007,91.965795,21.529358,24.568472,4.740316
2008,-66.939672,-47.032987,-0.880614,-40.145891
2009,96.233386,54.235841,-10.095848,24.783483
2010,46.161592,-4.332628,11.263742,13.656375
2011,29.584961,1.784256,10.90339,-0.663912


In [52]:
stocks['year'] = stocks.index.strftime('%Y')

In [53]:
stocks.index

DatetimeIndex(['2003-01-02', '2003-01-03', '2003-01-06', '2003-01-07',
               '2003-01-08', '2003-01-09', '2003-01-10', '2003-01-13',
               '2003-01-14', '2003-01-15',
               ...
               '2011-10-03', '2011-10-04', '2011-10-05', '2011-10-06',
               '2011-10-07', '2011-10-10', '2011-10-11', '2011-10-12',
               '2011-10-13', '2011-10-14'],
              dtype='datetime64[ns]', length=2214, freq=None)

In [55]:
stocks

Unnamed: 0,AAPL,MSFT,XOM,SPX,year
2003-01-02,7.40,21.11,29.22,909.03,2003
2003-01-03,7.45,21.14,29.24,908.59,2003
2003-01-06,7.45,21.52,29.96,929.01,2003
2003-01-07,7.43,21.93,28.95,922.93,2003
2003-01-08,7.28,21.31,28.83,909.93,2003
...,...,...,...,...,...
2011-10-10,388.81,26.94,76.28,1194.89,2011
2011-10-11,400.29,27.00,76.27,1195.54,2011
2011-10-12,402.19,26.96,77.16,1207.25,2011
2011-10-13,408.43,27.18,76.37,1203.66,2011


In [56]:
stocks.set_index('year', drop=True)

Unnamed: 0_level_0,AAPL,MSFT,XOM,SPX
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003,7.40,21.11,29.22,909.03
2003,7.45,21.14,29.24,908.59
2003,7.45,21.52,29.96,929.01
2003,7.43,21.93,28.95,922.93
2003,7.28,21.31,28.83,909.93
...,...,...,...,...
2011,388.81,26.94,76.28,1194.89
2011,400.29,27.00,76.27,1195.54
2011,402.19,26.96,77.16,1207.25
2011,408.43,27.18,76.37,1203.66


In [63]:
def firstlast(x):
    return x.iloc[[-1]]

In [64]:
stocks.groupby('year').apply(firstlast)[['AAPL', 'MSFT', 'XOM', 'SPX']].pct_change()

Unnamed: 0_level_0,Unnamed: 1_level_0,AAPL,MSFT,XOM,SPX
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2003,2003-12-31,,,,
2004,2004-12-31,2.012161,0.091244,0.28053,0.089935
2005,2005-12-30,1.232609,-0.009291,0.117859,0.03001
2006,2006-12-29,0.180136,0.158142,0.390946,0.136194
2007,2007-12-31,1.334748,0.208686,0.242731,0.035296
2008,2008-12-31,-0.569113,-0.443971,-0.131067,-0.384858
2009,2009-12-31,1.46901,0.6046,-0.126055,0.234542
2010,2010-12-31,0.530679,-0.065188,0.101318,0.127827
2011,2011-10-14,0.308284,-0.004381,0.087126,-0.026287


In [None]:
s2.pct_change().groupby(by='year')[['AAPL', 'MSFT', 'XOM', 'SPX']]

In [6]:
##calculate daily return

rets = stocks.pct_change().dropna()
rets

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003-01-03,0.006757,0.001421,0.000684,-0.000484
2003-01-06,0.000000,0.017975,0.024624,0.022474
2003-01-07,-0.002685,0.019052,-0.033712,-0.006545
2003-01-08,-0.020188,-0.028272,-0.004145,-0.014086
2003-01-09,0.008242,0.029094,0.021159,0.019386
...,...,...,...,...
2011-10-10,0.051406,0.026286,0.036977,0.034125
2011-10-11,0.029526,0.002227,-0.000131,0.000544
2011-10-12,0.004747,-0.001481,0.011669,0.009795
2011-10-13,0.015515,0.008160,-0.010238,-0.002974


In [8]:
get_year = lambda x: x.year
by_year_stocks = rets.groupby(get_year).sum()*100
by_year_stocks

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003,43.584872,6.898664,18.771676,21.55207
2004,118.442218,10.185021,25.943358,9.226193
2005,87.945329,0.095162,13.837245,3.483644
2006,23.832396,16.859259,34.845801,13.26972
2007,91.965795,21.529358,24.568472,4.740316
2008,-66.939672,-47.032987,-0.880614,-40.145891
2009,96.233386,54.235841,-10.095848,24.783483
2010,46.161592,-4.332628,11.263742,13.656375
2011,29.584961,1.784256,10.90339,-0.663912
