In [30]:
import pandas as pd
import numpy as np
import importlib

import utilities.variables as variables
import utilities.api_ticker_service as data_ticker_service
importlib.reload(data_ticker_service)
importlib.reload(variables)

<module 'utilities.variables' from '/Users/herbishtini/Documents/UNI/Master Thesis/sustainability_portfolio_optimisation/utilities/variables.py'>

### Average historical return

In [31]:
df_monthly_adj_close = pd.read_csv('../data/10_monthly_adjacent_close.csv', index_col=0)

#### Converting 'Adj Close' Values to Percentage Change
The "Adj Close" will be transformed to represent the percentage change from the previous day's close. The percentage change will be calculated as 1 + percentage change, which indicates the relative change in the adjusted close values day-over-day.

In [32]:
df_monthly_return = df_monthly_adj_close.copy(deep=True)
for column in df_monthly_return:
    if column != 'Date':
        df_monthly_return[column] = 1 + df_monthly_adj_close[column].pct_change().round(2)
        df_monthly_return[column].dropna()

In [33]:
df_monthly_return.to_csv('../data/10_monthly_returns_complete.csv')

### Return rate

In [34]:
df_monthly_return = pd.read_csv('../data/10_monthly_returns_complete.csv', index_col=0)
df_overview = pd.read_csv('../data/data_5_scaled.csv', index_col=0)

In [35]:
df_monthly_adj_close.head()

Unnamed: 0_level_0,KE,SCS,HNI,AVT,ACCO,KEYS,CBRE,BRC,PGRE,BHE,...,ENPH,RUN,SUP,MATX,KIDS,HALO,MATW,KVHI,NEO,UNP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1999-08-01,,6.013282,11.76295,17.141674,,,,8.704611,,13.926089,...,,,15.431546,6.066541,,,9.999043,2.4375,,7.509912
1999-09-01,,5.658068,9.648345,16.270067,,,,9.284917,,13.358641,...,,,15.431546,6.040688,,,11.104928,2.375,,7.413507
1999-10-01,,5.165832,9.836304,21.209169,,,,8.414455,,6.052765,...,,,14.708194,6.072314,,,9.261786,3.0625,,8.634527
1999-11-01,,5.319273,10.838723,21.354946,,,,8.951837,,8.41713,...,,,14.823595,5.756048,,,8.586853,3.03125,,7.289008
1999-12-01,,4.910099,11.044152,23.517157,,,,9.899887,,8.677205,...,,,14.823595,5.827809,,,10.156491,3.0625,,6.766294


In [36]:
# move Date index to a new column
df_monthly_adj_close = df_monthly_adj_close.reset_index(level=0)
df_monthly_return = df_monthly_return.reset_index(level=0)

In [46]:
df_monthly_adj_close['RH']

0             NaN
1             NaN
2             NaN
3             NaN
4             NaN
          ...    
295    348.260010
296    247.050003
297    271.929993
298    244.440002
299    280.970001
Name: RH, Length: 300, dtype: float64

In [38]:
# 1, 5, 10, 25 year returns
# Loop through time spans
for i, years in enumerate(variables.time_span_years):
    # Loop through tickers/stock name
    for j, ticker in enumerate(df_overview['stock_ticker_symbol']):
        # if ticker is found in monthly adjacent columns, meaning there are available data to calculate
        if ticker in df_monthly_return.columns:
            # Get date "years" ago
            date = pd.Timestamp.today() - pd.DateOffset(years=years)
            # Pick only stocks that are after this date
            monthly_return_list = df_monthly_return.loc[ pd.to_datetime(df_monthly_return['Date']) >= date, ticker].dropna().tolist()
            if len(monthly_return_list) >= 2:
                # Calculate the i-years total return
                total_return = np.prod(monthly_return_list) - 1
                
                # Calculate the annualized average return
                annualized_return = np.prod(monthly_return_list) ** (1/years)
                
                if annualized_return.is_integer():
                    df_overview.loc[df_overview['stock_ticker_symbol'] == ticker, 'return_rate' + '_' + str(years) + 'y_avg'] = annualized_return

In [39]:
df_overview

Unnamed: 0,company_name,industry,stock_exchange,stock_ticker_symbol,market_capital_euro,score
0,"Kimball Electronics, Inc.",Technology Hardware,NAS,KE,5.481967e+08,0.000120
1,"Steelcase, Inc.",Commercial Services,NYS,SCS,1.461633e+09,0.013235
2,HNI Corp.,Commercial Services,NYS,HNI,2.138245e+09,0.015005
3,"Avnet, Inc.",Technology Hardware,NAS,AVT,4.517528e+09,0.018784
4,ACCO Brands Corp.,Commercial Services,NYS,ACCO,4.399426e+08,0.019449
...,...,...,...,...,...,...
954,"Halozyme Therapeutics, Inc.",Pharmaceuticals,NAS,HALO,6.410791e+09,0.249868
955,Matthews International Corp.,Commercial Services,NAS,MATW,7.831761e+08,0.250174
956,"KVH Industries, Inc. (Delaware)",Technology Hardware,NAS,KVHI,8.416451e+07,0.250012
957,"NeoGenomics, Inc.",Pharmaceuticals,NAS,NEO,1.821173e+09,0.250415


### Volatility
To calculate the volatility of a stock's monthly returns, there are several steps that should be applied to the historical price data.
* Calculate Monthly Returns
* Calculate the Average Monthly Return
* Calculate the Monthly Return Deviations
* Square the Deviations
* Calculate the Variance
* Calculate the Standard Deviation

In [None]:
# 1, 5, 10, 25 year returns
# Loop through time spans
for i, years in enumerate(variables.time_span_years):
    for i, ticker in enumerate(df_overview['stock_ticker_symbol']):
        if ticker in df_monthly_adj_close.columns:
            # Get date "years" ago
            date = pd.Timestamp.today() - pd.DateOffset(years=years)
            # Pick only stocks that are after this date
            adj_close_filtered = df_monthly_adj_close.loc[ pd.to_datetime(df_monthly_adj_close['Date']) >= date, ticker].dropna()
            std_deviation = adj_close_filtered.pct_change().std()

            if len(adj_close_filtered) >= 2:
                df_overview.loc[df_overview['stock_ticker_symbol'] == ticker, 'volatility' + '_' + str(years) + 'y'] = std_deviation

In [None]:
df_overview[['stock_ticker_symbol', 'return_rate_5y_avg', 'return_rate_10y_avg', 'return_rate_25y_avg', 'volatility_10y']].sort_values(by=['return_rate_10y_avg', 'volatility_10y'])

Store changes

In [None]:
df_overview.to_csv('../data/data_10_overview.csv')
df_monthly_return.to_csv('../data/data_10_monthly_returns_complete.csv')