In [246]:
import numpy as np
import pandas as pd
import utilities.mpt_utils as mpt_utils

In [247]:
# Load the monthly adjusted closing prices; the first CSV column becomes the row index.
df_monthly_adj_close = pd.read_csv(
    '../../data/df_monthly_prices_complete.csv',
    index_col=0,
)

#### Converting 'Adj Close' Values to Percentage Change
The "Adj Close" values are transformed into month-over-month gross returns: each value becomes 1 + the percentage change from the previous month's adjusted close (rounded to two decimals), so a value of 1.05 means a 5% gain over the prior month.

In [248]:
# Build the table of gross monthly returns (1 + month-over-month percentage change,
# rounded to two decimals) for every price column. A column literally named 'Date',
# if present, is carried over unchanged.
# NOTE(review): rounding each monthly return before it is compounded downstream loses
# precision (a return below 0.005 becomes exactly 0) — confirm this is intended.
price_columns = [column for column in df_monthly_adj_close.columns if column != 'Date']

df_monthly_return = df_monthly_adj_close.copy(deep=True)
# One vectorized pct_change over all price columns replaces the per-column loop; the
# former per-column `.dropna()` call discarded its result and had no effect.
df_monthly_return[price_columns] = 1 + df_monthly_adj_close[price_columns].pct_change().round(2)

# The first row has no previous month to compare against (its returns are NaN), so drop it.
df_monthly_return = df_monthly_return.iloc[1:]

In [249]:
# Persist the gross monthly returns (row index included) for later steps.
df_monthly_return.to_csv(path_or_buf='../../data/df_monthly_returns_complete.csv')

#### Converting local currencies to euro

The monthly adjusted closing prices are given in local currencies: Japanese yen for the Tokyo Stock Exchange, British pounds for the London Stock Exchange, etc.
For a proper allocation of equities, the adjusted closing prices should be converted to a single currency. The euro was selected for this purpose.

In [250]:
# Inspect the adjusted closing prices in their local currencies before conversion.
df_monthly_adj_close

Unnamed: 0_level_0,RS1.L,KE,TEG.DE,LEG.DE,SCS,HNI,AVT,ACCO,VNA.DE,7912.T,...,DEQ.DE,KIDS,HALO,MATW,9842.T,KVHI,MOON.L,NEO,6055.T,UNP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1999-10-01,192.424942,11.570000,44.819553,30.297156,5.165832,9.774066,21.209171,12.130485,8.585901,2008.531665,...,7.348418,23.750000,2.934000,9.178375,387.510425,3.06250,195.460001,0.400,56.376208,8.588829
1999-11-01,218.152206,11.570000,44.819553,30.297156,5.319274,10.770150,21.354940,12.517400,8.585901,2066.222290,...,7.348418,23.750000,2.934000,8.509521,387.510425,3.03125,195.460001,0.400,56.376208,7.250434
1999-12-01,241.588638,11.570000,44.819553,30.297156,4.910100,10.974279,23.517170,12.517400,8.585901,2013.300952,...,7.348418,23.750000,2.934000,10.065028,387.510425,3.06250,195.460001,0.400,56.376208,6.730484
2000-01-01,246.585907,11.570000,44.819553,30.297156,4.700321,9.629849,20.923582,9.133037,8.585901,2013.300952,...,7.348418,23.750000,2.934000,8.509521,387.510425,3.75000,195.460001,0.400,56.376208,6.227546
2000-02-01,246.585907,11.740000,36.391943,37.196711,5.061888,9.223397,26.063131,17.840927,10.089434,1862.757324,...,7.534067,22.286000,3.144000,8.870208,571.449524,7.37500,220.700000,0.382,56.951643,5.879422
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-01,690.306335,22.920000,14.260000,78.867676,13.556043,46.408180,54.285213,4.940793,27.895920,4884.000000,...,18.555727,31.580000,44.290001,27.826420,1897.316406,5.18000,160.000000,13.710,1944.000000,230.274567
2024-06-01,687.853210,21.980000,13.670000,76.260002,12.861370,44.735172,51.193146,4.629781,26.549999,5412.000000,...,19.906872,28.760000,52.360001,24.824409,1922.033936,4.65000,190.600006,13.870,2214.000000,225.062576
2024-07-01,818.000000,23.700001,13.950000,80.860001,14.379726,54.602348,53.759998,5.033655,28.410000,4970.000000,...,21.843512,30.780001,55.259998,28.689289,1799.435059,4.45000,218.000000,17.730,1998.000000,245.424240
2024-08-01,783.000000,18.450001,14.830000,87.059998,14.140000,53.509308,55.180000,5.398128,31.180000,5249.000000,...,20.672520,31.950001,63.849998,25.101889,1719.350342,4.44000,210.500000,16.520,1774.000000,254.734711


In [251]:
# Load the per-stock overview table and hand it, together with the local-currency
# prices, to the project helper that produces the euro-denominated price table.
# NOTE(review): the conversion rules live in mpt_utils.get_full_prices_euro, which is
# not visible from this notebook — confirm how exchange rates are applied.
df_overview = pd.read_csv(
    '../../data/df_overview.csv',
    index_col=0,
)
df_prices_euro = mpt_utils.get_full_prices_euro(
    df_monthly_adj_close,
    df_overview,
)

# Persist the converted prices next to the other data files.
df_prices_euro.to_csv(path_or_buf='../../data/df_monthly_prices_complete_euro.csv')

In [252]:
# Inspect the output of the euro conversion step.
df_prices_euro

Unnamed: 0_level_0,RS1.L,KE,TEG.DE,LEG.DE,SCS,HNI,AVT,ACCO,VNA.DE,7912.T,...,DEQ.DE,KIDS,HALO,MATW,9842.T,KVHI,MOON.L,NEO,6055.T,UNP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1999-10-01,225.137182,10.181600,44.819553,30.297156,4.545932,8.601178,18.664071,10.674827,8.585901,12.452896,...,7.348418,20.900000,2.581920,8.076970,2.402565,2.6950,228.688201,0.35200,0.349532,7.558170
1999-11-01,255.238082,10.181600,44.819553,30.297156,4.680961,9.477732,18.792348,11.015312,8.585901,12.810578,...,7.348418,20.900000,2.581920,7.488379,2.402565,2.6675,228.688201,0.35200,0.349532,6.380382
1999-12-01,282.658707,10.181600,44.819553,30.297156,4.320888,9.657366,20.695110,11.015312,8.585901,12.482466,...,7.348418,20.900000,2.581920,8.857225,2.402565,2.6950,228.688201,0.35200,0.349532,5.922826
2000-01-01,288.505511,10.181600,44.819553,30.297156,4.136283,8.474268,18.412752,8.037073,8.585901,12.482466,...,7.348418,20.900000,2.581920,7.488379,2.402565,3.3000,228.688201,0.35200,0.349532,5.480240
2000-02-01,288.505511,10.331200,36.391943,37.196711,4.454461,8.116590,22.935556,15.700016,10.089434,11.549095,...,7.534067,19.611680,2.766720,7.805783,3.542987,6.4900,258.219000,0.33616,0.353100,5.173892
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-01,807.658412,20.169600,14.260000,78.867676,11.929318,40.839199,47.770988,4.347898,27.895920,30.280800,...,18.555727,27.790400,38.975201,24.487249,11.763362,4.5584,187.200000,12.06480,12.052800,202.641619
2024-06-01,804.788256,19.342400,13.670000,76.260002,11.318006,39.366952,45.049968,4.074207,26.549999,33.554400,...,19.906872,25.308800,46.076801,21.845480,11.916610,4.0920,223.002007,12.20560,13.726800,198.055067
2024-07-01,957.060000,20.856001,13.950000,80.860001,12.654159,48.050067,47.308799,4.429617,28.410000,30.814000,...,21.843512,27.086401,48.628799,25.246574,11.156497,3.9160,255.060000,15.60240,12.387600,215.973331
2024-08-01,916.110000,16.236001,14.830000,87.059998,12.443200,47.088191,48.558400,4.750352,31.180000,32.543800,...,20.672520,28.116001,56.187999,22.089662,10.659972,3.9072,246.285000,14.53760,10.998800,224.166545


### Handling outliers

#### Remove cumulative return outliers

In [253]:
# Cumulative gross return per ticker: the product of all its monthly gross returns,
# ordered from best to worst and rounded to two decimals.
df_cum_returns = (
    df_monthly_return
    .prod()
    .sort_values(ascending=False)
    .round(2)
    .to_frame(name='cum_return')
)
df_cum_returns

Unnamed: 0,cum_return
MCG.L,5.065104e+09
NVDA,2.518770e+03
BGEO.L,1.294350e+03
DECK,9.095100e+02
NFLX,6.476100e+02
...,...
MVIS,1.000000e-02
BIG,1.000000e-02
GILT,0.000000e+00
2181.T,0.000000e+00


In [254]:
# Keep only the tickers whose cumulative gross return is above 1, i.e. those that
# ended the sample period higher than they started.
has_gained = df_cum_returns['cum_return'].gt(1)
columns = df_cum_returns[has_gained].index
columns

Index(['MCG.L', 'NVDA', 'BGEO.L', 'DECK', 'NFLX', 'ODFL', 'AAPL', 'TPL',
       'AOF.DE', 'SRT3.DE',
       ...
       'BBW', 'BGC', 'GPS', 'FNKO', '9409.T', 'SRP.L', 'OUT', 'HHFA.DE', 'DBX',
       'CTS'],
      dtype='object', length=1336)

#### Remove mean outliers

In [255]:
# Average monthly gross return of each remaining ticker, highest first,
# rounded to two decimals.
df_mean_avg = (
    df_monthly_return[columns]
    .mean()
    .sort_values(ascending=False)
    .round(2)
    .to_frame(name='mean_avg')
)
df_mean_avg

Unnamed: 0,mean_avg
BGEO.L,1.54
ILM1.DE,1.08
GME,1.07
2124.T,1.07
2491.T,1.06
...,...
PSTL,1.00
NTST,1.00
BLCO,1.00
NBS.L,1.00


In [256]:
# Drop tickers whose average monthly gross return is 1.08 or more
# (an average gain of at least 8% per month is treated as an outlier).
below_mean_cap = df_mean_avg['mean_avg'].lt(1.08)
columns = df_mean_avg[below_mean_cap].index
columns

Index(['GME', '2124.T', '2491.T', '2471.T', '3046.T', 'PAT.DE', 'CROX',
       'AOF.DE', 'SFQ.DE', 'DAN',
       ...
       'HLN.L', 'DBX', 'BNL', 'CBL', 'KVUE', 'PSTL', 'NTST', 'BLCO', 'NBS.L',
       'MCG.L'],
      dtype='object', length=1334)

#### Remove negative outliers

In [257]:
# Worst single-month gross return of each remaining ticker, rounded to two decimals.
df_minus = (
    df_monthly_return[columns]
    .min()
    .round(2)
    .to_frame(name='minus_val')
)
df_minus

Unnamed: 0,minus_val
GME,0.31
2124.T,0.29
2491.T,0.15
2471.T,0.14
3046.T,0.28
...,...
PSTL,0.88
NTST,0.88
BLCO,0.82
NBS.L,0.84


In [258]:
# Keep only tickers whose worst month is strictly positive as a gross return,
# i.e. that never had a month rounding to a total loss (0) or below.
never_wiped_out = df_minus['minus_val'].gt(0)
columns = df_minus[never_wiped_out].index
columns

Index(['GME', '2124.T', '2491.T', '2471.T', '3046.T', 'PAT.DE', 'CROX',
       'AOF.DE', 'SFQ.DE', 'DAN',
       ...
       'KREF', 'HLN.L', 'DBX', 'BNL', 'CBL', 'KVUE', 'PSTL', 'NTST', 'BLCO',
       'NBS.L'],
      dtype='object', length=1332)

In [259]:
# TODO: evaluate the selection without Japanese tickers (those carrying the '.T' suffix).
# NOTE(review): the draft filter below is a substring test, so it would also drop any
# ticker that merely contains '.T'; prefer x.endswith('.T') if this gets enabled.
#columns = [x for x in columns if '.T' not in x]

### Save datasets

In [260]:
# Re-read the four datasets from disk (first CSV column as row index) so that the
# ticker selection is applied to exactly what is stored in the data directory.
df_overview = pd.read_csv(
    '../../data/df_overview.csv',
    index_col=0,
)
df_monthly_prices_complete = pd.read_csv(
    '../../data/df_monthly_prices_complete.csv',
    index_col=0,
)
df_monthly_prices_complete_euro = pd.read_csv(
    '../../data/df_monthly_prices_complete_euro.csv',
    index_col=0,
)
df_monthly_returns_complete = pd.read_csv(
    '../../data/df_monthly_returns_complete.csv',
    index_col=0,
)

In [261]:
# Restrict every dataset to the tickers that survived the outlier filters.
# The overview table holds one row per stock, so it is filtered by row on its
# 'stock_ticker_symbol' column.
df_overview = df_overview[df_overview['stock_ticker_symbol'].isin(columns)]
# NOTE(review): a bare `df_overview.reset_index()` used to follow here; its result was
# discarded, so it never changed anything and has been removed. Row labels are left as
# loaded — chain `.reset_index(drop=True)` above if a contiguous index is wanted.

# The price and return tables hold one column per ticker, so they are filtered by column.
df_monthly_prices_complete = df_monthly_prices_complete[columns]
df_monthly_prices_complete_euro = df_monthly_prices_complete_euro[columns]
df_monthly_returns_complete = df_monthly_returns_complete[columns]

In [262]:
# Write the filtered datasets back to the data directory.
# NOTE(review): these are the same paths the notebook loads from, so the unfiltered
# inputs are overwritten and a re-run starts from already-filtered data.
df_overview.to_csv(path_or_buf='../../data/df_overview.csv')
df_monthly_prices_complete.to_csv(path_or_buf='../../data/df_monthly_prices_complete.csv')
df_monthly_prices_complete_euro.to_csv(path_or_buf='../../data/df_monthly_prices_complete_euro.csv')
df_monthly_returns_complete.to_csv(path_or_buf='../../data/df_monthly_returns_complete.csv')