In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import performanceanalytics.table.table as pat
import statistics

Import stock symbols as well as company name

In [None]:
#stock_data = pd.read_csv('Companies_Ticker.csv', sep = ';')
stock_data = pd.read_csv('Companies_Ticker.csv', sep=';')

Pulls time series data for stocks on a daily basis from 1989-9-18 until 2021-12-31.

Parameters
```
:stock_dict:  dictionary
    Contains the stock symbols as key and the time series as values.
:stocks_as_df:  dataframe
    Contains the time series data as one df`
```

In [6]:
stock_dict = {}
for s in stock_data['Symbol']:  # iterate for every stock indices
    # Retrieve data from Yahoo Finance
    tickerData = yf.Ticker(s)
    # Save historical data
    stock_dict[s] = yf.download(
        s, start='1989-9-18', end='2021-12-31', progress=False)
# Concatenate all data
stocks_as_df = pd.concat(stock_dict, axis=0)

In [8]:
stocks_as_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Adj Close,Volume
Unnamed: 0_level_1,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
^GDAXI,1989-09-18,1597.079956,1597.079956,1597.079956,1597.079956,1597.079956,0.0
^GDAXI,1989-09-19,1613.119995,1613.119995,1613.119995,1613.119995,1613.119995,0.0
^GDAXI,1989-09-20,1617.949951,1617.949951,1617.949951,1617.949951,1617.949951,0.0
^GDAXI,1989-09-21,1615.209961,1615.209961,1615.209961,1615.209961,1615.209961,0.0
^GDAXI,1989-09-22,1626.020020,1626.020020,1626.020020,1626.020020,1626.020020,0.0
...,...,...,...,...,...,...,...
ZAL.DE,2021-12-23,70.300003,70.699997,69.519997,70.519997,70.519997,411277.0
ZAL.DE,2021-12-27,70.360001,70.720001,69.900002,70.459999,70.459999,389135.0
ZAL.DE,2021-12-28,70.239998,71.500000,70.160004,70.800003,70.800003,325925.0
ZAL.DE,2021-12-29,71.019997,72.220001,70.199997,70.699997,70.699997,391307.0


Check if `stocks_as_df` contains NA or zeros in Volume & Adjusted Close
```
:stocks_as_df:  dataframe
    Contains the time series data as one df.
:stocks_as_df_Volume_is_0:  dataframe
    Contains the rows where Volume == 0.
```

In [9]:
stocks_as_df_has_nan = np.isnan(np.sum(stocks_as_df))

#(stocks_as_df < 0).any()
# (stocks_as_df = 0).any()

stocks_as_df_Volume_is_0 = stocks_as_df.loc[stocks_as_df["Volume"] == 0]

Check if Adj Close in `stocks_as_df` differs from previous/ following day.
```
:stocks_as_df:  dataframe
    Contains the time series data as one df.
:stocks_as_df_adjclose_peak_bottom:  dataframe
    Contains the rows where Adj. Close differs
```

In [11]:
stocks_as_df_adjclose_peak_bottom_list = []
n = 1

while n < len(stocks_as_df)-1:
    if abs(stocks_as_df["Adj Close"][n] -
           statistics.mean([stocks_as_df["Adj Close"][n-1],
                            stocks_as_df["Adj Close"][n+1]])) > .5 * stocks_as_df["Adj Close"][n]:
        stocks_as_df_adjclose_peak_bottom_list.append(stocks_as_df.iloc[n])

    n += 1

stocks_as_df_adjclose_peak_bottom = pd.DataFrame(
    stocks_as_df_adjclose_peak_bottom_list)

In [12]:
stocks_as_df_adjclose_peak_bottom

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
"(^GDAXI, 2021-12-30 00:00:00)",15837.559570,15890.740234,15834.570312,15884.860352,15884.860352,32456600.0
"(ADS.DE, 1998-06-24 00:00:00)",40.916248,41.159000,40.775501,40.775501,29.134739,181816.0
"(AIR.DE, 1998-11-10 00:00:00)",15.400000,15.400000,15.400000,15.400000,8.746623,600.0
"(BAS.DE, 1996-12-16 00:00:00)",15.198145,15.354100,15.083100,15.172600,6.200554,1228124.0
"(BAYN.DE, 1996-12-16 00:00:00)",30.969471,31.110382,30.099039,30.365728,15.570714,1146230.0
...,...,...,...,...,...,...
"(ENR.DE, 2020-09-29 00:00:00)",21.850000,23.370001,21.559999,22.000000,21.887093,10261596.0
"(SY1.DE, 2006-12-11 00:00:00)",17.250000,18.639999,17.250000,18.500000,13.719698,23895609.0
"(VOW3.DE, 1998-07-22 00:00:00)",63.148163,65.053703,63.148163,64.596451,33.766048,56336.0
"(VNA.DE, 2013-07-11 00:00:00)",14.336004,14.755185,14.310853,14.713267,11.321285,3899256.0


**Define all dates with listing/ delisting**

Parameters
```
:index_compositions:  data frame
    Contains the deletions/ additions as well as date of change/ announcements & Merger/Spin-Off Information
```

In [13]:
index_compositions = pd.read_csv('Historical_Index_Compositions.csv', sep = ';')

In [15]:
returns_daily = {}
for s in stock_data['Symbol']:
    returns_daily[s] = stock_dict[s]['Adj Close'].pct_change()

**Transform daily price data to weekly returnTransform daily price data to weekly return's**

Parameters
```
:stock_weekly:  dictionary
    Contains the stock symbols as key and the weekly returns as values`
```

In [17]:
returns_weekly = {}
for s in stock_data['Symbol']:
    returns_weekly[s] = stock_dict[s]['Adj Close'].resample(
        'W').ffill().pct_change()

**Calculating measures of location, statistical dispersion and shape**

Parameters
```
:des_stat:  dataframe
    Contains the descriptive statistics`
```

In [18]:
des_stat = pd.DataFrame(columns=stock_data['Symbol'],
                        index=['Observations', 'NAs', 'Minimum', 'Quartile 1', 'Median',
                               'Artithmetic Mean', 'Geometric Mean', 'Quartile 3', 'Maximum', 'SE Mean',
                               'LCL Mean (.95)', 'UCL Mean (.95)', 'Variance', 'Stdev', 'Skewness', 'Kurtosis'])

for s in stock_data['Symbol']:
    df = pd.DataFrame(returns_daily[s])
    des_stat[s] = pat.stats_table(df, manager_col=0)
print(des_stat)

Symbol                 ^GDAXI       ADS.DE       AIR.DE       ALV.DE  \
Observations      8158.000000  6022.000000  5923.000000  6419.000000   
NAs                  1.000000     1.000000     1.000000     1.000000   
Minimum             -0.131434    -0.153704    -0.215624    -0.153277   
Quartile 1          -0.006299    -0.009064    -0.010201    -0.008944   
Median               0.000791     0.000000     0.000000     0.000000   
Artithmetic Mean     0.000382     0.000554     0.000716     0.000402   
Geometric Mean       0.000282     0.000359     0.000430     0.000169   
Quartile 3           0.007422     0.009824     0.011345     0.009525   
Maximum              0.114020     0.136590     0.206724     0.262448   
SE Mean              0.000156     0.000255     0.000311     0.000270   
LCL Mean (.95)       0.000380     0.000551     0.000712     0.000399   
UCL Mean (.95)       0.000383     0.000557     0.000720     0.000406   
Variance             0.000200     0.000390     0.000572     0.00

**Calculating the downside statistics**

Parameters
```
:down_stat:  dataframe
    Contains the downside statistics`
```

In [19]:
down_stat = pd.DataFrame(columns=stock_data['Symbol'],
                         index=['Semi Deviation', 'Gain Deviation', 'Loss Deviation', 'Downside Deviation (MAR=2.0%)',
                                'Downside Deviation (rf=0.5%)', 'Downside Deviation (0%)', 'Maximum Drawdown',
                                'Historical VaR (95%)', 'Historical ES (95%)', 'Modified VaR (95%)', 'Modified ES (95%)'])

for s in stock_data['Symbol']:
    df = pd.DataFrame(returns_daily[s])
    down_stat[s] = pat.create_downside_table(df, 0)
down_stat

KeyboardInterrupt: 