# GreekStockPortfolio
### 1. Data Acquisition & Cleaning
---



### Imports

In [None]:

import datetime as dt
import time
from copy import copy
from math import sqrt

import numpy as np
import pandas as pd
import yfinance as yf

import matplotlib.pyplot as plt
from matplotlib import style
import matplotlib.dates as mdates
import seaborn as sns
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go

import requests
import bs4

from scipy import stats
from scipy.cluster.vq import kmeans, vq


from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn import preprocessing


import statsmodels.api as sm
import statsmodels.formula.api as smf

import riskfolio as rp
import quantstats as qs

### Web Scraping

In [None]:
def ATHEX_GD():
    """
    Retrieve the GD.AT index components and split them by Yahoo Finance data availability.

    The component table is scraped from the Athens Exchange (AthexGroup) index-composition
    page. The text of the first cell of every data row is taken as the ticker symbol and
    suffixed with ".AT". Each symbol is then probed with a one-day Yahoo Finance download
    (2018-01-02 to 2018-01-03); symbols whose download comes back empty, or fails, are
    reported as having no data.

    Returns:
    ATHEX2 (list): Tickers for which the probe download returned data.
    nodata_stock_list (list): Tickers for which the probe download was empty or failed.

    Raises:
    requests.RequestException: If the page cannot be fetched (network error, timeout, HTTP error status).
    ValueError: If the components table is not present in the returned page.
    """

    # Fetch the GD.AT components page; the timeout keeps a stalled connection from hanging
    # the notebook, and raise_for_status() avoids parsing an error page as if it were data.
    response = requests.get(
        "https://www.athexgroup.gr/web/guest/index-composition", timeout=30
    )
    response.raise_for_status()

    # Parse the HTML content using BeautifulSoup
    soup = bs4.BeautifulSoup(response.text, features="lxml")

    # Locate the table containing the index components
    table = soup.find('table', class_="data-historical")
    if table is None:
        raise ValueError(
            "Index composition table (class 'data-historical') not found in the AthexGroup page."
        )

    # Only the first cell of each data row (the symbol) is needed. The first row is the
    # header and is skipped; rows without <td> cells or with an empty first cell are ignored.
    symbols = []
    for row_number, row in enumerate(table.find_all('tr')):
        if row_number == 0:
            continue
        cells = [el.text.strip() for el in row.find_all('td')]
        if cells and cells[0]:
            # Append ".AT" to match the Yahoo Finance ticker format
            symbols.append(cells[0] + ".AT")

    # Probe window used to decide whether historical data exists for a symbol
    probe_start = dt.datetime(2018, 1, 2)
    probe_end = dt.datetime(2018, 1, 3)

    nodata_stock_list = []
    ATHEX2 = []
    for symbol in symbols:
        try:
            probe = yf.download([symbol], start=probe_start, end=probe_end)
        except Exception as exc:
            # Best effort: a failed download is treated as "no data", but the reason is shown.
            print(f"Download failed for {symbol}: {exc}")
            probe = pd.DataFrame()
        if probe.empty:
            nodata_stock_list.append(symbol)
        else:
            ATHEX2.append(symbol)
    return ATHEX2, nodata_stock_list

In [None]:
max_retries = 10
delay = 1
# Try to fetch the components from ATHEX_GD, retrying on failure.
# Attempts are numbered 1..max_retries so the "last attempt" check below is reachable.
for attempt in range(1, max_retries + 1):
    try:
        components, no_data_stocklist = ATHEX_GD()
        # Stop as soon as one attempt succeeds; without this the scrape and all
        # probe downloads would be repeated max_retries times.
        break
    except Exception as e:
        print(f"Attempt {attempt} failed: {e}")
        if attempt < max_retries:
            print(f"Retrying in {delay} seconds...")
            time.sleep(delay)
        else:
            print(f"Max retries ({max_retries}) exceeded.")
            # Later cells need `components`; surface the last error instead of
            # continuing with the name undefined.
            raise

[*********************100%%**********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['ACAG.AT']: YFChartError("%ticker%: Data doesn't exist for startDate = 1514844000, endDate = 1514930400")
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TITC.AT']: YFChartError("%ticker%: Data doesn't exist for startDate = 1514844000, endDate = 1514930400")
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 compl

In [None]:
# List the tickers that passed the data-availability probe, numbered from 1
print('The Athens General Index (GD.AT) components for which historical data is available are:')

for position, ticker in enumerate(components, start=1):
    print(f'{position}. {ticker}')

# Then list the tickers for which the probe returned nothing
print('\nThe Athens General Index (GD.AT) components for which historical data is not available are:')
for position, ticker in enumerate(no_data_stocklist, start=1):
    print(f'{position}. {ticker}')


The Athens General Index (GD.AT) components for which historical data is available are:
1. VIO.AT
2. CENER.AT
3. EEE.AT
4. HTO.AT
5. AEGN.AT
6. TENERGY.AT
7. PREMIA.AT
8. ADMIE.AT
9. PLAT.AT
10. BRIQ.AT
11. LAVI.AT
12. SAR.AT
13. AVAX.AT
14. ELLAKTOR.AT
15. OLYMP.AT
16. FOYRK.AT
17. GEKTERNA.AT
18. BIOKA.AT
19. MYTIL.AT
20. LAMDA.AT
21. INTRK.AT
22. ELHA.AT
23. ALPHA.AT
24. BELA.AT
25. ETE.AT
26. TPEIR.AT
27. ALMY.AT
28. EUROB.AT
29. ELPE.AT
30. QUEST.AT
31. PLAKR.AT
32. OTOEL.AT
33. INLOT.AT
34. PETRO.AT
35. EYDAP.AT
36. DOMIK.AT
37. IKTIN.AT
38. QUAL.AT
39. ELTON.AT
40. EXAE.AT
41. OPAP.AT
42. MOH.AT
43. OLTH.AT
44. EYAPS.AT
45. INKAT.AT
46. PPC.AT
47. CENTR.AT
48. PPA.AT
49. KRI.AT
50. INTEK.AT
51. PROF.AT
52. ELIN.AT
53. MOTO.AT

The Athens General Index (GD.AT) components for which historical data is not available are:
1. ACAG.AT
2. TITC.AT
3. INLIF.AT
4. BLEKEDROS.AT
5. DIMAND.AT
6. OPTIMA.AT
7. AIA.AT


In [None]:
# Separate stocks into small-cap and mid/large-cap categories.
# A stock is mid/large-cap when its reported market cap is strictly above this threshold.
# NOTE(review): the value is compared as returned by Yahoo Finance — presumably EUR for .AT tickers; confirm.
MARKET_CAP_THRESHOLD = 100_000_000

small_cap_stocks = []
mid_large_cap_stocks = []
for ticker in components:
    # `.info` is a plain dict and 'marketCap' may be absent for some tickers;
    # use .get so one missing value does not abort the whole classification.
    market_cap = yf.Ticker(ticker).info.get('marketCap')
    if market_cap is None:
        print(f"No market cap available for {ticker}; skipping.")
        continue
    if market_cap > MARKET_CAP_THRESHOLD:
        mid_large_cap_stocks.append(ticker)
    else:
        small_cap_stocks.append(ticker)

In [None]:
# Show both market-cap groups, each numbered from 1
print('Mid-cap and large-cap stocks of the GD.AT index are:')

for position, ticker in enumerate(mid_large_cap_stocks, start=1):
    print(f'{position}. {ticker}')

print('\nSmall-cap stocks of the GD.AT index are:')
# Iterate over the small-cap list itself; the loop length was previously taken from
# no_data_stocklist, which truncates the output or raises IndexError when the sizes differ.
for position, ticker in enumerate(small_cap_stocks, start=1):
    print(f'{position}. {ticker}')


Mid-cap and large-cap stocks of the GD.AT index are:
1. VIO.AT
2. CENER.AT
3. EEE.AT
4. HTO.AT
5. AEGN.AT
6. TENERGY.AT
7. PREMIA.AT
8. ADMIE.AT
9. PLAT.AT
10. LAVI.AT
11. SAR.AT
12. AVAX.AT
13. ELLAKTOR.AT
14. FOYRK.AT
15. GEKTERNA.AT
16. MYTIL.AT
17. LAMDA.AT
18. INTRK.AT
19. ELHA.AT
20. ALPHA.AT
21. BELA.AT
22. ETE.AT
23. TPEIR.AT
24. EUROB.AT
25. ELPE.AT
26. QUEST.AT
27. PLAKR.AT
28. OTOEL.AT
29. INLOT.AT
30. EYDAP.AT
31. EXAE.AT
32. OPAP.AT
33. MOH.AT
34. OLTH.AT
35. EYAPS.AT
36. INKAT.AT
37. PPC.AT
38. PPA.AT
39. KRI.AT
40. INTEK.AT
41. PROF.AT

Small-cap stocks of the GD.AT index are:
1. BRIQ.AT
2. OLYMP.AT
3. BIOKA.AT
4. ALMY.AT
5. PETRO.AT
6. DOMIK.AT
7. IKTIN.AT


### Downloading & Cleaning Data

In [None]:
def return_stocks(stocks, end_date=None, years=5):
    """
    Download closing prices for the given tickers and compute their daily logarithmic returns.

    Prices are downloaded from Yahoo Finance for the window
    [end_date - 366 * years days, end_date], sorted by date, and gaps are filled by
    linear interpolation. Log returns are computed as log(P_t / P_{t-1}), interpolated
    the same way, and the first row (which has no previous price) is removed.

    Parameters:
    stocks (list or str): A list of stock tickers (as strings) or a single stock ticker.
    end_date (datetime or None): The end date for the historical data. When None (the default),
        the current date and time at call time is used.
    years (int): The number of years of historical data to retrieve. Defaults to 5 years.

    Returns:
    tuple: A tuple containing:
        - The daily log returns of the stocks.
        - The historical closing prices of the stocks (after interpolation).
    """

    # Resolve the default here rather than in the signature: a default of
    # dt.datetime.now() is evaluated once, when the function is defined, and would
    # go stale in a long-running notebook kernel.
    if end_date is None:
        end_date = dt.datetime.now()

    start_date = end_date - dt.timedelta(days=366 * years)
    historical = yf.download(stocks, start=start_date, end=end_date)

    # Keep only the closing prices, ordered by date ascending
    historical = historical['Close'].sort_index()

    # Perform linear interpolation to fill in any missing values in the closing prices
    historical = historical.interpolate(method='linear', axis=0)

    # Calculate the daily logarithmic returns of the stocks
    log_returns = np.log(historical / historical.shift(1))
    log_returns = log_returns.interpolate(method='linear', axis=0)

    # Drop the first row, which is NaN because it has no previous price.
    # Positional slicing also copes with an empty download.
    log_returns = log_returns.iloc[1:]

    return log_returns, historical


In [None]:
# Use one end date for both downloads so the stock and index series cover
# exactly the same window (the window ends 366 days before now).
analysis_end_date = dt.datetime.now() - dt.timedelta(days=366)

# Download historical data for large-cap and mid-cap stocks
returns, prices = return_stocks(mid_large_cap_stocks, end_date=analysis_end_date)

# Download historical data for the Athens General Index and create a new DataFrame
# holding the stock returns plus the index returns in a 'GD.AT' column
r, p = return_stocks(['GD.AT'], end_date=analysis_end_date)
returns_plus_index = returns.copy()
returns_plus_index['GD.AT'] = r

[*********************100%%**********************]  41 of 41 completed
[*********************100%%**********************]  1 of 1 completed


In [None]:
# Display the combined table of stock log returns plus the 'GD.AT' index column
returns_plus_index

Ticker,ADMIE.AT,AEGN.AT,ALPHA.AT,AVAX.AT,BELA.AT,CENER.AT,EEE.AT,ELHA.AT,ELLAKTOR.AT,ELPE.AT,...,PPA.AT,PPC.AT,PREMIA.AT,PROF.AT,QUEST.AT,SAR.AT,TENERGY.AT,TPEIR.AT,VIO.AT,GD.AT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-08-07,0.005744,0.006035,-0.005420,0.003442,-0.004396,0.000000,0.001947,-0.006536,-0.005757,0.008427,...,0.000000,0.022618,0.000000,-0.020907,0.004158,-0.002729,0.005177,0.010464,0.012821,0.002183
2018-08-08,-0.001146,0.004802,-0.004357,-0.002294,-0.001469,0.000000,-0.009121,-0.023218,0.001154,0.000000,...,-0.007126,-0.011784,0.000000,-0.032203,-0.004158,0.000000,-0.012121,-0.011215,-0.006390,-0.004278
2018-08-09,-0.001147,-0.004802,-0.012082,-0.033860,-0.002946,0.003145,0.000000,-0.003361,0.020548,0.000000,...,0.001191,-0.018489,0.000000,0.053110,0.012423,0.010870,-0.006993,-0.014389,-0.003210,-0.004482
2018-08-10,-0.033862,-0.014546,-0.016151,0.002373,-0.020865,-0.004721,-0.025855,-0.034250,-0.006802,-0.014085,...,-0.003578,-0.026698,0.000000,0.000000,-0.004124,-0.013606,0.005249,-0.038092,-0.022765,-0.014576
2018-08-13,-0.013150,-0.013522,-0.077613,-0.011922,-0.013647,-0.038590,-0.024474,0.003478,-0.016056,-0.022957,...,-0.014441,-0.016482,0.000000,-0.075170,-0.008299,-0.013793,-0.005249,-0.086003,-0.028359,-0.031441
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-03,-0.002094,0.026338,0.013759,0.060625,0.000752,0.011835,-0.003033,0.004587,-0.016129,-0.001220,...,0.008753,0.005742,0.006173,0.013423,-0.009390,0.015625,0.005780,0.008643,0.003058,0.006458
2023-08-04,0.000000,0.001528,0.002481,0.022278,-0.008299,-0.002946,-0.003423,0.051293,0.000000,-0.001222,...,0.010834,0.005709,0.021310,0.015436,-0.003150,0.007722,-0.009847,0.001147,0.024134,0.003956
2023-08-07,0.014568,0.012140,-0.014667,0.003665,0.001514,-0.013363,0.004940,-0.017544,0.000000,0.015767,...,-0.032862,-0.000949,0.008996,-0.017661,-0.009509,0.014004,-0.008182,0.022664,-0.001491,0.000856
2023-08-08,-0.012474,-0.029076,-0.009476,-0.019705,0.002267,-0.028812,-0.001138,-0.013363,0.024098,-0.012107,...,-0.011198,-0.025001,0.000000,-0.011198,0.003180,0.002525,0.004684,-0.006745,-0.030305,-0.007189
