# Introduction

Fetching the data from AEX for all the stocks scraped from the list acquired on wikipedia

## Preamble

In [1]:
import os
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import yfinance as yf
import requests
from bs4 import BeautifulSoup

In [2]:
print(yf.__version__)  # Prints the version of pandas

0.2.37


In [3]:
cwd = os.getcwd()

In [4]:
#print(dir(yf))

In [5]:
#print(dir(yf.Ticker))

In [6]:
#print(dir(yf.Tickers))

## Web Scraper

In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the Wikipedia page for the AEX index
url = "https://nl.wikipedia.org/wiki/AEX"

# Send a GET request to the Wikipedia page
response = requests.get(url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the HTML content of the page using BeautifulSoup
    soup = BeautifulSoup(response.text, "html.parser")
    
    # Find all tables on the page
    tables = soup.find_all("table", class_="wikitable sortable")
    
    # Check if there are at least two tables
    if len(tables) >= 2:
        # Extract the second table (index 1)
        table = tables[1]
        
        # Extract the rows from the table
        rows = table.find_all("tr")
        
        # Extract the column headers
        headers = [header.text.strip() for header in rows[0].find_all("th")]
        
        # Extract the data from the table rows
        data = []
        for row in rows[1:]:
            data.append([cell.text.strip() for cell in row.find_all("td")])
        
        # Convert the data to a DataFrame
        df = pd.DataFrame(data, columns=headers)
        
        # Print the DataFrame
        #print("DataFrame for the second table on AEX index:")
        #print(df)
    else:
        print("Second table not found on the page.")
else:
    print("Failed to retrieve data from Wikipedia.")


In [8]:
print(df["Ticker symbol"].tolist())

['ABN', 'ADYEN', 'AGN', 'AD', 'AKZA', 'MT', 'ASM', 'ASML', 'ASRNL', 'BESI', 'DSM', 'EXOR', 'HEIA', 'IMCD', 'INGA', 'KPN', 'NN', 'PHIA', 'PRX', 'RAND', 'REN', 'RDSA', 'UNA', 'UMG', 'WKL']


## Check Available Dates

In [9]:
# List of stock symbols
stocks = df["Ticker symbol"].tolist()
aex_exchange_stringcode = ".AS"

# Dictionary to store available dates for each stock
available_dates = {}

# Fetch available dates for each stock
for stock_symbol in stocks:
    try:
        # Fetch historical data for the stock
        stock_data = yf.Ticker(stock_symbol + aex_exchange_stringcode).history(period='max')
        
        # Get the available dates
        available_dates[stock_symbol] = stock_data.index
    except Exception as e:
        print(f"Error fetching data for {stock_symbol}: {e}")

# Print available dates for each stock
for stock_symbol, dates in available_dates.items():
    print(f"Available dates for {stock_symbol}: {dates}")


DSM.AS: No timezone found, symbol may be delisted
EXOR.AS: No timezone found, symbol may be delisted
RDSA.AS: No timezone found, symbol may be delisted


Available dates for ABN: DatetimeIndex(['2015-11-20 00:00:00+01:00', '2015-11-23 00:00:00+01:00',
               '2015-11-24 00:00:00+01:00', '2015-11-25 00:00:00+01:00',
               '2015-11-26 00:00:00+01:00', '2015-11-27 00:00:00+01:00',
               '2015-11-30 00:00:00+01:00', '2015-12-01 00:00:00+01:00',
               '2015-12-02 00:00:00+01:00', '2015-12-03 00:00:00+01:00',
               ...
               '2024-03-28 00:00:00+01:00', '2024-04-02 00:00:00+02:00',
               '2024-04-03 00:00:00+02:00', '2024-04-04 00:00:00+02:00',
               '2024-04-05 00:00:00+02:00', '2024-04-08 00:00:00+02:00',
               '2024-04-09 00:00:00+02:00', '2024-04-10 00:00:00+02:00',
               '2024-04-11 00:00:00+02:00', '2024-04-12 00:00:00+02:00'],
              dtype='datetime64[ns, Europe/Amsterdam]', name='Date', length=2149, freq=None)
Available dates for ADYEN: DatetimeIndex(['2018-06-13 00:00:00+02:00', '2018-06-14 00:00:00+02:00',
               '2018-06-15 00:00

## Stocks Fetcher

In [10]:
# List of AEX stocks
aex_stocks = df["Ticker symbol"].tolist()
#start_date ='2022-01-01'
#end_date = '2022-12-31'
aex_exchange_symbol = ".AS" # Append ".AS" for Amsterdam exchange

# Directory to save the dataframes
output_directory = "aex_data/" 

# Create the output directory if it doesn't exist
try:
    os.makedirs(output_directory)
    print(f"Output directory '{output_directory}' created successfully.")
except FileExistsError:
    print(f"Output directory '{output_directory}' already exists.")

os_path = os.path.join(cwd, output_directory)
# Download historical data for each stock
failed_stocks = []

for stock in aex_stocks:
    try:
        # Fetch historical data for the stock
        stock_data = yf.download(stock + aex_exchange_symbol, period='max')
        
        # Check if data is not empty
        if not stock_data.empty:
            # Get the maximum available start and end dates
            start_date = stock_data.index.min().strftime('%Y-%m-%d')
            end_date = stock_data.index.max().strftime('%Y-%m-%d')
            
            # Save the dataframe to CSV
            file_path = os.path.join(os_path, f"{stock}_data_{start_date}_{end_date}.csv")
            stock_data.to_csv(file_path)
            
            print(f"Successfully downloaded and saved data for {stock}.")
        else:
            print(f"Downloaded data for {stock} is empty.")
            failed_stocks.append(stock)
    except Exception as e:
        print(f"Failed to download data for {stock}: {e}")
        failed_stocks.append(stock)

# Print failed stocks
print("Failed to download data for the following stocks:", failed_stocks)

Output directory 'aex_data/' already exists.


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for ABN.
Successfully downloaded and saved data for ADYEN.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for AGN.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for AD.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for AKZA.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for MT.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for ASM.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for ASML.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for ASRNL.


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['DSM.AS']: Exception('%ticker%: No timezone found, symbol may be delisted')
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['EXOR.AS']: Exception('%ticker%: No timezone found, symbol may be delisted')


Successfully downloaded and saved data for BESI.
Downloaded data for DSM is empty.
Downloaded data for EXOR is empty.


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Successfully downloaded and saved data for HEIA.





Successfully downloaded and saved data for IMCD.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for INGA.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for KPN.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for NN.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for PHIA.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for PRX.


[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for RAND.


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['RDSA.AS']: Exception('%ticker%: No timezone found, symbol may be delisted')


Successfully downloaded and saved data for REN.
Downloaded data for RDSA is empty.


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Successfully downloaded and saved data for UNA.
Successfully downloaded and saved data for UMG.


[*********************100%%**********************]  1 of 1 completed

Successfully downloaded and saved data for WKL.
Failed to download data for the following stocks: ['DSM', 'EXOR', 'RDSA']



