# This notebook extract the informations regarding the second election.

This step extracts tables from Yahoo finance, creates and renames csv, that are finally downloaded.

In [180]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Liste des URLs et noms correspondants
urls = {
    "https://fr.finance.yahoo.com/quote/%5EFCHI/history/": "CAC40",
    "https://fr.finance.yahoo.com/quote/%5EGSPC/history/": "S&P500",
    "https://fr.finance.yahoo.com/quote/GC%3DF/history/": "Gold",
    "https://fr.finance.yahoo.com/quote/EURUSD%3DX/history/": "EURUSD",
    "https://fr.finance.yahoo.com/quote/%5EIXIC/history/": "NASDAQ",
    "https://fr.finance.yahoo.com/quote/%5EN225/history/": "Nikkei",
    "https://fr.finance.yahoo.com/quote/ZN%3DF/history/": "Taux10ans"
}

# Fonction pour scraper un tableau sur une URL
def scrape_table_from_url(url):
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table')

    if table:
        rows = table.find_all('tr')
        data = []

        for row in rows:
            cols = row.find_all(['th', 'td'])
            cols = [col.text.strip() for col in cols]
            data.append(cols)

        # Création d'un DataFrame pandas
        df = pd.DataFrame(data)
        return df
    else:
        print(f"Aucun tableau trouvé pour l'URL : {url}")
        return None

# Scraper tous les tableaux des URLs
all_tables = {}

for url in urls:
    df = scrape_table_from_url(url)
    if df is not None:
        all_tables[url] = df



# Scraper les tableaux et ajouter la colonne à gauche
for url, name in urls.items():
    df = scrape_table_from_url(url)
    if df is not None:
        # Ajouter la colonne avec le nom du marché à gauche
        df.insert(0, "Indice", name)
        
        # Sauvegarde du DataFrame dans un fichier CSV avec le nom spécifique
        filename = f"{name}_data.csv"
        df.to_csv(filename, index=False)
        print(f"Fichier CSV enregistré sous le nom : {filename}")


Fichier CSV enregistré sous le nom : CAC40_data.csv
Fichier CSV enregistré sous le nom : S&P500_data.csv
Fichier CSV enregistré sous le nom : Gold_data.csv
Fichier CSV enregistré sous le nom : EURUSD_data.csv
Fichier CSV enregistré sous le nom : NASDAQ_data.csv
Fichier CSV enregistré sous le nom : Nikkei_data.csv
Fichier CSV enregistré sous le nom : Taux10ans_data.csv


We transform the previously made CSV into pandas DataFrames

In [181]:
CAC = pd.read_csv('CAC40_data.csv')
SandP500 = pd.read_csv('S&P500_data.csv')
Gold = pd.read_csv('Gold_data.csv')
EURUSD = pd.read_csv('EURUSD_data.csv')
NASDAC = pd.read_csv('NASDAQ_data.csv')
Nikkei = pd.read_csv('Nikkei_data.csv')
Taux10ans = pd.read_csv('Taux10ans_data.csv')

Creating a list of DataFrames for merging each of them at once

In [182]:
# Liste des DataFrames déjà chargés
dfs = {
    "CAC": CAC,
    "SandP500": SandP500,
    "Gold": Gold,
    "EURUSD": EURUSD,
    "NASDAC": NASDAC,
    "Nikkei": Nikkei,
    "Taux10ans": Taux10ans
}

In [183]:
from datetime import datetime

# Date de l'élection de Donald Trump
date_election = datetime(2024, 7, 1)

# Concaténer tous les DataFrames en un seul
final_df = pd.concat(dfs.values(), ignore_index=True)


In [184]:
final_df

Unnamed: 0,Indice,0,1,2,3,4,5,6
0,CAC40,Date,Ouverture,Plus haut,Plus bas,Fermer Cours de clôture ajusté en fonctio...,Clôture ajustée Cours de clôture ajusté p...,Volume
1,CAC40,16 janv. 2025,"7 591,35","7 634,74","7 567,67","7 634,74","7 634,74",-
2,CAC40,15 janv. 2025,"7 447,39","7 514,91","7 415,52","7 474,59","7 474,59",90 347 900
3,CAC40,14 janv. 2025,"7 481,95","7 499,77","7 423,49","7 423,67","7 423,67",75 439 900
4,CAC40,13 janv. 2025,"7 409,56","7 424,51","7 353,05","7 408,64","7 408,64",61 436 800
...,...,...,...,...,...,...,...,...
1781,Taux10ans,22 janv. 2024,111234375,111609375,111140625,111500000,111500000,1 325 752
1782,Taux10ans,19 janv. 2024,111218750,111296875,110812500,111125000,111125000,1 538 398
1783,Taux10ans,18 janv. 2024,111421875,111656250,111140625,111203125,111203125,1 703 768
1784,Taux10ans,17 janv. 2024,111906250,112046875,111281250,111421875,111421875,2 139 591


Reseting the index to have the first row as column names.

In [185]:
if final_df.iloc[0].isna().sum() > 0:
    print("Attention : certaines colonnes de la première ligne sont vides.")
else:
    final_df.columns = final_df.iloc[0]
    final_df = final_df.drop(0).reset_index(drop=True)

In [186]:
final_df

Unnamed: 0,CAC40,Date,Ouverture,Plus haut,Plus bas,Fermer Cours de clôture ajusté en fonction des fractionnements.,Clôture ajustée Cours de clôture ajusté pour les fractionnements et les distributions de dividendes et/ou de plus-values.,Volume
0,CAC40,16 janv. 2025,"7 591,35","7 634,74","7 567,67","7 634,74","7 634,74",-
1,CAC40,15 janv. 2025,"7 447,39","7 514,91","7 415,52","7 474,59","7 474,59",90 347 900
2,CAC40,14 janv. 2025,"7 481,95","7 499,77","7 423,49","7 423,67","7 423,67",75 439 900
3,CAC40,13 janv. 2025,"7 409,56","7 424,51","7 353,05","7 408,64","7 408,64",61 436 800
4,CAC40,10 janv. 2025,"7 495,21","7 521,22","7 417,59","7 431,04","7 431,04",65 416 600
...,...,...,...,...,...,...,...,...
1780,Taux10ans,22 janv. 2024,111234375,111609375,111140625,111500000,111500000,1 325 752
1781,Taux10ans,19 janv. 2024,111218750,111296875,110812500,111125000,111125000,1 538 398
1782,Taux10ans,18 janv. 2024,111421875,111656250,111140625,111203125,111203125,1 703 768
1783,Taux10ans,17 janv. 2024,111906250,112046875,111281250,111421875,111421875,2 139 591


We change the date format 

In [187]:
import locale
locale.setlocale(locale.LC_TIME, 'fr_FR.UTF-8')
final_df['Date'] = pd.to_datetime(final_df['Date'], format='%d %b %Y', errors='coerce')


We create a after/before the election variable 

In [188]:

# Conversion des colonnes de date et ajout de la colonne "Avant/Après élection"
final_df['Date'] = pd.to_datetime(final_df['Date'], errors='coerce')
final_df['Avant_Apres_Election'] = final_df['Date'].apply(lambda x: 'Before' if x < date_election else 'After')

# Exporter vers un CSV consolidé
final_df.to_csv("indices_financiers_consolides.csv", index=False)
print("Le fichier CSV consolidé a été enregistré sous le nom : indices_financiers_consolides.csv")

Le fichier CSV consolidé a été enregistré sous le nom : indices_financiers_consolides.csv


In [189]:
final_df

Unnamed: 0,CAC40,Date,Ouverture,Plus haut,Plus bas,Fermer Cours de clôture ajusté en fonction des fractionnements.,Clôture ajustée Cours de clôture ajusté pour les fractionnements et les distributions de dividendes et/ou de plus-values.,Volume,Avant_Apres_Election
0,CAC40,2025-01-16,"7 591,35","7 634,74","7 567,67","7 634,74","7 634,74",-,After
1,CAC40,2025-01-15,"7 447,39","7 514,91","7 415,52","7 474,59","7 474,59",90 347 900,After
2,CAC40,2025-01-14,"7 481,95","7 499,77","7 423,49","7 423,67","7 423,67",75 439 900,After
3,CAC40,2025-01-13,"7 409,56","7 424,51","7 353,05","7 408,64","7 408,64",61 436 800,After
4,CAC40,2025-01-10,"7 495,21","7 521,22","7 417,59","7 431,04","7 431,04",65 416 600,After
...,...,...,...,...,...,...,...,...,...
1780,Taux10ans,2024-01-22,111234375,111609375,111140625,111500000,111500000,1 325 752,Before
1781,Taux10ans,2024-01-19,111218750,111296875,110812500,111125000,111125000,1 538 398,Before
1782,Taux10ans,2024-01-18,111421875,111656250,111140625,111203125,111203125,1 703 768,Before
1783,Taux10ans,2024-01-17,111906250,112046875,111281250,111421875,111421875,2 139 591,Before


Deleting empty date values

In [190]:
final_df = final_df.dropna(subset=['Date'])  # Supprime les lignes sans date
final_df['Date'] = pd.to_datetime(final_df['Date'], errors='coerce')  # Convertit la colonne en datetime


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['Date'] = pd.to_datetime(final_df['Date'], errors='coerce')  # Convertit la colonne en datetime


In [191]:
final_df

Unnamed: 0,CAC40,Date,Ouverture,Plus haut,Plus bas,Fermer Cours de clôture ajusté en fonction des fractionnements.,Clôture ajustée Cours de clôture ajusté pour les fractionnements et les distributions de dividendes et/ou de plus-values.,Volume,Avant_Apres_Election
0,CAC40,2025-01-16,"7 591,35","7 634,74","7 567,67","7 634,74","7 634,74",-,After
1,CAC40,2025-01-15,"7 447,39","7 514,91","7 415,52","7 474,59","7 474,59",90 347 900,After
2,CAC40,2025-01-14,"7 481,95","7 499,77","7 423,49","7 423,67","7 423,67",75 439 900,After
3,CAC40,2025-01-13,"7 409,56","7 424,51","7 353,05","7 408,64","7 408,64",61 436 800,After
4,CAC40,2025-01-10,"7 495,21","7 521,22","7 417,59","7 431,04","7 431,04",65 416 600,After
...,...,...,...,...,...,...,...,...,...
1780,Taux10ans,2024-01-22,111234375,111609375,111140625,111500000,111500000,1 325 752,Before
1781,Taux10ans,2024-01-19,111218750,111296875,110812500,111125000,111125000,1 538 398,Before
1782,Taux10ans,2024-01-18,111421875,111656250,111140625,111203125,111203125,1 703 768,Before
1783,Taux10ans,2024-01-17,111906250,112046875,111281250,111421875,111421875,2 139 591,Before


Some verifications

In [192]:
filtered_df_before_election = final_df[final_df['Avant_Apres_Election'] == 'Before']
filtered_df_before_election


Unnamed: 0,CAC40,Date,Ouverture,Plus haut,Plus bas,Fermer Cours de clôture ajusté en fonction des fractionnements.,Clôture ajustée Cours de clôture ajusté pour les fractionnements et les distributions de dividendes et/ou de plus-values.,Volume,Avant_Apres_Election
141,CAC40,2024-06-28,"7 544,40","7 544,40","7 456,47","7 479,40","7 479,40",82 277 400,Before
142,CAC40,2024-06-27,"7 619,10","7 626,38","7 522,16","7 530,72","7 530,72",63 132 200,Before
143,CAC40,2024-06-26,"7 703,80","7 707,15","7 559,63","7 609,15","7 609,15",66 431 400,Before
144,CAC40,2024-06-25,"7 648,14","7 671,06","7 617,16","7 662,30","7 662,30",66 819 100,Before
145,CAC40,2024-06-24,"7 633,99","7 725,28","7 629,27","7 706,89","7 706,89",60 676 900,Before
...,...,...,...,...,...,...,...,...,...
1780,Taux10ans,2024-01-22,111234375,111609375,111140625,111500000,111500000,1 325 752,Before
1781,Taux10ans,2024-01-19,111218750,111296875,110812500,111125000,111125000,1 538 398,Before
1782,Taux10ans,2024-01-18,111421875,111656250,111140625,111203125,111203125,1 703 768,Before
1783,Taux10ans,2024-01-17,111906250,112046875,111281250,111421875,111421875,2 139 591,Before


In [193]:
from datetime import datetime

# Définir la date pivot
date_pivot = datetime(2024, 7, 1)

# S'assurer que la colonne 'Date' est bien convertie en datetime
final_df['Date'] = pd.to_datetime(final_df['Date'], errors='coerce')

# Filtrer les lignes avant la date pivot
filtered_df_before_pivot = final_df[final_df['Date'] < date_pivot]

# Affichage des premières lignes des données avant le 1er juillet 2024
print(filtered_df_before_pivot.head())

# Optionnel : Exporter vers un CSV si besoin
filtered_df_before_pivot.to_csv("avant_1_juillet_2024.csv", index=False)
print("Les données avant le 1er juillet 2024 ont été exportées dans avant_1_juillet_2024.csv")


0    CAC40       Date Ouverture Plus haut  Plus bas  \
141  CAC40 2024-06-28  7 544,40  7 544,40  7 456,47   
142  CAC40 2024-06-27  7 619,10  7 626,38  7 522,16   
143  CAC40 2024-06-26  7 703,80  7 707,15  7 559,63   
144  CAC40 2024-06-25  7 648,14  7 671,06  7 617,16   
145  CAC40 2024-06-24  7 633,99  7 725,28  7 629,27   

0   Fermer      Cours de clôture ajusté en fonction des fractionnements.  \
141                                           7 479,40                     
142                                           7 530,72                     
143                                           7 609,15                     
144                                           7 662,30                     
145                                           7 706,89                     

0   Clôture ajustée      Cours de clôture ajusté pour les fractionnements et les distributions de dividendes et/ou de plus-values.  \
141                                           7 479,40                        

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['Date'] = pd.to_datetime(final_df['Date'], errors='coerce')


In [194]:
filtered_df_before_pivot

Unnamed: 0,CAC40,Date,Ouverture,Plus haut,Plus bas,Fermer Cours de clôture ajusté en fonction des fractionnements.,Clôture ajustée Cours de clôture ajusté pour les fractionnements et les distributions de dividendes et/ou de plus-values.,Volume,Avant_Apres_Election
141,CAC40,2024-06-28,"7 544,40","7 544,40","7 456,47","7 479,40","7 479,40",82 277 400,Before
142,CAC40,2024-06-27,"7 619,10","7 626,38","7 522,16","7 530,72","7 530,72",63 132 200,Before
143,CAC40,2024-06-26,"7 703,80","7 707,15","7 559,63","7 609,15","7 609,15",66 431 400,Before
144,CAC40,2024-06-25,"7 648,14","7 671,06","7 617,16","7 662,30","7 662,30",66 819 100,Before
145,CAC40,2024-06-24,"7 633,99","7 725,28","7 629,27","7 706,89","7 706,89",60 676 900,Before
...,...,...,...,...,...,...,...,...,...
1780,Taux10ans,2024-01-22,111234375,111609375,111140625,111500000,111500000,1 325 752,Before
1781,Taux10ans,2024-01-19,111218750,111296875,110812500,111125000,111125000,1 538 398,Before
1782,Taux10ans,2024-01-18,111421875,111656250,111140625,111203125,111203125,1 703 768,Before
1783,Taux10ans,2024-01-17,111906250,112046875,111281250,111421875,111421875,2 139 591,Before


In [195]:
erreurs_dates = final_df[final_df['Date'].isna()]
print(erreurs_dates[['Date']].head(10))  # Affiche les 10 premières valeurs problématiques



Empty DataFrame
Columns: [Date]
Index: []


Creating new columns

In [196]:
final_df['Day_of_Week'] = final_df['Date'].dt.day_name()
final_df['Month'] = final_df['Date'].dt.month_name()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['Day_of_Week'] = final_df['Date'].dt.day_name()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['Month'] = final_df['Date'].dt.month_name()


In [197]:
final_df['Is_Holiday_Period'] = final_df['Month'].apply(lambda x: 'yes' if x in ['November', 'December'] else 'no')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['Is_Holiday_Period'] = final_df['Month'].apply(lambda x: 'yes' if x in ['November', 'December'] else 'no')


In [198]:
final_df

Unnamed: 0,CAC40,Date,Ouverture,Plus haut,Plus bas,Fermer Cours de clôture ajusté en fonction des fractionnements.,Clôture ajustée Cours de clôture ajusté pour les fractionnements et les distributions de dividendes et/ou de plus-values.,Volume,Avant_Apres_Election,Day_of_Week,Month,Is_Holiday_Period
0,CAC40,2025-01-16,"7 591,35","7 634,74","7 567,67","7 634,74","7 634,74",-,After,Thursday,January,no
1,CAC40,2025-01-15,"7 447,39","7 514,91","7 415,52","7 474,59","7 474,59",90 347 900,After,Wednesday,January,no
2,CAC40,2025-01-14,"7 481,95","7 499,77","7 423,49","7 423,67","7 423,67",75 439 900,After,Tuesday,January,no
3,CAC40,2025-01-13,"7 409,56","7 424,51","7 353,05","7 408,64","7 408,64",61 436 800,After,Monday,January,no
4,CAC40,2025-01-10,"7 495,21","7 521,22","7 417,59","7 431,04","7 431,04",65 416 600,After,Friday,January,no
...,...,...,...,...,...,...,...,...,...,...,...,...
1780,Taux10ans,2024-01-22,111234375,111609375,111140625,111500000,111500000,1 325 752,Before,Monday,January,no
1781,Taux10ans,2024-01-19,111218750,111296875,110812500,111125000,111125000,1 538 398,Before,Friday,January,no
1782,Taux10ans,2024-01-18,111421875,111656250,111140625,111203125,111203125,1 703 768,Before,Thursday,January,no
1783,Taux10ans,2024-01-17,111906250,112046875,111281250,111421875,111421875,2 139 591,Before,Wednesday,January,no


Renaming the columns

In [199]:
print(final_df.columns.to_list())


['CAC40', 'Date', 'Ouverture', 'Plus haut', 'Plus bas', 'Fermer      Cours de clôture ajusté en fonction des fractionnements.', 'Clôture ajustée      Cours de clôture ajusté pour les fractionnements et les distributions de dividendes et/ou de plus-values.', 'Volume', 'Avant_Apres_Election', 'Day_of_Week', 'Month', 'Is_Holiday_Period']


In [200]:
final_df.rename(columns={
    'CAC40': 'Index',
    'Ouverture': 'Opening',
    'Plus haut': 'Highest',
    'Plus bas': 'Lowest',
    'Fermer      Cours de clôture ajusté en fonction des fractionnements.': 'Closure',
    'Clôture ajustée      Cours de clôture ajusté pour les fractionnements et les distributions de dividendes et/ou de plus-values.': 'Adjusted_Closure',
    'Avant_Apres_Election': 'Before_After_Election'
}, inplace=True)
final_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df.rename(columns={


Unnamed: 0,Index,Date,Opening,Highest,Lowest,Closure,Adjusted_Closure,Volume,Before_After_Election,Day_of_Week,Month,Is_Holiday_Period
0,CAC40,2025-01-16,"7 591,35","7 634,74","7 567,67","7 634,74","7 634,74",-,After,Thursday,January,no
1,CAC40,2025-01-15,"7 447,39","7 514,91","7 415,52","7 474,59","7 474,59",90 347 900,After,Wednesday,January,no
2,CAC40,2025-01-14,"7 481,95","7 499,77","7 423,49","7 423,67","7 423,67",75 439 900,After,Tuesday,January,no
3,CAC40,2025-01-13,"7 409,56","7 424,51","7 353,05","7 408,64","7 408,64",61 436 800,After,Monday,January,no
4,CAC40,2025-01-10,"7 495,21","7 521,22","7 417,59","7 431,04","7 431,04",65 416 600,After,Friday,January,no
...,...,...,...,...,...,...,...,...,...,...,...,...
1780,Taux10ans,2024-01-22,111234375,111609375,111140625,111500000,111500000,1 325 752,Before,Monday,January,no
1781,Taux10ans,2024-01-19,111218750,111296875,110812500,111125000,111125000,1 538 398,Before,Friday,January,no
1782,Taux10ans,2024-01-18,111421875,111656250,111140625,111203125,111203125,1 703 768,Before,Thursday,January,no
1783,Taux10ans,2024-01-17,111906250,112046875,111281250,111421875,111421875,2 139 591,Before,Wednesday,January,no


Downloading the final df

In [201]:
final_df.to_csv('final_df.csv')