# This notebook extract the informations regarding the second election.

This step extracts tables from Yahoo finance, creates and renames csv, that are finally downloaded.

In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Liste des URLs et noms correspondants
urls = {"https://finance.yahoo.com/quote/%5ETNX/history/?period1=1295827200&period2=1737710549" : "Taux10ans",}

# Fonction pour scraper un tableau sur une URL
def scrape_table_from_url(url):
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table')

    if table:
        rows = table.find_all('tr')
        data = []

        for row in rows:
            cols = row.find_all(['th', 'td'])
            cols = [col.text.strip() for col in cols]
            data.append(cols)

        # Création d'un DataFrame pandas
        df = pd.DataFrame(data)
        return df
    else:
        print(f"Aucun tableau trouvé pour l'URL : {url}")
        return None

# Scraper tous les tableaux des URLs
all_tables = {}

for url in urls:
    df = scrape_table_from_url(url)
    if df is not None:
        all_tables[url] = df




# Scraper les tableaux et ajouter la colonne à gauche
for url, name in urls.items():
    df = scrape_table_from_url(url)
    if df is not None:
        # Ajouter la colonne avec le nom du marché à gauche
        df.insert(0, "Indice", name)
        
        # Sauvegarde du DataFrame dans un fichier CSV avec le nom spécifique
        filename = f"{name}all_data.csv"
        df.to_csv(filename, index=False)
        print(f"Fichier CSV enregistré sous le nom : {filename}")


Fichier CSV enregistré sous le nom : Taux10ansall_data.csv


We transform the previously made CSV into pandas DataFrames

In [6]:

Taux10ans = pd.read_csv('Taux10ansall_data.csv')




In [7]:
# Suppression de la première ligne

Taux10ans = Taux10ans.iloc[1:]


In [8]:
months_translation = {
    'janv.': 'Jan', 'févr.': 'Feb', 'mars': 'Mar', 'avr.': 'Apr', 'mai': 'May', 'juin': 'Jun',
    'juil.': 'Jul', 'août': 'Aug', 'sept.': 'Sep', 'oct.': 'Oct', 'nov.': 'Nov', 'déc.': 'Dec'
}

# Remplacer les mois en français par les mois en anglais

Taux10ans['0'] = Taux10ans['0'].replace(months_translation, regex=True)



In [9]:
# Conversion en datetime

Taux10ans['0'] = pd.to_datetime(Taux10ans['0'], dayfirst=True)


Creating a list of DataFrames for merging each of them at once

In [10]:
# Liste des DataFrames déjà chargés
dfs = {
  
    "Taux10ans": Taux10ans,
    
}

In [11]:
from datetime import datetime

# Date de l'élection de Donald Trump
date_election = datetime(2012, 11, 5)

# Concaténer tous les DataFrames en un seul
final_df = pd.concat(dfs.values(), ignore_index=True)


In [12]:
final_df


Unnamed: 0,Indice,0,1,2,3,4,5,6
0,Taux10ans,2025-01-23,4.6380,4.6640,4.6170,4.6380,4.6380,-
1,Taux10ans,2025-01-22,4.5600,4.6170,4.5600,4.5990,4.5990,-
2,Taux10ans,2025-01-21,4.5740,4.5850,4.5520,4.5740,4.5740,-
3,Taux10ans,2025-01-17,4.5720,4.6250,4.5680,4.6090,4.6090,-
4,Taux10ans,2025-01-16,4.6840,4.6940,4.5880,4.6060,4.6060,-
...,...,...,...,...,...,...,...,...
3516,Taux10ans,2011-01-28,3.4210,3.4530,3.3100,3.3290,3.3290,-
3517,Taux10ans,2011-01-27,3.4620,3.4620,3.3790,3.3850,3.3850,-
3518,Taux10ans,2011-01-26,3.3790,3.4420,3.3620,3.4280,3.4280,-
3519,Taux10ans,2011-01-25,3.3710,3.4200,3.3090,3.3190,3.3190,-


Indice                                                                                                                            object
Date                                                                                                                              object
Ouverture                                                                                                                         object
Plus haut                                                                                                                         object
Plus bas                                                                                                                          object
Fermer      Cours de clôture ajusté en fonction des fractionnements.                                                              object
Clôture ajustée      Cours de clôture ajusté pour les fractionnements et les distributions de dividendes et/ou de plus-values.    object
Volume                                                                                                                            object

dtype: object

In [13]:
final_df = final_df.rename(columns={'0' :'Date', '1':'Ouverture', '2' : 'Plus haut', '3' : 'Plus bas', '4' : 'Fermer', '5' : 'Cloture ajusté', '6' :'Volume'})

# Problème avec le mois qui contiennt des accents


Reseting the index to have the first row as column names.

We create a after/before the election variable 

In [14]:

final_df['Avant_Apres_Election'] = final_df['Date'].apply(lambda x: 'Before' if x < date_election else 'After')

# Exporter vers un CSV consolidé
final_df.to_csv("indices_financiers_consolidesall.csv", index=False)
print("Le fichier CSV consolidé a été enregistré sous le nom : indices_financiers_consolides1.csv")

Le fichier CSV consolidé a été enregistré sous le nom : indices_financiers_consolides1.csv


In [15]:
final_df

Unnamed: 0,Indice,Date,Ouverture,Plus haut,Plus bas,Fermer,Cloture ajusté,Volume,Avant_Apres_Election
0,Taux10ans,2025-01-23,4.6380,4.6640,4.6170,4.6380,4.6380,-,After
1,Taux10ans,2025-01-22,4.5600,4.6170,4.5600,4.5990,4.5990,-,After
2,Taux10ans,2025-01-21,4.5740,4.5850,4.5520,4.5740,4.5740,-,After
3,Taux10ans,2025-01-17,4.5720,4.6250,4.5680,4.6090,4.6090,-,After
4,Taux10ans,2025-01-16,4.6840,4.6940,4.5880,4.6060,4.6060,-,After
...,...,...,...,...,...,...,...,...,...
3516,Taux10ans,2011-01-28,3.4210,3.4530,3.3100,3.3290,3.3290,-,Before
3517,Taux10ans,2011-01-27,3.4620,3.4620,3.3790,3.3850,3.3850,-,Before
3518,Taux10ans,2011-01-26,3.3790,3.4420,3.3620,3.4280,3.4280,-,Before
3519,Taux10ans,2011-01-25,3.3710,3.4200,3.3090,3.3190,3.3190,-,Before


Some verifications

In [22]:
filtered_df_before_election = final_df[final_df['Avant_Apres_Election'] == 'Before']
filtered_df_before_election


Unnamed: 0,Indice,Date,Ouverture,Plus haut,Plus bas,Fermer,Cloture ajusté,Volume,Avant_Apres_Election
1334,CAC40,2016-11-04,"4 403,16","4 403,64","4 355,81","4 377,46","4 377,46",101 731 700,Before
1335,CAC40,2016-11-03,"4 402,79","4 456,21","4 399,29","4 411,68","4 411,68",101 390 600,Before
1336,CAC40,2016-11-02,"4 437,99","4 449,44","4 414,67","4 414,67","4 414,67",96 987 600,Before
1337,CAC40,2016-11-01,"4 532,34","4 537,99","4 460,54","4 470,28","4 470,28",79 346 900,Before
1338,CAC40,2016-10-31,"4 525,60","4 540,26","4 500,19","4 509,26","4 509,26",88 366 600,Before
...,...,...,...,...,...,...,...,...,...
13617,DowJ,2016-01-29,15957.00,16376.00,15937.00,16356.00,16356.00,217260,Before
13618,DowJ,2016-01-28,15861.00,16055.00,15768.00,15972.00,15972.00,268654,Before
13619,DowJ,2016-01-27,15999.00,16151.00,15789.00,15851.00,15851.00,261837,Before
13620,DowJ,2016-01-26,15810.00,16101.00,15640.00,16066.00,16066.00,193761,Before


In [23]:
from datetime import datetime

# Définir la date pivot
date_pivot = datetime(2012, 11, 5)



# Filtrer les lignes avant la date pivot
filtered_df_before_pivot = final_df[final_df['Date'] < date_pivot]

# Affichage des premières lignes des données avant le 5th november 2024
print(filtered_df_before_pivot.head())

# Optionnel : Exporter vers un CSV si besoin
filtered_df_before_pivot.to_csv("avant_1_juillet_2024.csv", index=False)
print("Les données avant le 1er juillet 2024 ont été exportées dans avant_novembre_2012.csv")


     Indice       Date Ouverture Plus haut  Plus bas    Fermer Cloture ajusté  \
1334  CAC40 2016-11-04  4 403,16  4 403,64  4 355,81  4 377,46       4 377,46   
1335  CAC40 2016-11-03  4 402,79  4 456,21  4 399,29  4 411,68       4 411,68   
1336  CAC40 2016-11-02  4 437,99  4 449,44  4 414,67  4 414,67       4 414,67   
1337  CAC40 2016-11-01  4 532,34  4 537,99  4 460,54  4 470,28       4 470,28   
1338  CAC40 2016-10-31  4 525,60  4 540,26  4 500,19  4 509,26       4 509,26   

           Volume Avant_Apres_Election  
1334  101 731 700               Before  
1335  101 390 600               Before  
1336   96 987 600               Before  
1337   79 346 900               Before  
1338   88 366 600               Before  
Les données avant le 1er juillet 2024 ont été exportées dans avant_1_juillet_2024.csv


In [24]:
filtered_df_before_pivot

Unnamed: 0,Indice,Date,Ouverture,Plus haut,Plus bas,Fermer,Cloture ajusté,Volume,Avant_Apres_Election
1334,CAC40,2016-11-04,"4 403,16","4 403,64","4 355,81","4 377,46","4 377,46",101 731 700,Before
1335,CAC40,2016-11-03,"4 402,79","4 456,21","4 399,29","4 411,68","4 411,68",101 390 600,Before
1336,CAC40,2016-11-02,"4 437,99","4 449,44","4 414,67","4 414,67","4 414,67",96 987 600,Before
1337,CAC40,2016-11-01,"4 532,34","4 537,99","4 460,54","4 470,28","4 470,28",79 346 900,Before
1338,CAC40,2016-10-31,"4 525,60","4 540,26","4 500,19","4 509,26","4 509,26",88 366 600,Before
...,...,...,...,...,...,...,...,...,...
13617,DowJ,2016-01-29,15957.00,16376.00,15937.00,16356.00,16356.00,217260,Before
13618,DowJ,2016-01-28,15861.00,16055.00,15768.00,15972.00,15972.00,268654,Before
13619,DowJ,2016-01-27,15999.00,16151.00,15789.00,15851.00,15851.00,261837,Before
13620,DowJ,2016-01-26,15810.00,16101.00,15640.00,16066.00,16066.00,193761,Before


In [25]:
erreurs_dates = final_df[final_df['Date'].isna()]
print(erreurs_dates[['Date']].head(10))  # Affiche les 10 premières valeurs problématiques



Empty DataFrame
Columns: [Date]
Index: []


Creating new columns

In [26]:
final_df['Day_of_Week'] = final_df['Date'].dt.day_name()
final_df['Month'] = final_df['Date'].dt.month_name()



In [27]:
final_df['Is_Holiday_Period'] = final_df['Month'].apply(lambda x: 'yes' if x in ['November', 'December'] else 'no')


In [28]:
final_df

Unnamed: 0,Indice,Date,Ouverture,Plus haut,Plus bas,Fermer,Cloture ajusté,Volume,Avant_Apres_Election,Day_of_Week,Month,Is_Holiday_Period
0,CAC40,2022-01-21,"7 088,44","7 122,06","7 013,49","7 068,59","7 068,59",110 697 300,After,Friday,January,no
1,CAC40,2022-01-20,"7 190,02","7 199,97","7 123,62","7 194,16","7 194,16",76 932 800,After,Thursday,January,no
2,CAC40,2022-01-19,"7 104,11","7 211,24","7 104,11","7 172,98","7 172,98",84 173 800,After,Wednesday,January,no
3,CAC40,2022-01-18,"7 172,43","7 175,46","7 098,19","7 133,83","7 133,83",78 561 500,After,Tuesday,January,no
4,CAC40,2022-01-17,"7 167,53","7 213,70","7 148,98","7 201,64","7 201,64",53 465 300,After,Monday,January,no
...,...,...,...,...,...,...,...,...,...,...,...,...
13617,DowJ,2016-01-29,15957.00,16376.00,15937.00,16356.00,16356.00,217260,Before,Friday,January,no
13618,DowJ,2016-01-28,15861.00,16055.00,15768.00,15972.00,15972.00,268654,Before,Thursday,January,no
13619,DowJ,2016-01-27,15999.00,16151.00,15789.00,15851.00,15851.00,261837,Before,Wednesday,January,no
13620,DowJ,2016-01-26,15810.00,16101.00,15640.00,16066.00,16066.00,193761,Before,Tuesday,January,no


Renaming the columns

In [29]:
print(final_df.columns.to_list())


['Indice', 'Date', 'Ouverture', 'Plus haut', 'Plus bas', 'Fermer', 'Cloture ajusté', 'Volume', 'Avant_Apres_Election', 'Day_of_Week', 'Month', 'Is_Holiday_Period']


In [30]:
final_df.rename(columns={
    'Taux10ans': 'Index',
    'Ouverture': 'Opening',
    'Plus haut': 'Highest',
    'Plus bas': 'Lowest',
    'Fermer      Cours de clôture ajusté en fonction des fractionnements.': 'Closure',
    'Clôture ajustée      Cours de clôture ajusté pour les fractionnements et les distributions de dividendes et/ou de plus-values.': 'Adjusted_Closure',
    'Avant_Apres_Election': 'Before_After_Election'
}, inplace=True)
final_df

Unnamed: 0,Indice,Date,Opening,Highest,Lowest,Fermer,Cloture ajusté,Volume,Before_After_Election,Day_of_Week,Month,Is_Holiday_Period
0,CAC40,2022-01-21,"7 088,44","7 122,06","7 013,49","7 068,59","7 068,59",110 697 300,After,Friday,January,no
1,CAC40,2022-01-20,"7 190,02","7 199,97","7 123,62","7 194,16","7 194,16",76 932 800,After,Thursday,January,no
2,CAC40,2022-01-19,"7 104,11","7 211,24","7 104,11","7 172,98","7 172,98",84 173 800,After,Wednesday,January,no
3,CAC40,2022-01-18,"7 172,43","7 175,46","7 098,19","7 133,83","7 133,83",78 561 500,After,Tuesday,January,no
4,CAC40,2022-01-17,"7 167,53","7 213,70","7 148,98","7 201,64","7 201,64",53 465 300,After,Monday,January,no
...,...,...,...,...,...,...,...,...,...,...,...,...
13617,DowJ,2016-01-29,15957.00,16376.00,15937.00,16356.00,16356.00,217260,Before,Friday,January,no
13618,DowJ,2016-01-28,15861.00,16055.00,15768.00,15972.00,15972.00,268654,Before,Thursday,January,no
13619,DowJ,2016-01-27,15999.00,16151.00,15789.00,15851.00,15851.00,261837,Before,Wednesday,January,no
13620,DowJ,2016-01-26,15810.00,16101.00,15640.00,16066.00,16066.00,193761,Before,Tuesday,January,no


Downloading the final df

In [49]:
final_df.to_csv('df_trump2012all.csv')

In [31]:
election_date = pd.Timestamp('2012-11-05')


In [32]:
# Convertir la colonne "Date" au format datetime si ce n'est pas déjà fait
final_df['Date'] = pd.to_datetime(final_df['Date'])

In [34]:
import pandas as pd

# Définir la date de l'élection
election_date = pd.Timestamp('2012-11-05')

# Assurez-vous que la colonne "Date" est au format datetime
final_df['Date'] = pd.to_datetime(final_df['Date'])

# Filtrer les données pour "3 mois avant"
start_date_before = election_date - pd.DateOffset(months=3)
end_date_before = election_date - pd.DateOffset(days=1)

final_df_3m_before = final_df[(final_df['Date'] >= start_date_before) & (final_df['Date'] <= end_date_before)]
total_volume_3m_before = final_df_3m_before['Volume'].sum()

# Filtrer les données pour "3 mois après"
start_date_after = election_date + pd.DateOffset(days=1)
end_date_after = election_date + pd.DateOffset(months=3)

final_df_3m_after = final_df[(final_df['Date'] >= start_date_after) & (final_df['Date'] <= end_date_after)]
total_volume_3m_after = final_df_3m_after['Volume'].sum()

# Résultats
print("Total du volume 3 mois avant l'élection:", total_volume_3m_before)
print("Total du volume 3 mois après l'élection:", total_volume_3m_after)


Total du volume 3 mois avant l'élection: 101 731 700101 390 60096 987 60079 346 90088 366 60096 304 30099 701 50089 958 80091 091 90083 826 400104 200 10096 874 00083 323 60090 352 90069 410 30092 119 400106 795 50099 238 70098 600 20087 470 600117 429 400102 183 90098 277 70095 873 90061 707 600123 077 70080 427 60078 464 10091 773 80086 235 10084 998 800112 714 50082 277 50072 813 00080 744 000208 077 200102 309 100107 154 60091 945 60098 367 10090 844 40094 568 30075 442 80071 272 60075 716 900105 485 900107 456 600112 516 30075 965 20048 533 90071 774 70064 056 70066 667 60067 974 60065 669 90074 256 60062 662 60066 460 00075 577 60043 395 90062 472 70068 621 60068 145 50077 065 40070 007 00090 951 1003 841 910 0003 892 100 0004 289 120 0004 539 190 0003 926 560 0004 028 270 0004 209 400 0003 778 120 0003 756 200 0003 359 950 0003 456 390 0003 339 320 0003 364 990 0003 172 940 0002 832 440 0003 187 910 0003 538 030 0002 959 510 0003 393 060 0002 881 970 0003 578 770 0003 438 040 00

In [35]:
final_df_3m_after

Unnamed: 0,Indice,Date,Opening,Highest,Lowest,Fermer,Cloture ajusté,Volume,Before_After_Election,Day_of_Week,Month,Is_Holiday_Period
1270,CAC40,2017-02-03,"4 808,46","4 847,47","4 803,91","4 825,42","4 825,42",80 815 700,After,Friday,February,no
1271,CAC40,2017-02-02,"4 785,14","4 812,31","4 774,18","4 794,29","4 794,29",98 607 300,After,Thursday,February,no
1272,CAC40,2017-02-01,"4 786,85","4 823,81","4 782,61","4 794,58","4 794,58",97 905 700,After,Wednesday,February,no
1273,CAC40,2017-01-31,"4 790,28","4 812,55","4 748,90","4 748,90","4 748,90",98 487 800,After,Tuesday,January,no
1274,CAC40,2017-01-30,"4 823,50","4 830,69","4 771,98","4 784,64","4 784,64",83 384 600,After,Monday,January,no
...,...,...,...,...,...,...,...,...,...,...,...,...
13418,DowJ,2016-11-14,18806.00,18918.00,18762.00,18822.00,18822.00,212519,After,Monday,November,yes
13419,DowJ,2016-11-10,18533.00,18821.00,18492.00,18786.00,18786.00,339145,After,Thursday,November,yes
13420,DowJ,2016-11-09,18304.00,18590.00,17418.00,18532.00,18532.00,651872,After,Wednesday,November,yes
13421,DowJ,2016-11-08,18188.00,18345.00,18129.00,18285.00,18285.00,162218,After,Tuesday,November,yes


In [36]:
final_df_3m_before

Unnamed: 0,Indice,Date,Opening,Highest,Lowest,Fermer,Cloture ajusté,Volume,Before_After_Election,Day_of_Week,Month,Is_Holiday_Period
1334,CAC40,2016-11-04,"4 403,16","4 403,64","4 355,81","4 377,46","4 377,46",101 731 700,Before,Friday,November,yes
1335,CAC40,2016-11-03,"4 402,79","4 456,21","4 399,29","4 411,68","4 411,68",101 390 600,Before,Thursday,November,yes
1336,CAC40,2016-11-02,"4 437,99","4 449,44","4 414,67","4 414,67","4 414,67",96 987 600,Before,Wednesday,November,yes
1337,CAC40,2016-11-01,"4 532,34","4 537,99","4 460,54","4 470,28","4 470,28",79 346 900,Before,Tuesday,November,yes
1338,CAC40,2016-10-31,"4 525,60","4 540,26","4 500,19","4 509,26","4 509,26",88 366 600,Before,Monday,October,no
...,...,...,...,...,...,...,...,...,...,...,...,...
13482,DowJ,2016-08-11,18450.00,18594.00,18433.00,18565.00,18565.00,111156,Before,Thursday,August,no
13483,DowJ,2016-08-10,18470.00,18513.00,18417.00,18452.00,18452.00,96005,Before,Wednesday,August,no
13484,DowJ,2016-08-09,18466.00,18522.00,18442.00,18466.00,18466.00,87302,Before,Tuesday,August,no
13485,DowJ,2016-08-08,18458.00,18508.00,18436.00,18460.00,18460.00,84638,Before,Monday,August,no


In [None]:
final_df_3m_before.to_csv('Trump_2016_prior')
final_df_3m_before.to_csv('Trump_2016_after')