In [1]:
import requests
import re
import os
import shutil
import zipfile

In [2]:
# Identification des fichiers csv
def extract_strings_from_webpage(url, timeout=30):
    """Fetch a web page and return every double-quoted string in its source.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on a stalled connection.

    Returns:
        A list with the contents of each ``"..."`` pair found in the response
        text (quotes excluded), in order of appearance. An empty list is
        returned, after printing a message, when the status code is not 200.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch the webpage. Status code: {response.status_code}")
        return []
    # Anything between two double quotes is captured; callers filter the result.
    return re.findall(r'"([^"]*)"', response.text)

# Scrape the download page, then keep only the strings that point at a zip archive.
webpage_url = "https://unehistoireduconflitpolitique.fr/telecharger.html"
extracted_strings = extract_strings_from_webpage(webpage_url)

# Two suffixes are accepted: "csv.zip" and "csp.zip".
archive_suffixes = ("csv.zip", "csp.zip")
download_links = [
    candidate
    for candidate in extracted_strings
    if candidate.endswith(archive_suffixes)
]

In [3]:
# Download every archive into data_download/
os.makedirs('data_download', exist_ok=True)

for link in download_links:
    try:
        file_name = os.path.join('data_download', os.path.basename(link))
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(link, timeout=60)
        # Reject 4xx/5xx answers: otherwise the server's error page would be
        # written to disk as a .zip and reported as a successful download.
        response.raise_for_status()
        with open(file_name, 'wb') as file:
            file.write(response.content)
        print(f"Downloaded: {file_name}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next link.
        print(f"Error downloading {link}: {e}")

print("Download completed")

Downloaded: data_download\pres1848_csv.zip
Downloaded: data_download\pres1965_csv.zip
Downloaded: data_download\pres1969_csv.zip
Downloaded: data_download\pres1974_csv.zip
Downloaded: data_download\pres1981_csv.zip
Downloaded: data_download\pres1988_csv.zip
Downloaded: data_download\pres1995_csv.zip
Downloaded: data_download\pres2002_csv.zip
Downloaded: data_download\pres2007_csv.zip
Downloaded: data_download\pres2012_csv.zip
Downloaded: data_download\pres2017_csv.zip
Downloaded: data_download\pres2022_csv.zip
Downloaded: data_download\ref1793_csv.zip
Downloaded: data_download\ref1795_csv.zip
Downloaded: data_download\ref1946_csv.zip
Downloaded: data_download\ref1992_csv.zip
Downloaded: data_download\ref2005_csv.zip
Downloaded: data_download\leg1848_csv.zip
Downloaded: data_download\leg1849_csv.zip
Downloaded: data_download\leg1871fev_csv.zip
Downloaded: data_download\leg1871juil_csv.zip
Downloaded: data_download\leg1876_csv.zip
Downloaded: data_download\leg1881_csv.zip
Downloaded: dat

In [4]:
# Extract the election results: one output folder per archive prefix.
for prefix in ['pres', 'leg', 'ref']:
    os.makedirs(f'data_extraction/elections/{prefix}', exist_ok=True)

files = os.listdir('data_download')
for prefix in ['pres', 'leg', 'ref']:
    prefix_dir = os.path.join('data_extraction/elections', prefix)
    # Archives are matched on their file-name prefix and the .zip extension.
    matching_archives = [
        name for name in files
        if name.startswith(prefix) and name.endswith('.zip')
    ]
    for file in matching_archives:
        with zipfile.ZipFile(os.path.join('data_download', file), 'r') as zip_ref:
            csv_members = [
                info for info in zip_ref.infolist()
                if info.filename.lower().endswith('.csv')
            ]
            for member in csv_members:
                # Only the base name is kept, so any folder structure inside
                # the archive is flattened into prefix_dir.
                target_path = os.path.join(prefix_dir, os.path.basename(member.filename))
                with zip_ref.open(member) as source, open(target_path, 'wb') as dest:
                    shutil.copyfileobj(source, dest)
        print(f"Extracted CSV files from {file} to {prefix_dir}")

print("Extraction completed.")

Extracted CSV files from pres1848_csv.zip to data_extraction/elections\pres
Extracted CSV files from pres1965_csv.zip to data_extraction/elections\pres
Extracted CSV files from pres1969_csv.zip to data_extraction/elections\pres
Extracted CSV files from pres1974_csv.zip to data_extraction/elections\pres
Extracted CSV files from pres1981_csv.zip to data_extraction/elections\pres
Extracted CSV files from pres1988_csv.zip to data_extraction/elections\pres
Extracted CSV files from pres1995_csv.zip to data_extraction/elections\pres
Extracted CSV files from pres2002_csv.zip to data_extraction/elections\pres
Extracted CSV files from pres2007_csv.zip to data_extraction/elections\pres
Extracted CSV files from pres2012_csv.zip to data_extraction/elections\pres
Extracted CSV files from pres2017_csv.zip to data_extraction/elections\pres
Extracted CSV files from pres2022_csv.zip to data_extraction/elections\pres
Extracted CSV files from leg1848_csv.zip to data_extraction/elections\leg
Extracted CSV 

In [5]:
# Clean up the extracted election results
# Step 1: delete every file whose name starts with "._" anywhere under
# data_extraction (presumably macOS metadata files shipped inside the archives).
for root, dirs, files in os.walk('data_extraction'):
    for file_name in files:
        if file_name.startswith("._"):
            file_path = os.path.join(root, file_name)
            try:
                os.remove(file_path)
                print(f"Deleted file: {file_path}")
            except Exception as e:
                print(f"Error deleting file {file_path}: {e}")

print("Deletion of extra files completed")

# Step 2: give the election folders their full names.
# A plain os.rename() raises when the destination already exists and is not
# empty, which is the case on every run after the first one because
# data_extraction is never deleted. When the destination is already there,
# the freshly extracted files are moved into it instead.
elections_dir = "data_extraction/elections"
for short_name, full_name in [("pres", "presidentielles"),
                              ("leg", "legislatives"),
                              ("ref", "referendums")]:
    src_dir = os.path.join(elections_dir, short_name)
    dst_dir = os.path.join(elections_dir, full_name)
    if not os.path.isdir(dst_dir):
        # First run: nothing in the way, rename the folder directly.
        os.rename(src_dir, dst_dir)
    elif os.path.isdir(src_dir):
        # Re-run: merge into the existing folder, overwriting same-named files.
        # Assumes src_dir holds only plain files (no sub-folders).
        for entry in os.listdir(src_dir):
            os.replace(os.path.join(src_dir, entry), os.path.join(dst_dir, entry))
        os.rmdir(src_dir)
    # Otherwise the folder was already renamed and there is nothing new to move.

print("Folder renaming completed")

Deleted file: data_extraction\elections\leg\._leg1848comm.csv
Deleted file: data_extraction\elections\leg\._leg1849comm.csv
Deleted file: data_extraction\elections\leg\._leg1871comm.csv
Deleted file: data_extraction\elections\leg\._leg1872comm.csv
Deleted file: data_extraction\elections\leg\._leg1876comm.csv
Deleted file: data_extraction\elections\leg\._leg1881comm.csv
Deleted file: data_extraction\elections\leg\._leg1885comm.csv
Deleted file: data_extraction\elections\leg\._leg1889comm.csv
Deleted file: data_extraction\elections\leg\._leg1893comm.csv
Deleted file: data_extraction\elections\leg\._leg1898comm.csv
Deleted file: data_extraction\elections\leg\._leg1902comm.csv
Deleted file: data_extraction\elections\leg\._leg1906comm.csv
Deleted file: data_extraction\elections\leg\._leg1910comm.csv
Deleted file: data_extraction\elections\leg\._leg1914comm.csv
Deleted file: data_extraction\elections\leg\._leg1919comm.csv
Deleted file: data_extraction\elections\leg\._leg1924comm.csv
Deleted 

In [6]:
# Extract the control variables: every archive that is not an election result.
os.makedirs('data_extraction/controles', exist_ok=True)

zip_files = [f for f in os.listdir('data_download') if f.endswith('.zip')]
for zip_file in zip_files:
    is_election_archive = zip_file.startswith(('pres', 'leg', 'ref'))
    if not is_election_archive:
        archive_path = os.path.join('data_download', zip_file)
        try:
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                # Leave out the members stored under a "__MACOSX" prefix.
                file_list = list(filter(
                    lambda member: not member.startswith('__MACOSX'),
                    zip_ref.namelist(),
                ))
                zip_ref.extractall('data_extraction/controles', members=file_list)
                print(f"Files extracted from {zip_file}:")
                if file_list:
                    print("\n".join(file_list))
        except Exception as e:
            # Report the failing archive and continue with the remaining ones.
            print(f"Error extracting {zip_file}: {e}")

print("Extraction complete.")

Files extracted from Age_csp.zip:
Age_csp/
Age_csp/agesexdepartements.csv
Age_csp/agesexcommunes.csv
Age_csp/menagescommunes.csv
Age_csp/menagesdepartements.csv
Files extracted from Alphabetisation_csv.zip:
alphabetisationcommunes.csv
Files extracted from Capital_immobilier_csv.zip:
Capital_immobilier_csv/
Capital_immobilier_csv/isfcommunes.csv
Capital_immobilier_csv/capitalimmobilierdepartements.csv
Capital_immobilier_csv/capitalimmobilier.csv
Capital_immobilier_csv/basesfiscalescommunes.csv
Capital_immobilier_csv/terrescommunes.csv
Capital_immobilier_csv/basesfiscalesdepartements.csv
Capital_immobilier_csv/capitalimmobiliercommunes.csv
Files extracted from CSP_csv.zip:
CSP_csv/
CSP_csv/rsacommunes.csv
CSP_csv/crimesdelitscommunes.csv
CSP_csv/emploicommunes.csv
CSP_csv/empfoncommunes.csv
CSP_csv/cspcommunes.csv
Files extracted from Diplomes_csv.zip:
Diplomes_csv/
Diplomes_csv/diplomescommunes.csv
Diplomes_csv/diplomesdepartements.csv
Files extracted from Enseignement_prive_csv.zip:
En

In [7]:
# Clean up the control variables: strip the "_csv" suffix from folder names.
controles_dir = 'data_extraction/controles'
folders = [f for f in os.listdir(controles_dir) if os.path.isdir(os.path.join(controles_dir, f))]
for folder in folders:
    if folder.endswith('_csv'):
        old_path = os.path.join(controles_dir, folder)
        new_folder_name = folder[:-4]  # drop the 4 characters of "_csv"
        new_path = os.path.join(controles_dir, new_folder_name)
        if os.path.isdir(new_path):
            # The target is left over from a previous run and os.rename()
            # refuses to replace a non-empty directory. Move the fresh files
            # into it (overwriting same-named ones), then drop the emptied source.
            for root, _subdirs, names in os.walk(old_path):
                target_root = os.path.normpath(
                    os.path.join(new_path, os.path.relpath(root, old_path))
                )
                os.makedirs(target_root, exist_ok=True)
                for name in names:
                    os.replace(os.path.join(root, name), os.path.join(target_root, name))
            shutil.rmtree(old_path)
        else:
            os.rename(old_path, new_path)
        print(f"Renamed: {folder} -> {new_folder_name}")

print("Folder renaming complete.")

Renamed: Capital_immobilier_csv -> Capital_immobilier
Renamed: CSP_csv -> CSP
Renamed: Diplomes_csv -> Diplomes
Renamed: Enseignement_prive_csv -> Enseignement_prive
Renamed: Nationalites_csv -> Nationalites
Renamed: Proprietaires_csv -> Proprietaires
Renamed: Revenus_csv -> Revenus
Renamed: Taille_agglo_commune_csv -> Taille_agglo_commune
Folder renaming complete.


In [8]:
# Remove the folder that holds the downloaded zip archives.
download_dir = 'data_download'
shutil.rmtree(download_dir)
print("Data download directory removed.")

Data download directory removed.
