In [None]:
import os
import re
import shutil
import zipfile
from urllib.parse import urljoin

import requests

In [None]:
# List of the CSV archives published on the download page
def extract_strings_from_webpage(url, timeout=30):
    """Return every double-quoted string found in the body of a web page.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The substrings that appear between double quotes in the response
        text (quotes excluded), in the order they occur. An empty list is
        returned when the request raises or the status code is not 200; a
        message describing the failure is printed in both cases.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Connection errors, timeouts, malformed URLs, ...: report them the
        # same way as a bad status code instead of crashing the cell.
        print(f"Failed to fetch the webpage. Error: {e}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch the webpage. Status code: {response.status_code}")
        return []

    # Anything between two double quotes, including the empty string.
    return re.findall(r'"([^"]*)"', response.text)

# Download page of the project; the archive links are scraped from it.
webpage_url = "https://unehistoireduconflitpolitique.fr/telecharger.html"
extracted_strings = extract_strings_from_webpage(webpage_url)

# Keep only the quoted strings that name a zip archive ("csv.zip" / "csp.zip").
# urljoin resolves relative links against the page URL and leaves absolute
# URLs untouched, so every entry can be passed to requests.get as-is;
# dict.fromkeys drops links that appear several times while keeping page order.
download_links = list(dict.fromkeys(
    urljoin(webpage_url, item)
    for item in extracted_strings
    if item.endswith(("csv.zip", "csp.zip"))
))

In [None]:
# Download the archives into data_download/
os.makedirs('data_download', exist_ok=True)

for link in download_links:
    # The local name is the last path component of the link.
    file_name = os.path.join('data_download', os.path.basename(link))
    try:
        # A timeout keeps one unresponsive request from blocking the loop.
        response = requests.get(link, timeout=60)
        # Fail on HTTP errors (404, 500, ...) instead of saving the error
        # page under a .zip name, which would only break at extraction time.
        response.raise_for_status()
        with open(file_name, 'wb') as file:
            file.write(response.content)
        print(f"Downloaded: {file_name}")
    except (requests.RequestException, OSError) as e:
        # Best effort: report the failure and carry on with the next link.
        print(f"Error downloading {link}: {e}")

In [None]:
# Extract the electoral results: the CSV members of every archive whose name
# starts with 'pres', 'leg' or 'ref' are written flat (sub-folders inside the
# archive are dropped) into data_extraction/<prefix>/.
files = os.listdir('data_download')
for prefix in ['pres', 'leg', 'ref']:
    prefix_dir = os.path.join('data_extraction', prefix)
    os.makedirs(prefix_dir, exist_ok=True)
    for file in files:
        if not (file.startswith(prefix) and file.endswith('.zip')):
            continue
        zip_file_path = os.path.join('data_download', file)
        try:
            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
                for member in zip_ref.infolist():
                    member_name = os.path.basename(member.filename)
                    if not member_name.lower().endswith('.csv'):
                        continue
                    # Archives built on macOS carry '__MACOSX/._<name>.csv'
                    # metadata entries; they are not data and would otherwise
                    # be written next to the real CSV files.
                    if member.filename.startswith('__MACOSX') or member_name.startswith('._'):
                        continue
                    target_path = os.path.join(prefix_dir, member_name)
                    with zip_ref.open(member) as source, open(target_path, 'wb') as dest:
                        shutil.copyfileobj(source, dest)
        except (zipfile.BadZipFile, OSError) as e:
            # A corrupt or unreadable archive should not abort the other
            # extractions; report it and move on.
            print(f"Error extracting {file}: {e}")
            continue
        print(f"Extracted CSV files from {file} to {prefix_dir}")
print("Extraction completed.")

In [None]:
# Extract the control variables: every zip archive that is not an election
# result is unpacked, with its internal folder structure, into
# data_extraction/controles.
CONTROLS_DIR = 'data_extraction/controles'
os.makedirs(CONTROLS_DIR, exist_ok=True)

zip_files = [name for name in os.listdir('data_download') if name.endswith('.zip')]
for zip_file in zip_files:
    # Election archives ('pres', 'leg', 'ref') are handled in their own cell.
    is_election_archive = zip_file.startswith(('pres', 'leg', 'ref'))
    if not is_election_archive:
        zip_path = os.path.join('data_download', zip_file)
        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                # Leave out the '__MACOSX' entries of the archive.
                file_list = []
                for entry in zip_ref.namelist():
                    if not entry.startswith('__MACOSX'):
                        file_list.append(entry)
                zip_ref.extractall(CONTROLS_DIR, members=file_list)
        except Exception as e:
            print(f"Error extracting {zip_file}: {e}")

print("Extraction complete.")

In [None]:
# Clean up the controls directory: strip the trailing '_csv' from the names
# of the extracted folders.
controles_dir = 'data_extraction/controles'
suffix = '_csv'
folders = [f for f in os.listdir(controles_dir) if os.path.isdir(os.path.join(controles_dir, f))]
for folder in folders:
    if not folder.endswith(suffix):
        continue
    new_folder_name = folder[:-len(suffix)]
    if not new_folder_name:
        # A folder named exactly '_csv' has no name left once the suffix is
        # removed; leave it alone.
        continue
    old_path = os.path.join(controles_dir, folder)
    new_path = os.path.join(controles_dir, new_folder_name)
    if os.path.isdir(new_path):
        # Left over from a previous run of the notebook. os.rename cannot
        # replace a non-empty directory, so remove the stale copy and let the
        # freshly extracted folder take its place.
        shutil.rmtree(new_path)
        print(f"Replaced existing folder: {new_folder_name}")
    os.rename(old_path, new_path)
    print(f"Renamed: {folder} -> {new_folder_name}")
print("Folder renaming complete.")

In [None]:
# Remove the downloaded archives now that their content has been extracted.
# The check keeps the cell re-runnable: shutil.rmtree raises FileNotFoundError
# when the directory is already gone.
if os.path.isdir('data_download'):
    shutil.rmtree('data_download')
    print('Data download directory removed.')
else:
    print('Data download directory not found; nothing to remove.')