In [1]:
!pip install git-lfs
!pip install pandas pyreadr
import pandas as pd
import pyreadr
import re
import requests
import os
import shutil
import zipfile



In [2]:
# Identification des fichiers csv
def extract_strings_from_webpage(url, timeout=30):
    """Fetch a web page and return every double-quoted string it contains.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            it, ``requests`` waits indefinitely on an unresponsive server.

    Returns:
        list[str]: The text found between each pair of double quotes in the
        response body, in order of appearance (empty strings included).
        An empty list when the server answers with a status other than 200.

    Raises:
        Exceptions raised by ``requests.get`` (connection errors, timeouts)
        are not caught here and propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch the webpage. Status code: {response.status_code}")
        return []
    # Capture whatever sits between two double quotes (attribute values, etc.).
    return re.findall(r'"([^"]*)"', response.text)

# Page whose source is scanned for archive links.
webpage_url = "https://unehistoireduconflitpolitique.fr/telecharger.html"
extracted_strings = extract_strings_from_webpage(webpage_url)

# Keep only the quoted strings that end like a zipped CSV archive.
# NOTE(review): "csp.zip" presumably matches a misspelled link on the page - confirm.
archive_suffixes = ("csv.zip", "csp.zip")
download_links = [
    candidate
    for candidate in extracted_strings
    if candidate.endswith(archive_suffixes)
]

In [3]:
# Download the archives
os.makedirs('data_zip', exist_ok=True)

for link in download_links:
    try:
        # Archives are stored under data_zip/ using the last path component of the link.
        file_name = os.path.join('data_zip', os.path.basename(link))
        # Bound the wait so a single unresponsive request cannot stall the whole cell.
        response = requests.get(link, timeout=60)
        # Turn HTTP errors (404, 500, ...) into exceptions so that an error page
        # is never saved under a .zip name and reported as downloaded.
        response.raise_for_status()
        with open(file_name, 'wb') as file:
            file.write(response.content)
        print(f"Downloaded: {file_name}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next link.
        print(f"Error downloading {link}: {e}")

print("Download completed")

Downloaded: data_zip/pres1848_csv.zip
Downloaded: data_zip/pres1965_csv.zip
Downloaded: data_zip/pres1969_csv.zip
Downloaded: data_zip/pres1974_csv.zip
Downloaded: data_zip/pres1981_csv.zip
Downloaded: data_zip/pres1988_csv.zip
Downloaded: data_zip/pres1995_csv.zip
Downloaded: data_zip/pres2002_csv.zip
Downloaded: data_zip/pres2007_csv.zip
Downloaded: data_zip/pres2012_csv.zip
Downloaded: data_zip/pres2017_csv.zip
Downloaded: data_zip/pres2022_csv.zip
Downloaded: data_zip/ref1793_csv.zip
Downloaded: data_zip/ref1795_csv.zip
Downloaded: data_zip/ref1946_csv.zip
Downloaded: data_zip/ref1992_csv.zip
Downloaded: data_zip/ref2005_csv.zip
Downloaded: data_zip/leg1848_csv.zip
Downloaded: data_zip/leg1849_csv.zip
Downloaded: data_zip/leg1871fev_csv.zip
Downloaded: data_zip/leg1871juil_csv.zip
Downloaded: data_zip/leg1876_csv.zip
Downloaded: data_zip/leg1881_csv.zip
Downloaded: data_zip/leg1885_csv.zip
Downloaded: data_zip/leg1889_csv.zip
Downloaded: data_zip/leg1893_csv.zip
Downloaded: data_zi

In [4]:
# Extract the election results
elections_dir = 'data_csv/elections'
election_prefixes = ['pres', 'leg', 'ref']

# One output directory per election type.
for election_prefix in election_prefixes:
    os.makedirs(f'{elections_dir}/{election_prefix}', exist_ok=True)

archive_names = os.listdir('data_zip')
for election_prefix in election_prefixes:
    destination_dir = os.path.join(elections_dir, election_prefix)
    for archive_name in archive_names:
        belongs_here = archive_name.startswith(election_prefix) and archive_name.endswith('.zip')
        if not belongs_here:
            continue
        archive_path = os.path.join('data_zip', archive_name)
        with zipfile.ZipFile(archive_path, 'r') as archive:
            csv_members = [
                entry for entry in archive.infolist()
                if entry.filename.lower().endswith('.csv')
            ]
            for csv_member in csv_members:
                # Flatten the archive layout: only the member's base name is kept.
                output_path = os.path.join(destination_dir, os.path.basename(csv_member.filename))
                with archive.open(csv_member) as source, open(output_path, 'wb') as dest:
                    shutil.copyfileobj(source, dest)
        print(f"Extracted CSV files from {archive_name}")
        # The archive is no longer needed once its CSV files are copied out.
        os.remove(archive_path)
print("Extraction completed.")

Extracted CSV files from pres1981_csv.zip
Extracted CSV files from pres2012_csv.zip
Extracted CSV files from pres2002_csv.zip
Extracted CSV files from pres2022_csv.zip
Extracted CSV files from pres2007_csv.zip
Extracted CSV files from pres1969_csv.zip
Extracted CSV files from pres1995_csv.zip
Extracted CSV files from pres1848_csv.zip
Extracted CSV files from pres1974_csv.zip
Extracted CSV files from pres1988_csv.zip
Extracted CSV files from pres2017_csv.zip
Extracted CSV files from pres1965_csv.zip
Extracted CSV files from leg1986_csv.zip
Extracted CSV files from leg1967_csv.zip
Extracted CSV files from leg1962_csv.zip
Extracted CSV files from leg1893_csv.zip
Extracted CSV files from leg1906_csv.zip
Extracted CSV files from leg1946Nov_csv.zip
Extracted CSV files from leg1973_csv.zip
Extracted CSV files from leg2012_csv.zip
Extracted CSV files from leg2022_csv.zip
Extracted CSV files from leg1981_csv.zip
Extracted CSV files from leg1885_csv.zip
Extracted CSV files from leg1936_csv.zip
E

In [5]:
# Clean up the election results
for current_dir, _subdirs, entries in os.walk('data_csv'):
    # Files whose name starts with "._" are stray extras to delete
    # (presumably macOS metadata companions - confirm).
    stray_paths = (
        os.path.join(current_dir, entry)
        for entry in entries
        if entry.startswith("._")
    )
    for stray_path in stray_paths:
        try:
            os.remove(stray_path)
            print(f"Deleted file: {stray_path}")
        except Exception as error:
            print(f"Error deleting file {stray_path}: {error}")

print("Deletion of extra files completed")

# Replace the short directory names with their full French names.
elections_dir = "data_csv/elections"
folder_renames = (
    ("pres", "presidentielles"),
    ("leg", "legislatives"),
    ("ref", "referendums"),
)
for short_name, full_name in folder_renames:
    os.rename(f"{elections_dir}/{short_name}", f"{elections_dir}/{full_name}")

print("Folder renaming completed")

Deleted file: data_csv/elections/pres/._pres2017comm.csv
Deleted file: data_csv/elections/pres/._pres1995comm.csv
Deleted file: data_csv/elections/pres/._pres1848comm.csv
Deleted file: data_csv/elections/pres/._pres2012comm.csv
Deleted file: data_csv/elections/pres/._pres1988comm.csv
Deleted file: data_csv/elections/pres/._pres2022comm.csv
Deleted file: data_csv/elections/pres/._pres2002comm.csv
Deleted file: data_csv/elections/pres/._pres1965comm.csv
Deleted file: data_csv/elections/pres/._pres1981comm.csv
Deleted file: data_csv/elections/pres/._pres1969comm.csv
Deleted file: data_csv/elections/pres/._pres2007comm.csv
Deleted file: data_csv/elections/pres/._pres1974comm.csv
Deleted file: data_csv/elections/leg/._leg1968comm.csv
Deleted file: data_csv/elections/leg/._leg2007comm.csv
Deleted file: data_csv/elections/leg/._leg1981comm.csv
Deleted file: data_csv/elections/leg/._leg1885comm.csv
Deleted file: data_csv/elections/leg/._leg1914comm.csv
Deleted file: data_csv/elections/leg/._le

In [6]:
# Extract the control variables
os.makedirs('data_csv/controles', exist_ok=True)

zip_files = [f for f in os.listdir('data_zip') if f.endswith('.zip')]
for zip_file in zip_files:
    # Archives named after an election type are not control data; skip them.
    if zip_file.startswith(('pres', 'leg', 'ref')):
        continue
    zip_path = os.path.join('data_zip', zip_file)
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # Leave out the __MACOSX entries some archives carry.
            file_list = [file for file in zip_ref.namelist() if not file.startswith('__MACOSX')]
            zip_ref.extractall('data_csv/controles', members=file_list)
        print(f"Files extracted from {zip_file}:")
        for extracted_file in file_list:
            print(extracted_file)
        # Delete the archive only after its handle has been closed: removing a
        # file that is still open raises on Windows, which previously made a
        # successful extraction be reported as an error.
        os.remove(zip_path)
    except Exception as e:
        print(f"Error extracting {zip_file}: {e}")

print("Extraction complete.")

Files extracted from Proprietaires_csv.zip:
Proprietaires_csv/
Proprietaires_csv/proprietairescommunes.csv
Files extracted from Alphabetisation_csv.zip:
alphabetisationcommunes.csv
Files extracted from Revenus_csv.zip:
Revenus_csv/
Revenus_csv/revcommunes.csv
Revenus_csv/revimpdepartements.csv
Revenus_csv/pibcommunes.csv
Revenus_csv/pibdepartements.csv
Revenus_csv/revdepartements.csv
Files extracted from Enseignement_prive_csv.zip:
Enseignement_prive_csv/
Enseignement_prive_csv/religiositecantons1791.csv
Enseignement_prive_csv/religiositecommunes1791.csv
Enseignement_prive_csv/publicprivedepartements.csv
Enseignement_prive_csv/publicprivecommunes2021.csv
Enseignement_prive_csv/religiositedistricts1791.csv
Enseignement_prive_csv/publicprivecommunes1894.csv
Enseignement_prive_csv/publicprivecantons2021.csv
Enseignement_prive_csv/publicprivecantons1894.csv
Enseignement_prive_csv/religiositedepartements.csv
Enseignement_prive_csv/religiositecommunes.csv
Files extracted from Diplomes_csv.zi

In [7]:
# Clean up the control variables
controls_dir = 'data_csv/controles'
folder_suffix = '_csv'

# Snapshot the sub-directories before renaming any of them.
subfolders = [
    name for name in os.listdir(controls_dir)
    if os.path.isdir(os.path.join(controls_dir, name))
]
for subfolder in (name for name in subfolders if name.endswith(folder_suffix)):
    # Drop the trailing "_csv" from the directory name.
    trimmed_name = subfolder[:-len(folder_suffix)]
    os.rename(
        os.path.join(controls_dir, subfolder),
        os.path.join(controls_dir, trimmed_name),
    )
    print(f"Renamed: {subfolder} -> {trimmed_name}")

print("Folder renaming complete.")

Renamed: Enseignement_prive_csv -> Enseignement_prive
Renamed: Diplomes_csv -> Diplomes
Renamed: Proprietaires_csv -> Proprietaires
Renamed: Taille_agglo_commune_csv -> Taille_agglo_commune
Renamed: Nationalites_csv -> Nationalites
Renamed: Capital_immobilier_csv -> Capital_immobilier
Renamed: Revenus_csv -> Revenus
Folder renaming complete.


In [8]:
# Delete the download directory and everything left in it.
zip_dir = 'data_zip'
shutil.rmtree(zip_dir)
print('Downloaded data removed.')

Downloaded data removed.


In [None]:
# Conversion au format R
def csv_to_rda(input_csv_path, output_rda_path):
    """Convert one CSV file to a gzip-compressed R data file, then delete the CSV.

    Args:
        input_csv_path: Path of the CSV file to read (decoded as latin1).
        output_rda_path: Path of the R data file to write.
    """
    frame = pd.read_csv(input_csv_path, encoding='latin1', low_memory=False)
    pyreadr.write_rdata(output_rda_path, frame, compress='gzip')
    # The source CSV is deleted only after the R file has been written.
    os.remove(input_csv_path)
    print(f"Converted file: {input_csv_path}")

def convert_csv_files(input_folder, output_folder):
    """Convert every ``.csv`` file found under ``input_folder``.

    The directory layout below ``input_folder`` is reproduced below
    ``output_folder``, with each file's extension replaced by ``.rda``.
    Each file is handed to ``csv_to_rda``.

    Args:
        input_folder: Root directory that is walked recursively.
        output_folder: Root directory receiving the converted files;
            missing sub-directories are created as needed.
    """
    for current_dir, _subdirs, names in os.walk(input_folder):
        for csv_name in (name for name in names if name.endswith(".csv")):
            input_csv_path = os.path.join(current_dir, csv_name)
            # Path relative to the input root, without its extension.
            stem, _extension = os.path.splitext(os.path.relpath(input_csv_path, input_folder))
            output_rda_path = os.path.join(output_folder, stem + ".rda")
            os.makedirs(os.path.dirname(output_rda_path), exist_ok=True)
            csv_to_rda(input_csv_path, output_rda_path)

# Convert the whole data_csv tree into data_rda when run as the main module.
if __name__ == "__main__":
    input_folder, output_folder = "data_csv", "data_rda"
    convert_csv_files(input_folder, output_folder)

Converted file: data_csv/elections/referendums/ref1992comm.csv
Converted file: data_csv/elections/referendums/ref2005comm.csv
Converted file: data_csv/elections/referendums/ref1946comm.csv
Converted file: data_csv/elections/referendums/ref1795comm.csv
Converted file: data_csv/elections/referendums/ref1793comm.csv
Converted file: data_csv/elections/legislatives/leg1898comm.csv
Converted file: data_csv/elections/legislatives/leg1978comm.csv
Converted file: data_csv/elections/legislatives/leg2017comm.csv
Converted file: data_csv/elections/legislatives/leg1973comm.csv
Converted file: data_csv/elections/legislatives/leg1914comm.csv
Converted file: data_csv/elections/legislatives/leg1968comm.csv
Converted file: data_csv/elections/legislatives/leg1928comm.csv
Converted file: data_csv/elections/legislatives/leg1881comm.csv
Converted file: data_csv/elections/legislatives/leg1951comm.csv
Converted file: data_csv/elections/legislatives/leg1906comm.csv
Converted file: data_csv/elections/legislativ

In [None]:
# Delete the extraction directory and everything left in it.
csv_dir = 'data_csv'
shutil.rmtree(csv_dir)
print('Extracted data removed.')

In [None]:
from subprocess import run, PIPE

# Function to get the list of modified files
def get_modified_files():
    """Return the paths printed by ``git diff --name-only``.

    Returns:
        list[str]: One entry per non-empty line of git's output, in the
        order git printed them; an empty list when git prints nothing.

    Note:
        ``git diff`` without further arguments compares the working tree
        with the index, so files git does not track yet are not listed.
    """
    result = run(["git", "diff", "--name-only"], stdout=PIPE, text=True)
    # ''.split('\n') yields [''], which would hand an empty path to the
    # caller; drop blank entries so "no changes" becomes an empty list.
    return [name for name in result.stdout.strip().split('\n') if name]

# Function to commit and push changes
def commit_and_push(file_path, commit_message):
    """Stage ``file_path``, commit with ``commit_message``, then push.

    The three git commands run one after another; their exit statuses are
    not inspected.
    """
    git_steps = (
        ["git", "add", file_path],
        ["git", "commit", "-m", commit_message],
        ["git", "push"],
    )
    for argv in git_steps:
        run(argv)

# Function to commit and push modified files
def commit_and_push_modified_files():
    """Commit and push each modified file on its own.

    Every path returned by ``get_modified_files`` gets a separate commit
    whose message is ``Update <path>``.
    """
    for changed_path in get_modified_files():
        commit_and_push(changed_path, f"Update {changed_path}")

# Run the workflow: every modified file is committed and pushed, one commit per file.
commit_and_push_modified_files()