In [3]:
import os
import zipfile

def zip_directory(folder_path, zip_path):
    """
    Zips the specified folder and saves it to the specified zip file path.

    Args:
        folder_path (str): The path to the folder to be zipped.
        zip_path (str): The path where the zip file should be saved.
    """
    # Ensure the folder path exists
    if not os.path.isdir(folder_path):
        raise ValueError(f"The folder path {folder_path} does not exist or is not a directory.")

    # Create a ZipFile object
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Walk through the directory
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                # Create the full path to the file
                full_path = os.path.join(root, file)
                # Add file to the zip file
                arcname = os.path.relpath(full_path, start=folder_path)
                zipf.write(full_path, arcname=arcname)


Compression of resources

In [4]:
zip_directory('./resources', './resources.zip')

In [7]:
zip_directory('./exports', './exports.zip')

In [6]:
 pip install git

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement git (from versions: none)
ERROR: No matching distribution found for git


In [None]:
import os
import git

def push_exports_to_github(repo_path, exports_folder, commit_message, branch='main'):
    """
    Pushes the changes in the specified exports folder to GitHub.

    Args:
        repo_path (str): The path to the local git repository.
        exports_folder (str): The path to the exports folder relative to the repository.
        commit_message (str): The commit message for the changes.
        branch (str): The branch to push the changes to. Default is 'main'.
    """
    # Ensure the repository path exists
    if not os.path.isdir(repo_path):
        raise ValueError(f"The repository path {repo_path} does not exist or is not a directory.")

    # Ensure the exports folder path exists
    exports_path = os.path.join(repo_path, exports_folder)
    if not os.path.isdir(exports_path):
        raise ValueError(f"The exports folder path {exports_path} does not exist or is not a directory.")

    try:
        # Initialize the repository
        repo = git.Repo(repo_path)

        # Stage changes in the exports folder
        repo.git.add(exports_path)

        # Commit the changes
        repo.index.commit(commit_message)

        # Push the changes to the specified branch
        origin = repo.remote(name='origin')
        origin.push(refspec=f'{branch}:{branch}')

        print(f"Changes from '{exports_folder}' pushed to GitHub branch '{branch}' successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")

# Example usage:
# Make sure your repo_path points to the local git repository
repo_path = '/path/to/your/local/repo'
exports_folder = 'exports'  # This should be the relative path from the repo root
commit_message = 'Add files from exports directory'
push_exports_to_github(repo_path, exports_folder, commit_message)


# Update Github

In [None]:
import os
from subprocess import check_call, CalledProcessError

COMMIT_MESSAGE = "Notebooks updates"

def git_add_commit_push():
    try:
        check_call(["git", "add", "."])
        check_call(["git", "commit", "-m", COMMIT_MESSAGE])
        check_call(["git", "push"])
    except CalledProcessError as e:
        print(f"Error during git operations: {e}")

def main():
    git_add_commit_push()

if __name__ == "__main__":
    main()

In [None]:
check_call(["git", "push"])

In [2]:
import os
import subprocess

def git_command(command):
    try:
        result = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return result.stdout.decode(), result.stderr.decode()
    except subprocess.CalledProcessError as e:
        return e.stdout.decode(), e.stderr.decode()

def git_pull():
    print("Stashing any local changes...")
    stdout, stderr = git_command(["git", "stash"])
    if stderr:
        print(f"Error during git stash: {stderr}")
        return

    print("Pulling latest changes from the repository...")
    stdout, stderr = git_command(["git", "pull"])
    print(stdout)
    if 'CONFLICT' in stdout or 'CONFLICT' in stderr:
        print("Merge conflicts detected. Attempting to resolve automatically...")
        stdout, stderr = git_command(["git", "merge", "--abort"])
        if stderr:
            print(f"Error during merge abort: {stderr}")
            return
        stdout, stderr = git_command(["git", "pull", "--strategy-option=theirs"])
        if stderr:
            print(f"Error during git pull with merge strategy: {stderr}")
            return

    print("Applying stashed changes...")
    stdout, stderr = git_command(["git", "stash", "pop"])
    if stderr:
        print(f"Error during git stash pop: {stderr}")
        return

    print("Repository successfully updated.")

In [3]:
git_pull()

Stashing any local changes...
Pulling latest changes from the repository...
Auto-merging Fusilier_Antoine_1_notebook_exploratory_analysis_and_cleaning_and_feature_enginering_022024.ipynb
CONFLICT (content): Merge conflict in Fusilier_Antoine_1_notebook_exploratory_analysis_and_cleaning_and_feature_enginering_022024.ipynb
Automatic merge failed; fix conflicts and then commit the result.

Merge conflicts detected. Attempting to resolve automatically...
Applying stashed changes...
Error during git stash pop: No stash entries found.



In [None]:
pip install pyspark

In [None]:
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession

# Configurer Spark pour accéder à HDFS
conf = SparkConf().setAppName("HDFS Access").setMaster("local[*]")
sc = SparkContext(conf=conf)
spark = SparkSession(sc)

# Exemple de lecture d'un fichier HDFS
hdfs_url = "hdfs://45.93.138.139:9000/path/to/your/file"
df = spark.read.text(hdfs_url)
df.show()
