# Clone all the repos from GitLab

In [None]:
import re
import os
import gitlab
from git import Repo, Git
from dotenv import load_dotenv

# Load the variables defined in the local ".env" file into the environment;
# the cells below read their settings with os.getenv()
load_dotenv(".env")

In [2]:
# GitLab client and SSH settings

# Personal access token, taken from the GITLAB_TOKEN environment variable
GITLAB_PERSONAL_TOKEN = dict(
    title="mine",
    key=os.getenv("GITLAB_TOKEN"),
)

# API client created with that token; auth() performs the authentication
gl = gitlab.Gitlab(private_token=GITLAB_PERSONAL_TOKEN["key"])
gl.auth()

# ssh command line that selects the private key named by SSH_PRV_KEY_FILE
SSH_CMD = "ssh -i {}".format(os.getenv("SSH_PRV_KEY_FILE"))

In [3]:
# Collect the local directories that contain a git repository

SOURCE_CODE_DIR = os.getenv("SOURCE_CODE_DIR")

# A directory is taken as a repo when ".git" is one of its sub-directories
local_repos_dirs = [
    root
    for root, subdirs, _files in os.walk(SOURCE_CODE_DIR)
    if ".git" in subdirs
]


In [4]:
# Fetch the remote projects of every group that belongs to PROJECT_NAMESPACE

# Read once instead of on every loop iteration
project_namespace = os.getenv('PROJECT_NAMESPACE')

origin_projects = []

for group in gl.groups.list(get_all=True):
    # Filter before listing: fetching a group's projects costs extra API
    # requests, so groups outside the namespace are skipped up front
    if project_namespace not in group.web_url.lower():
        continue
    origin_projects.extend(group.projects.list(get_all=True))


In [None]:
# Compare the number of remote projects with the number of local repos.
# Only the counts are compared, not which repos are on each side.

if len(origin_projects) != len(local_repos_dirs):
    print(f"There are {len(local_repos_dirs)} local repos vs {len(origin_projects)} remote")
else:
    print(f"All the {len(origin_projects)} remote repos are saved locally")



In [6]:
# Build the clone plan: one entry per remote project

# Captures what lies between the first "/" of the SSH URL and the trailing
# ".git". A raw string is required: "\S" in a normal literal is an invalid
# escape sequence, and the dot must be escaped to match a literal ".".
repo_name_pattern = re.compile(r"/(\S*)\.git$")

# Leading namespace segment stripped from each project's path
namespace_prefix = f"{os.getenv('PROJECT_NAMESPACE')}/"

projects4cloning = {}

for project in origin_projects:
    ssh_link = project.ssh_url_to_repo
    repo_dir = project.path_with_namespace.replace(namespace_prefix, "", 1)

    match = repo_name_pattern.search(ssh_link)
    if match is None:
        # Fail with the offending URL instead of an anonymous IndexError
        raise ValueError(f"Cannot extract a repo name from SSH URL: {ssh_link!r}")
    repo_name = match.group(1)

    # Keys use underscores in place of dashes
    projects4cloning[repo_name.replace("-", "_")] = {
        "ssh_link": ssh_link,
        "repo_dir": repo_dir
    }


In [7]:
# clone all the repos

# ssh command line that selects the private key named by SSH_PRV_KEY_FILE
SSH_CMD = f"ssh -i {os.getenv('SSH_PRV_KEY_FILE')}"


def _sync_result(status, text, details=None):
    '''Build the result dict returned by git_sync.'''
    return {"status": status, "message": {"text": text, "details": details}}


def git_sync(repo_name, ssh_repo_link, repo_directory):
    '''Clone the repo, or pull it when the clone fails with git status 128.

    Args:
        repo_name: Name used only in the progress message.
        ssh_repo_link: SSH URL the repo is cloned from.
        repo_directory: Target directory, relative to SOURCE_CODE_DIR.

    Returns:
        A dict of the form
        {"status": bool, "message": {"text": str, "details": Exception or None}}.
        "status" is True after a successful clone or pull. On failure
        "details" holds the exception and "text" starts with "Clone error"
        or "Pulling error". Exceptions from the git operations are
        reported this way instead of being raised.
    '''
    print(f"Fetching {repo_name} in {repo_directory}... ")
    full_repo_dir = os.path.join(SOURCE_CODE_DIR, repo_directory)

    # The SSH command has to be handed to the git invocations that do the
    # work; setting it on an unrelated Git() instance has no effect on them.
    git_env = {"GIT_SSH_COMMAND": SSH_CMD}

    # Clone a new repo
    try:
        Repo.clone_from(ssh_repo_link, full_repo_dir, env=git_env)
        return _sync_result(True, "Cloned!")
    except Exception as e:
        clone_exception = e

    # Status 128 is treated as "the repo already exists"; anything else
    # (including exceptions that carry no status at all) is a clone failure.
    if getattr(clone_exception, "status", None) != 128:
        return _sync_result(False, f"Clone error: {clone_exception}", clone_exception)

    # Repo already exists, pull the latest
    try:
        repo = Repo(full_repo_dir)
        with repo.git.custom_environment(**git_env):
            repo.remotes.origin.pull()
        return _sync_result(True, "Pulled!")
    except Exception as pulling_exception:
        return _sync_result(False, f"Pulling error: {pulling_exception}", pulling_exception)



In [8]:
# Errors handed to git_sync_error_callback, in the order they were reported
error_repos = []


def git_sync_error_callback(error):
    '''Record a sync error and print a one-line summary of it.

    `error` is indexed with the keys "repo", "directory" and "error".
    '''
    # append() mutates the module-level list in place, so no `global` is needed
    error_repos.append(error)

    repo, directory, reason = error["repo"], error["directory"], error["error"]
    print(f'Error: {repo}, {directory}: {reason}')

In [9]:
def git_sync_callback(git_sync_results):
    '''Print the outcome reported in a sync result dict.

    On success only the message text is printed; on failure the text is
    printed with a marker prefix, followed by the args of the exception
    stored under "details".
    '''
    succeeded = git_sync_results["status"]

    if not succeeded:
        # errors were raised
        message = git_sync_results["message"]
        print(f'#############{message["text"]}: {message["details"].args}')
        return

    # syncing was successful
    print(git_sync_results["message"]["text"])

In [None]:
# Make the cloning multithreaded

from multiprocessing.pool import ThreadPool

with ThreadPool() as pool:
    for name, repo in projects4cloning.items():
        pool.apply_async(
            git_sync,
            (name, repo['ssh_link'], repo['repo_dir']),
            callback=git_sync_callback,
            # Without an error callback, an exception raised inside git_sync
            # is silently discarded. name/repo are bound as lambda defaults so
            # each callback reports its own repo rather than the last one of
            # the loop.
            error_callback=lambda exc, name=name, repo=repo: git_sync_error_callback(
                {"repo": name, "directory": repo['repo_dir'], "error": exc}
            ),
        )

    # Wait for every task here: leaving the with-block terminates the pool
    pool.close()
    pool.join()

print("Finished the clone")
    


In [None]:
# Show the entries that git_sync_error_callback appended to error_repos
error_repos