<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# GitHub - Clone open branches from repository on local
<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/GitHub/GitHub_Clone_repository.ipynb" target="_parent"><img src="https://naasai-public.s3.eu-west-3.amazonaws.com/Open_in_Naas_Lab.svg"/></a><br><br><a href="https://bit.ly/3JyWIk6">Give Feedbacks</a> | <a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/Naas/Naas_Start_data_product.ipynb" target="_parent">Generate Data Product</a>

**Tags:** #github #snippet #operations #repository #efficiency

**Author:** [Antonio Georgiev](https://www.linkedin.com/in/antonio-georgiev-b672a325b)

**Description:** This notebook clones every branch that has an open pull request from a GitHub repository to the local machine, and deletes the local folders of branches whose pull requests were closed more than 2 weeks ago.

**References:**
- [GitHub Documentation - Cloning a repository](https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/cloning-a-repository)

## Input

### Import libraries

In [5]:
import json
import os
import shutil
import subprocess
from datetime import datetime, timedelta, timezone
from pprint import pprint

import naas
import pandas as pd
import requests
from github import Github

### Setup Variables
- `repo_url`: URL of the repository to clone
- `output_dir`: Output directory to clone repo. If None, we will create a folder with the name of the repo
- `token`: [Generate a personal access token](https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token)
- `owner`: owner of the repository
- `repository`: name of the repository

In [None]:
# Inputs: repository to clone
repo_url = "https://github.com/jupyter-naas/awesome-notebooks"

# Outputs: target folder of the clone (None by default)
output_dir = None

# GitHub API settings used to list the pull requests
owner = "jupyter-naas"  # Example for naas
repository = "awesome-notebooks"  # Example for naas awesome-notebooks repository
repo_name = "jupyter-naas/awesome-notebooks"
# Falls back to a placeholder when the naas secret is not set
token = naas.secret.get(name="GITHUB_TOKEN") or "YOUR_GITHUB_TOKEN"

## Model

### Schedule the notebook

In [None]:
# Register a schedule with naas; the cron expression "*/30 * * * *" fires every 30 minutes.
naas.scheduler.add(cron="*/30 * * * *")

### List branches with open PRs

In [3]:
def get_branches_with_open_prs(
    token,
    owner,
    repository
):
    """List the head branch of every open pull request of a repository.

    Args:
        token: GitHub personal access token sent in the Authorization header.
        owner: Account or organization that owns the repository.
        repository: Name of the repository.

    Returns:
        A DataFrame with one row per open pull request and the columns
        'branch', 'creator' and 'creation_date'. The columns are present
        even when there is no open pull request.

    Raises:
        requests.HTTPError: If the GitHub API answers with an error status
            (bad token, unknown repository, rate limit, ...).
    """
    url = f"https://api.github.com/repos/{owner}/{repository}/pulls"
    headers = {"Authorization": f"token {token}"}
    # The endpoint is paginated (30 items per page by default, 100 at most).
    params = {"state": "open", "per_page": 100}

    branches_data = []

    while url:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        # Fail loudly instead of iterating over an error payload.
        response.raise_for_status()

        for pull in response.json():
            branches_data.append({
                'branch': pull['head']['ref'],
                # 'user' can be null in the API payload.
                'creator': (pull['user'] or {}).get('login'),
                'creation_date': pull['created_at']
            })

        # The "next" link of the Link header already embeds the query string.
        url = response.links.get("next", {}).get("url")
        params = None

    branches_df = pd.DataFrame(
        branches_data,
        columns=['branch', 'creator', 'creation_date']
    )
    return branches_df

# Fetch the branches that currently have an open pull request
branches_with_open_prs = get_branches_with_open_prs(
    token=token,
    owner=owner,
    repository=repository,
)

### Clone repository
Clone the repository from the given URL and create a local copy of it.

In [4]:
def clone_branch(repo_url, output_dir, branch_name=None):
    """Clone a GitHub repository, optionally a single branch, over SSH.

    Args:
        repo_url: HTTPS URL of the repository, e.g.
            "https://github.com/jupyter-naas/awesome-notebooks".
        output_dir: Directory to clone into. When falsy, it defaults to
            `branch_name` if one is given, otherwise to the repository name.
        branch_name: Optional branch to check out. When given, only that
            branch is fetched.

    Returns:
        The directory the repository was cloned into.

    Raises:
        subprocess.CalledProcessError: If `git clone` exits with a non-zero
            status (e.g. the target directory exists and is not empty).
    """
    # Get GitHub owner and repo name (a trailing "/" would yield an empty name)
    trimmed_url = repo_url.rstrip("/")
    owner = trimmed_url.split("https://github.com/")[-1].split("/")[0]
    repo_name = trimmed_url.split("/")[-1]

    # Add repo name with .git extension
    if not repo_name.endswith(".git"):
        repo_name = f"{repo_name}.git"
    repo = f"{owner}/{repo_name}"

    # Init output dir
    if not output_dir:
        output_dir = branch_name or repo_name[:-4]

    # Only create the parent directory: git creates the target itself, so a
    # failed clone does not leave an empty folder that looks like a clone.
    parent_dir = os.path.dirname(output_dir)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Build the git command as an argument list (no shell involved)
    command = ["git", "clone"]
    if branch_name:
        command += ["--branch", branch_name, "--single-branch"]
    # "--" keeps a name starting with "-" from being parsed as an option
    command += ["--", f"git@github.com:{repo}", output_dir]

    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as error:
        print(f"❌ GitHub repo not cloned: {(error.stderr or '').strip()}")
        raise

    print(f"✅ GitHub repo cloned: {output_dir}")
    return output_dir

### Clone the branches with open PRs that haven't been cloned yet

In [None]:
# Clone each open-PR branch whose name does not exist yet as a local path
for _, pr in branches_with_open_prs.iterrows():
    branch_name = pr['branch']
    if os.path.exists(branch_name):
        continue
    output_dir = clone_branch(repo_url, None, branch_name)

### Get folders stored on local machine (cloned branches)

In [None]:
def get_all_folders(directory):
    """Return the paths of the sub-directories found directly in `directory`.

    Each returned path is `directory` joined with the entry name; regular
    files are skipped and the listing is not recursive.
    """
    candidates = (os.path.join(directory, name) for name in os.listdir(directory))
    return [path for path in candidates if os.path.isdir(path)]

# Example: Get all folders in the current working directory.
# Each entry is os.path.join(".", name), so it starts with "." and the path separator.
folders = get_all_folders(".")
print(folders)

### Find the branches with PRs closed more than 2 weeks ago

In [None]:
def get_branches_with_closed_prs(
    token,
    owner,
    repository
):
    """List the head branch of pull requests closed more than 2 weeks ago.

    Args:
        token: GitHub personal access token sent in the Authorization header.
        owner: Account or organization that owns the repository.
        repository: Name of the repository.

    Returns:
        A DataFrame with one row per matching pull request and the columns
        'branch', 'creator' and 'creation_date'. The columns are present
        even when no pull request matches.

    Raises:
        requests.HTTPError: If the GitHub API answers with an error status.
    """
    url = f"https://api.github.com/repos/{owner}/{repository}/pulls"
    headers = {"Authorization": f"token {token}"}
    # The endpoint is paginated (30 items per page by default, 100 at most);
    # old pull requests are only reachable by walking the following pages.
    params = {"state": "closed", "per_page": 100}

    branches_data = []

    # GitHub timestamps are UTC, so the cutoff must be computed in UTC too.
    two_weeks_ago = datetime.now(timezone.utc) - timedelta(weeks=2)

    while url:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        # Fail loudly instead of iterating over an error payload.
        response.raise_for_status()

        for pull in response.json():
            if not pull.get('closed_at'):
                continue
            closed_at = datetime.strptime(
                pull['closed_at'], "%Y-%m-%dT%H:%M:%SZ"
            ).replace(tzinfo=timezone.utc)

            # Keep only the PRs closed MORE than two weeks ago.
            if closed_at >= two_weeks_ago:
                continue

            branches_data.append({
                'branch': pull['head']['ref'].replace("refs/heads/", ""),  # Remove the prefix
                # 'user' can be null in the API payload.
                'creator': (pull['user'] or {}).get('login'),
                'creation_date': pull['created_at']
            })

        # The "next" link of the Link header already embeds the query string.
        url = response.links.get("next", {}).get("url")
        params = None

    branches_df = pd.DataFrame(
        branches_data,
        columns=['branch', 'creator', 'creation_date']
    )
    return branches_df


# Fetch the branches returned by the closed-PR lookup
branches_with_closed_prs = get_branches_with_closed_prs(
    token=token,
    owner=owner,
    repository=repository,
)

### Identify the cloned branches on the local machine that have closed PRs

In [None]:
# `folders` holds paths such as "./my-branch" while the DataFrame holds bare
# branch names, so compare against the last component of each folder path.
local_folder_names = {
    os.path.basename(os.path.normpath(folder)) for folder in folders
}
# .get() tolerates an empty DataFrame without a 'branch' column;
# dict.fromkeys() drops duplicates (a branch can have several closed PRs)
# while keeping the original order.
closed_branches = dict.fromkeys(branches_with_closed_prs.get('branch', []))
matches = [branch for branch in closed_branches if branch in local_folder_names]
print("Branches with closed PRs that exist on the local machine:")
print(matches)

### Delete the folders with closed PRs

In [None]:
def delete_folders(matches):
    """Recursively delete every folder listed in `matches`.

    A failure on one folder (missing path, permission error, ...) is printed
    and does not prevent the remaining folders from being processed.

    Args:
        matches: Iterable of folder paths to remove.
    """
    for folder_name in matches:
        try:
            shutil.rmtree(folder_name)
        # Only filesystem errors are expected here; anything else (such as a
        # NameError from a missing import) must surface instead of being
        # reported as a failed deletion.
        except OSError as e:
            print(f"Error deleting folder {folder_name}: {e}")
        else:
            print(f"Deleted folder: {folder_name}")

# Remove the local folders listed in `matches`
delete_folders(matches)