## Load Quantlets from GitHub

Objective: clone the Quantlet repositories to the local machine.

***Note***: you need a *token.txt* file containing a GitHub API token to access the QuantLet/LvB org. 

#### 1. Preparations

In [None]:
##### IMPORT DEPENDENCIES

from github import Github # type: ignore
import os
import time
import subprocess

##### CONSTANTS AND PATHS

# Root data directory (relative to the working directory). Existing clones are
# looked up in DATA_PATH/QuantLet and new repositories are cloned into
# DATA_PATH/<repository full name>.
DATA_PATH = "../../data"
# When True, the final cell runs its post-clone clean-up (deletes png/jpg files).
POST_CLEANUP = False

#### 2. Loading Quantlet repositories' names using the GitHub API

In [None]:
##### CONNECT TO THE QUANTLET REPO

# Read the token from the first line of token.txt.
# The line is stripped so that a trailing newline (or stray whitespace) in the
# file does not become part of the credential passed to the GitHub client.
try:
    with open('token.txt') as f:
        token = f.readlines()[0].strip()
except FileNotFoundError:
    print('Token file not found, please add it to the 1-Load-Quantlet-Data-From-Git directory or paste it below')

# or paste it here (only used when no token has been defined yet in this session)
if 'token' not in globals():
    token = 'YOUR TOKEN'

# authenticate using the token
g = Github(token)

# Logging into the organization
org = g.get_organization("QuantLet")
# Bare expression: in a notebook this displays the organization's login
org.login

In [None]:
##### LIST AND INDEX QUANTLETS

# Full names of all public repositories of the organization
names = [repo.full_name for repo in org.get_repos(type='public')]

# Directory entries already present locally, prefixed with 'QuantLet/' so they
# can be compared against the repository full names collected above
quantlet_dir = os.path.join(DATA_PATH, 'QuantLet')
if not os.path.exists(quantlet_dir):
    existing_Qs = []
    print('Number of existing Quantlets: 0')
else:
    existing_Qs = [f'QuantLet/{entry}' for entry in os.listdir(quantlet_dir)]
    print(f'Number of existing Quantlets: {len(existing_Qs)}')

# Repositories known to GitHub but not found locally
new_Qs = set(names) - set(existing_Qs)
print(f'Number of new Quantlets: {len(new_Qs)}')

In [None]:
##### CLONE REPOSITORIES AS IS

# Clone every repository that is not yet present under DATA_PATH.
for i, name in enumerate(new_Qs):
    print(i)
    print(name)
    # The command is passed as an argument list (no shell), so nothing in a
    # repository name can be interpreted by the shell. subprocess.run waits for
    # the clone to finish, so clones do not pile up in the background and a
    # failure can be reported right away.
    result = subprocess.run(
        ["git", "clone", f"https://github.com/{name}", f"{DATA_PATH}/{name}"],
        check=False,
    )
    if result.returncode != 0:
        print(f"git clone failed for {name} (exit code {result.returncode})")
    # Pause between clones, longer on every tenth one
    # (presumably throttling requests to GitHub -- TODO confirm)
    if i % 10 == 0:
        time.sleep(60)
    else:
        time.sleep(15)

In [None]:
##### REMOVE GIT FILES FROM DIRECTORIES

# Optional clean-up of the cloned repositories; runs only when POST_CLEANUP is True.
if POST_CLEANUP:
    image_extensions = {"png", "jpg"}
    # Walk DATA_PATH/QuantLet, the directory the other cells use for local
    # repositories, instead of a hard-coded path that ignores DATA_PATH.
    for root, directories, files in os.walk(os.path.join(DATA_PATH, "QuantLet")):
        # Directories containing a .git folder are only reported, not modified
        if '.git' in directories:
            print(root)
        # Delete image files, matched on the text after the last dot (case-sensitive)
        for file in files:
            if file.split(".")[-1] in image_extensions:
                os.remove(os.path.join(root, file))