<div class="alert alert-info">
    <center><b>Set up Notebook</b></center>
</div>

In [None]:
%pip install pandas tqdm python-dotenv

In [None]:
from tqdm import tqdm
from datetime import datetime
import pytz
from concurrent.futures import ThreadPoolExecutor, as_completed
import traceback
from typing import List

In [None]:
from os import path, listdir
from sys import path as sys_path

# Put the parent of the current working directory on sys.path so that the
# `models` and `utils` packages imported right after this can be resolved.
parent_dir = path.abspath(path.join('..'))
if parent_dir not in sys_path:
    sys_path.append(parent_dir)
    # basename() works with whatever separator the OS uses, and avoids nesting
    # quotes/backslashes inside the f-string (a SyntaxError before Python 3.12).
    print(f"Added {path.basename(parent_dir)} to sys.path")
from models.commit import Commit
from utils.worker import get_optimal_max_workers

<div class="alert alert-info">
    <center><b>Add all methods</b></center>
</div>

In [None]:
# Worker count for the thread pool; process_all_commits reads this
# module-level name when it creates its ThreadPoolExecutor.
max_workers = get_optimal_max_workers()
print(max_workers)

In [None]:
def process_all_commits(parent_folder: str) -> None:
    """Extracts the commits of every locally downloaded repository and stores them.

    The folder is expected to be laid out as
    ``parent_folder/<organization>/<repository>``. Each repository directory is
    passed to ``Commit.get_commit_data`` on a thread pool; every de-duplicated
    result is then passed to ``Commit.add_commit_in_batches`` on the same pool.
    A failure in either stage is printed together with the repository path and
    does not stop the remaining repositories from being processed.

    Args:
        parent_folder (str) - The path to the folder containing all organizations.

    Returns:
        None
    """
    repo_paths = []

    # Collect every <organization>/<repository> directory. Names returned by
    # listdir() are unique within a directory, so no de-duplication is needed.
    for sub_dir in listdir(parent_folder):
        sub_dir_path = path.join(parent_folder, sub_dir)
        if path.isdir(sub_dir_path):
            for repo_dir in listdir(sub_dir_path):
                repo_dir_path = path.join(sub_dir_path, repo_dir)
                if path.isdir(repo_dir_path):
                    repo_paths.append(repo_dir_path)

    # One timezone-aware UTC timestamp shared by every repository of this run.
    run_timestamp = datetime.now(pytz.timezone("UTC"))

    with ThreadPoolExecutor(max_workers=max_workers) as executor:

        # repo is already parent_folder/<organization>/<repository>, so it is
        # passed as-is; re-assembling it by splitting on "\\" only worked on
        # Windows and raised IndexError everywhere else.
        future_commits_from_repo = {
            executor.submit(Commit.get_commit_data, repo, run_timestamp): repo
            for repo in repo_paths
        }

        # Futures of the storing stage, kept so that their errors can be reported.
        future_store_for_repo = {}

        for future in tqdm(
            as_completed(future_commits_from_repo),
            total=len(future_commits_from_repo),
            desc="Processing repositories",
        ):
            repo = future_commits_from_repo[future]
            try:
                commits_data: List['Commit'] = list(set(future.result()))
                store_future = executor.submit(Commit.add_commit_in_batches, commits_data)
                future_store_for_repo[store_future] = repo
            except Exception as e:
                print(f"Error processing {repo}: {e}")
                traceback.print_exc()

        # Wait for the storing stage and surface its failures; an exception
        # raised inside a submitted task is otherwise silently discarded.
        for future in as_completed(future_store_for_repo):
            try:
                future.result()
            except Exception as e:
                print(f"Error storing commits for {future_store_for_repo[future]}: {e}")
                traceback.print_exc()

<div class="alert alert-info">
    <center><b>Extract all commits and populate the database with them</b></center>
</div>

In [None]:
# Run the extraction over ../download/orgs (resolved relative to the current working directory).
process_all_commits(path.join('..', 'download', 'orgs'))