# Proof of concept: GitLab metadata extraction
---

In [1]:
import requests
# Show which version of the requests library this notebook is running against.
print(requests.__version__)



2.31.0


In [19]:
# Required and recommended metadata properties from a GitLab repository

import requests
from urllib.parse import urlparse, quote

def get_gitlab_metadata(repo_url, token=None):
    """Collect basic metadata for a GitLab project through the REST API (v4).

    The GitLab host and the project path are both derived from ``repo_url``,
    so the function works for gitlab.com as well as self-hosted instances.

    Args:
        repo_url: HTTP(S) URL of the repository, e.g.
            ``https://gitlab.com/group/project.git``. Surrounding slashes
            and a trailing ``.git`` are ignored.
        token: Optional access token, sent in the ``PRIVATE-TOKEN`` header.
            Only needed for private projects.

    Returns:
        A dict with two sub-dicts:

        * ``"required"``: Name, Description, URL, Clone URL, Languages,
          Version (name of the first tag returned by the API).
        * ``"recommended"``: Latest Commit (id), Branches (count, at most
          one API page of 100), License (name), Authors (contributor names,
          at most one API page of 100).

        Any value that could not be retrieved is ``"N/A"`` (``"No version
        tag"`` for Version).

    Raises:
        requests.RequestException: on connection problems or timeouts.
            HTTP error statuses do not raise; the affected fields fall back
            to their "N/A" placeholders.
    """
    parsed = urlparse(repo_url)

    # Base URL of the GitLab instance (scheme + host only).
    gitlab_host = f"{parsed.scheme}://{parsed.netloc}"

    # Strip slashes on BOTH sides before dropping ".git"; otherwise a URL
    # such as ".../repo.git/" keeps its suffix and the lookup 404s.
    project_path = parsed.path.strip("/").removesuffix(".git")

    # The API expects the namespaced path as a single URL-encoded segment,
    # so "/" must be encoded as well (safe="").
    encoded_path = quote(project_path, safe="")

    project_api = f"{gitlab_host}/api/v4/projects/{encoded_path}"

    # Token is only required for private repositories.
    headers = {"PRIVATE-TOKEN": token} if token else {}

    def fetch_json(endpoint="", params=None):
        """GET a project sub-resource; return parsed JSON, or {} on failure."""
        resp = requests.get(
            f"{project_api}{endpoint}",
            headers=headers,
            params=params,
            timeout=10,
        )
        if not resp.ok:
            return {}
        try:
            return resp.json()
        except ValueError:
            # Body was not JSON (e.g. an HTML error page from a proxy).
            return {}

    # There is no "/projects/:id/license" endpoint; license data is embedded
    # in the project resource when it is requested with license=true.
    project = fetch_json(params={"license": "true"})
    if not isinstance(project, dict):
        project = {}
    # Debug output retained from the proof of concept.
    print(f'project: {project}\n')

    languages = fetch_json("/languages")
    # Only the first tag/commit is used, so ask for a single item.
    tags = fetch_json("/repository/tags", {"per_page": 1})
    commits = fetch_json("/repository/commits", {"per_page": 1})
    # List endpoints are paginated (20 items by default); request the
    # maximum page size so counts are not silently capped at 20.
    branches = fetch_json("/repository/branches", {"per_page": 100})
    authors = fetch_json("/repository/contributors", {"per_page": 100})

    def text(key):
        """Return project[key], or "N/A" when it is missing, null or empty."""
        return project.get(key) or "N/A"

    # Other fields available on the project resource include: id, readme_url,
    # tag_list, topics, visibility, created_at, last_activity_at, star_count,
    # forks_count, open_issues_count, namespace, owner,
    # container_registry_image_prefix.
    required = {
        "Name": text("name"),
        "Description": text("description"),
        "URL": text("web_url"),
        "Clone URL": text("http_url_to_repo"),
        "Languages": (
            ", ".join(languages)
            if isinstance(languages, dict) and languages
            else "N/A"
        ),
        "Version": (
            tags[0]["name"]
            if isinstance(tags, list) and tags
            else "No version tag"
        ),
    }

    # "license" is null when GitLab could not detect one.
    license_info = project.get("license")
    license_name = (
        license_info.get("name") if isinstance(license_info, dict) else None
    )

    author_names = (
        [a["name"] for a in authors if isinstance(a, dict) and a.get("name")]
        if isinstance(authors, list)
        else []
    )

    recommended = {
        "Latest Commit": (
            commits[0]["id"]
            if isinstance(commits, list) and commits
            else "N/A"
        ),
        "Branches": len(branches) if isinstance(branches, list) else "N/A",
        "License": license_name or "N/A",
        "Authors": ", ".join(author_names) if author_names else "N/A",
    }

    return {"required": required, "recommended": recommended}

if __name__ == "__main__":
    # Sample public repositories (gitlab.com and a self-hosted instance)
    # used to exercise the extractor.
    repos = [
        "https://gitlab.com/api4ai/examples/img-bg-removal-people.git",
        "https://gitlab.ub.uni-giessen.de/gc2052/machine-learning.git",
        "https://gitlab.com/QMAI/papers/adversarialkitaevchain.git",
    ]

    for repo_url in repos:
        print(f"\nFetching metadata for: {repo_url}")
        try:
            meta = get_gitlab_metadata(repo_url)
        except Exception as e:
            # Top-level boundary: report the failure and move on so one
            # unreachable host does not abort the remaining repositories.
            print(f"Error: {e}")
            continue

        print("### Required Properties ###\n")
        for key, value in meta["required"].items():
            print(f"{key}: {value}")

        print("\n### Recommended Properties ###\n")
        for key, value in meta["recommended"].items():
            print(f"{key}: {value}")



Fetching metadata for: https://gitlab.com/api4ai/examples/img-bg-removal-people.git
project: {'id': 45236194, 'description': 'API4AI is cloud-native computer vision & AI platform for startups, enterprises and individual developers. This repository contains sample mini apps that utilizes People Photo Background Removal API provided by API4AI.', 'name': 'People Photo Background Removal', 'name_with_namespace': 'api4ai / Examples / People Photo Background Removal', 'path': 'img-bg-removal-people', 'path_with_namespace': 'api4ai/examples/img-bg-removal-people', 'created_at': '2023-04-17T11:24:59.231Z', 'default_branch': 'main', 'tag_list': ['AI', 'api', 'api4ai', 'automation', 'computer vision', 'image processing', 'javascript', 'js', 'machine-learning', 'nodejs', 'python', 'shell', 'web'], 'topics': ['AI', 'api', 'api4ai', 'automation', 'computer vision', 'image processing', 'javascript', 'js', 'machine-learning', 'nodejs', 'python', 'shell', 'web'], 'ssh_url_to_repo': 'git@gitlab.com:ap