# Proof of concept: GitLab metadata extraction
---

In [1]:
# Sanity check: print the version of the installed `requests` package.
import requests
print(requests.__version__)



2.32.5


In [None]:
# REQUIRED PROP. META DATA FROM GITLAB REPO

import requests
from urllib.parse import urlparse, quote

def get_gitlab_metadata(repo_url, token=None):
    """Collect basic metadata for a GitLab project via the REST API (v4).

    The API base is derived from the scheme and host of ``repo_url``, so the
    function is not tied to gitlab.com. Three endpoints are queried: the
    project itself, its languages, and its repository tags. Each step of the
    URL handling is printed for debugging.

    Args:
        repo_url: Web or clone URL of the project, e.g.
            ``https://gitlab.com/group/subgroup/project.git``. A trailing
            ``.git`` and leading/trailing slashes are ignored.
        token: Optional token sent in the ``PRIVATE-TOKEN`` header; only
            needed for private projects.

    Returns:
        A dict with the keys ``Name``, ``Description``, ``URL``,
        ``Clone URL``, ``Languages`` and ``Version``. Fields that are
        missing, null or empty in the API response are reported as
        ``"N/A"`` (``"No version tag"`` for ``Version``). A non-2xx or
        non-JSON response is treated as an empty payload, so the function
        still returns a dict in that case.

    Raises:
        ValueError: If ``repo_url`` has no scheme, no host or no project path.
        requests.RequestException: If a request fails at the transport level
            (connection error, timeout, ...).
    """
    parsed = urlparse(repo_url)
    print(f'### -------------- ### \nparsed: {parsed}')

    gitlab_host = f"{parsed.scheme}://{parsed.netloc}"  # just the base URL (gitlab)
    print(f'gitlab_host: {gitlab_host}')

    # strip("/") rather than lstrip("/"): a trailing slash would otherwise
    # keep ".git" from being removed and end up inside the encoded path.
    project_path = parsed.path.strip("/").removesuffix(".git")  # making it api friendly
    print(f'project_path: {project_path}')

    # Fail early: without a project path the request would address the
    # project collection endpoint instead of a single project.
    if not parsed.scheme or not parsed.netloc or not project_path:
        raise ValueError(f"Not a GitLab repository URL: {repo_url!r}")

    # safe="" so the slashes in "group/project" are percent-encoded too; the
    # API takes the whole namespaced path as a single URL segment.
    encoded_path = quote(project_path, safe="")
    print(f'encoded_path: {encoded_path}')

    api_base = f"{gitlab_host}/api/v4"
    print(f'api_base: {api_base}\n### -------------- ###\n')

    # tokens/pass for private repos [Only if needed]
    headers = {"PRIVATE-TOKEN": token} if token else {}

    def fetch_json(endpoint, expected_type):
        """GET ``endpoint``; return its JSON payload or an empty ``expected_type``."""
        resp = requests.get(f"{api_base}{endpoint}", headers=headers, timeout=10)
        if not resp.ok:
            return expected_type()
        try:
            payload = resp.json()
        except ValueError:
            # 2xx response whose body is not JSON (e.g. an HTML login page).
            return expected_type()
        # Guard against an unexpected payload shape so the lookups below
        # never hit a list where a dict is expected (or vice versa).
        return payload if isinstance(payload, expected_type) else expected_type()

    project = fetch_json(f"/projects/{encoded_path}", dict)
    print(f'project: {project}\n')

    languages = fetch_json(f"/projects/{encoded_path}/languages", dict)
    tags = fetch_json(f"/projects/{encoded_path}/repository/tags", list)

    # Per the original author's note, the project payload can also provide:
    # id, name, description, web_url, http_url_to_repo, readme_url, tag_list,
    # topics, visibility, created_at, last_activity_at, star_count,
    # forks_count, open_issues_count, namespace, owner,
    # container_registry_image_prefix.

    # The first tag the API returns is reported as the version.
    # NOTE(review): presumably the most recently updated tag under GitLab's
    # default ordering - confirm against the Tags API documentation.
    first_tag = tags[0] if tags else None
    version = first_tag.get("name") if isinstance(first_tag, dict) else None

    # `or "N/A"` instead of a .get() default: the API sends JSON null for
    # unset fields, and .get() only falls back when the key is absent.
    return {
        "Name": project.get("name") or "N/A",
        "Description": project.get("description") or "N/A",
        "URL": project.get("web_url") or "N/A",
        "Clone URL": project.get("http_url_to_repo") or "N/A",
        "Languages": ", ".join(languages) if languages else "N/A",
        "Version": version or "No version tag",
    }

if __name__ == "__main__":
    # Demo targets: one project on gitlab.com and one on a second GitLab host.
    repos = [
        "https://gitlab.com/api4ai/examples/img-bg-removal-people.git",
        "https://gitlab.ub.uni-giessen.de/gc2052/machine-learning.git"
    ]

    for repo_url in repos:
        print(f"\nFetching metadata for: {repo_url}")
        try:
            meta = get_gitlab_metadata(repo_url)
            # One "field: value" line per metadata entry.
            for field, value in meta.items():
                print(f"{field}: {value}")
        except Exception as err:
            # Top-level boundary of the demo: report the failure and move on
            # to the next repository instead of aborting the whole run.
            print(f"Error: {err}")



Fetching metadata for: https://gitlab.com/api4ai/examples/img-bg-removal-people.git
### -------------- ### 
parsed: ParseResult(scheme='https', netloc='gitlab.com', path='/api4ai/examples/img-bg-removal-people.git', params='', query='', fragment='')
gitlab_host: https://gitlab.com
project_path: api4ai/examples/img-bg-removal-people
encoded_path: api4ai%2Fexamples%2Fimg-bg-removal-people
api_base: https://gitlab.com/api/v4
### -------------- ###

project: {'id': 45236194, 'description': 'API4AI is cloud-native computer vision & AI platform for startups, enterprises and individual developers. This repository contains sample mini apps that utilizes People Photo Background Removal API provided by API4AI.', 'name': 'People Photo Background Removal', 'name_with_namespace': 'api4ai / Examples / People Photo Background Removal', 'path': 'img-bg-removal-people', 'path_with_namespace': 'api4ai/examples/img-bg-removal-people', 'created_at': '2023-04-17T11:24:59.231Z', 'default_branch': 'main', '