# Proof_of_concept of gitlab metadata extraction
---

In [1]:
import requests
print(requests.__version__)



2.31.0


In [31]:
# REQUIRED PROP. META DATA FROM GITLAB REPO

import requests
from urllib.parse import urlparse, quote

def get_gitlab_metadata(repo_url, token=None):
    
    parsed = urlparse(repo_url)
    # print(f'### -------------- ### \nparsed: {parsed}')
    
    gitlab_host = f"{parsed.scheme}://{parsed.netloc}" # just the base URL (gitlab)
    # print(f'gitlab_host: {gitlab_host}')
    
    project_path = parsed.path.lstrip("/").removesuffix(".git") # making it api friendly
    # print(f'project_path: {project_path}')
    
    encoded_path = quote(project_path, safe="") # w/o slashes for API
    # print(f'encoded_path: {encoded_path}')
    
    api_base = f"{gitlab_host}/api/v4"
    # print(f'api_base: {api_base}\n### -------------- ###\n')

    headers = {"PRIVATE-TOKEN": token} if token else {} # tokens/pass for private repos [Only if needed]

    def fetch_json(endpoint):
        resp = requests.get(f"{api_base}{endpoint}", headers=headers, timeout=10)
        return resp.json() if resp.ok else {}

    project = fetch_json(f"/projects/{encoded_path}")
    print(f'project: {(project.keys())}\n')
    
    '''
    # To print all project keys and values in the project dictionary
    for p in project:
        print(f'{p}: {project[p]}\n')
    '''

    languages = fetch_json(f"/projects/{encoded_path}/languages")
    tags = fetch_json(f"/projects/{encoded_path}/repository/tags")
    commit = fetch_json(f"/projects/{encoded_path}/repository/commits")
    branches = fetch_json(f"/projects/{encoded_path}/repository/branches")
    license = fetch_json(f"/projects/{encoded_path}/license")
    contributors = fetch_json(f"/projects/{encoded_path}/repository/contributors")
    



    '''
    'get' can return (id, desccription, name, name_with_namespace, path, path_with_namespace, created_at, default_branch, tag_list, ssh_url_to_repo, 
    http_url_to_repo, web_url, readme_url, avatar_url, star_count, forks_count, last_activity_at, visibility, namespace.)
    '''

    required = {
        "Name": project.get("name", "N/A"),
        "Description": project.get("description", "N/A"),
        "URL": project.get("web_url", "N/A"),
        "Clone URL": project.get("http_url_to_repo", "N/A"),
        "Languages": ", ".join(languages.keys()) if languages else "N/A",
        "Version": tags[0]["name"] if isinstance(tags, list) and tags else "No version tag",
    }

    recommended = {
        "Latest Commit": commit[0]["id"] if isinstance(commit, list) and commit else "N/A",
        "Branches": len(branches) if isinstance(branches, list) else "N/A",
        "License": license.get("name", "N/A") if isinstance(license, dict) else "N/A",
        "Contributors": ", ".join([c["name"] for c in contributors]) if isinstance(contributors, list) else "N/A",
        "README_url": project.get("readme_url", "N/A"),
    }
 
    return {"required": required, "recommended": recommended}

if __name__ == "__main__":
    repos = [
        "https://gitlab.com/remram44/taguette.git"
    ]

    for r in repos:
        print(f"\nFetching metadata for: {r}")
        try:
            meta = get_gitlab_metadata(r)
            print(f'### Required Peoperties ###\n')
            for keys, values in meta["required"].items():
                print(f"{keys}: {values}")
            print(f"\n### Recommended Properties ###\n")
            for keys, values in meta["recommended"].items():
                print(f"{keys}: {values}")
        except Exception as e:
            print(f"Error: {e}")



Fetching metadata for: https://gitlab.com/remram44/taguette.git
project: dict_keys(['id', 'description', 'name', 'name_with_namespace', 'path', 'path_with_namespace', 'created_at', 'default_branch', 'tag_list', 'topics', 'ssh_url_to_repo', 'http_url_to_repo', 'web_url', 'readme_url', 'forks_count', 'avatar_url', 'star_count', 'last_activity_at', 'visibility', 'namespace'])

### Required Peoperties ###

Name: taguette
Description: Free and open source qualitative research tool
URL: https://gitlab.com/remram44/taguette
Clone URL: https://gitlab.com/remram44/taguette.git
Languages: Python, JavaScript, HTML, CSS, Dockerfile
Version: v1.5.1

### Recommended Properties ###

Latest Commit: add9419898abc0a87b5d0c2f35f5a8daf1347469
Branches: 20
License: N/A
Contributors: Tim Gates, Jan Dittrich, Stéphane Guillou, Mikaël Francoeur, Kosovskih Svyatoslav, Stuart Geiger, vagrant, Mohammad Rizki, Yannis Kaskamanidis, Vicky Rampin, Guillaume Deflaux, Andrew Nichols, jgwl, Vicky Steeves, Vicky Steeve