<div align="center">
  <h1 align="center">LizardByte Developer Dashboard</h1>
</div>

<div align="center">
[
  <a href="#Developer-Tools">Developer Tools</a> •
  <a href="#Repository-Data">Repository Data</a> •
  <a href="#Star-Gazers">Star Gazers</a> •
  <a href="#Forks">Forks</a> •
  <a href="#Open-Issues">Open Issues</a> •
  <a href="#Open-PRs">Open PRs</a> •
  <a href="#License-Distribution">License Distribution</a> •
  <a href="#Coverage">Coverage</a> •
  <a href="#Programming-Languages">Programming Languages</a> •
  <a href="#Documentation">Documentation</a>
]
</div>

<div align="center" id="Developer-Tools">
  <h2>Developer Tools</h2>
  <a href="https://app.codecov.io/gh/LizardByte">
    <img src="https://img.shields.io/badge/codecov-button?style=for-the-badge&logo=codecov&color=gray" alt="CodeCov">
  </a>
  <a href="https://crowdin.com/project/lizardbyte">
    <img src="https://img.shields.io/badge/crowdin%20%28projects%29-button?style=for-the-badge&logo=crowdin&color=gray" alt="Crowdin (projects)">
  </a>
  <a href="https://crowdin.com/project/lizardbyte-docs">
    <img src="https://img.shields.io/badge/crowdin%20%28docs%29-button?style=for-the-badge&logo=crowdin&color=gray" alt="Crowdin (docs)">
  </a>
  <a href="https://github.com/organizations/LizardByte/settings/actions/caches">
    <img src="https://img.shields.io/badge/github%20caches-button?style=for-the-badge&logo=github-actions&color=gray" alt="GitHub Caches">
  </a>
  <a href="https://github.com/organizations/LizardByte/settings/actions/hosted-runners">
    <img src="https://img.shields.io/badge/github%20hosted%20runners-button?style=for-the-badge&logo=github&color=gray" alt="GitHub Hosted Runners">
  </a>
  <a href="https://pypi.org/user/LizardByte/">
    <img src="https://img.shields.io/badge/pypi-button?style=for-the-badge&logo=pypi&color=gray" alt="PyPI">
  </a>
  <a href="https://readthedocs.org/dashboard/lizardbyte/subprojects/">
    <img src="https://img.shields.io/badge/readthedocs-button?style=for-the-badge&logo=readthedocs&color=gray" alt="ReadTheDocs">
  </a>
  <a href="https://sonarcloud.io/organizations/lizardbyte/projects">
    <img src="https://img.shields.io/badge/sonarcloud-button?style=for-the-badge&logo=sonarcloud&color=gray" alt="SonarCloud">
  </a>
</div>

<div align="center" id="Under-Construction">
  <h3>Under Construction</h3>
  <a href="https://cloudsmith.io/~lizardbyte/repos">
    <img src="https://img.shields.io/badge/cloudsmith-button?style=for-the-badge&logo=cloudsmith&color=yellow" alt="Cloudsmith">
  </a>
  <a href="https://copr.fedorainfracloud.org/coprs/lizardbyte">
    <img src="https://img.shields.io/badge/fedora%20copr-button?style=for-the-badge&logo=fedora&color=yellow" alt="Fedora Copr">
  </a>
  <a href="https://lizardbyte.youtrack.cloud">
    <img src="https://img.shields.io/badge/youtrack-button?style=for-the-badge&logo=jetbrains&color=yellow" alt="YouTrack">
  </a>
</div>

## Repository Data

In [None]:
# Initialize the environment

# standard imports
import os
import numpy as np

# lib imports
from dotenv import load_dotenv
from github import Github, UnknownObjectException
import pandas as pd
import plotly.express as px
import plotly.io as pio
import requests

# Load environment variables from .env file
load_dotenv()

# Authenticate with GitHub
# NOTE(review): os.getenv returns None when GITHUB_TOKEN is unset, so the client
# would be created without a token — confirm that this is acceptable.
token = os.getenv("GITHUB_TOKEN")
g = Github(token)

# set the default plotly template
pio.templates.default = "plotly_dark"

# Fetch repository data
org_name = "LizardByte"
org = g.get_organization(org_name)
repos = org.get_repos()

uno_base_url = 'https://app.lizardbyte.dev/uno'

# Seconds to wait for the uno server. requests applies no timeout by default,
# so without this a stalled connection would hang the notebook indefinitely.
uno_request_timeout = 30

# readthedocs data for all projects
readthedocs_data_url = f'{uno_base_url}/readthedocs/projects.json'
readthedocs_response = requests.get(readthedocs_data_url, timeout=uno_request_timeout)
if not readthedocs_response.ok:
    raise Exception("Failed to fetch data from uno")
readthedocs_data = readthedocs_response.json()

# Process data: build one record per repository in the organization.

# Seconds to wait for each coverage request. requests applies no timeout by
# default, so a stalled connection would otherwise block the loop forever.
coverage_request_timeout = 30

# Index the readthedocs projects by repository URL once, instead of rescanning
# the whole list for every repository. When several projects share a URL the
# last one wins, which is what a full scan without an early exit produces.
# NOTE(review): the lookup is an exact match against repo.clone_url — confirm
# that the URLs stored in readthedocs use the same form.
readthedocs_by_url = {
    project['repository']['url']: project
    for project in readthedocs_data
}

repo_data = []
for repo in repos:
    # get license
    license_name = repo.license.name if repo.license else "No License"

    # split open issues and PRs in a single pass; entries whose pull_request
    # attribute is set are treated as PRs, the rest as plain issues
    open_issues = []
    open_prs = []
    for issue in repo.get_issues(state='open'):
        if issue.pull_request is not None:
            open_prs.append(issue)
        else:
            open_issues.append(issue)

    # coverage data; stays 0 when the report is missing or unusable
    coverage_url = f'{uno_base_url}/codecov/{repo.name}.json'
    coverage_response = requests.get(coverage_url, timeout=coverage_request_timeout)
    coverage = 0
    if coverage_response.status_code == 200:
        try:
            coverage_data = coverage_response.json()
            # float() keeps the column numeric for the sorting and plotting
            # done later; a null or non-numeric value falls through to 0
            coverage = float(coverage_data['totals']['coverage'])
        except (KeyError, TypeError, ValueError):
            # invalid JSON, unexpected structure, or an unusable value
            pass

    # readthedocs data (None when no project points at this repository)
    readthedocs_project = readthedocs_by_url.get(repo.clone_url)

    # check if the repo has a README; a repository without one is reported
    # through UnknownObjectException
    readme_file = None
    try:
        readme_file = repo.get_readme()
    except UnknownObjectException:
        pass

    repo_data.append({
        "repo": repo.name,
        "stars": repo.stargazers_count,
        "archived": repo.archived,
        "fork": repo.fork,
        "forks": repo.forks_count,
        "issues": open_issues,
        "topics": repo.get_topics(),
        "languages": repo.get_languages(),
        "license": license_name,
        "prs": open_prs,
        "created_at": repo.created_at,
        "updated_at": repo.updated_at,
        "coverage": coverage,
        "readthedocs": readthedocs_project,
        "has_readthedocs": readthedocs_project is not None,
        "has_readme": readme_file is not None,
        # keep the API object so later cells can query more details
        "_repo": repo,
    })

# One row per repository, built from the records collected above.
df = pd.DataFrame(repo_data)

# Repositories tagged 'package-manager' are left out of both filtered views.
is_package_manager = df['topics'].apply(lambda topics: 'package-manager' in topics)
is_active = (~df['archived']) & (~is_package_manager)

# .copy() gives each view its own data, so later cells can add columns to it
# without writing into a slice of df (which pandas may flag with
# SettingWithCopyWarning).
# df_repos: non-archived repositories, forks included.
df_repos = df[is_active].copy()
# df_original_repos: same as df_repos but without forks.
df_original_repos = df[is_active & (~df['fork'])].copy()

print(f'Total Repositories: {len(repo_data)}')
print(f'Archived Repositories: {df["archived"].sum()}')
print(f'Forked Repositories: {df["fork"].sum()}')

print(f'Total Open Issues: {df["issues"].apply(len).sum()}')
print(f'Total Open PRs: {df["prs"].apply(len).sum()}')
print(f'Open issues in active repositories: {df_repos["issues"].apply(len).sum()}')
print(f'Open PRs in active repositories: {df_repos["prs"].apply(len).sum()}')

### Star Gazers

In [None]:
# Stars: repositories ordered from most to fewest stars.
# Bar heights use log1p(stars); the label inside each bar shows the raw count.
df_stars = (
    df_repos
    .sort_values(by='stars', ascending=False)
    .assign(log_stars=lambda frame: np.log1p(frame['stars']))
)

fig = px.bar(df_stars, x='repo', y='log_stars', text='stars', title='Stars')
fig.update_traces(textposition='inside', texttemplate='%{text}')
# hide the y axis title and tick labels
fig.update_layout(yaxis_showticklabels=False, yaxis_title=None)
fig.show()

### Forks

In [None]:
# Forks: repositories ordered from most to fewest forks.
# Bar heights use log1p(forks); the label inside each bar shows the raw count.
forks_sorted = df_repos.sort_values(by='forks', ascending=False)
df_forks = forks_sorted.assign(log_forks=np.log1p(forks_sorted['forks']))

bar_options = {
    'x': 'repo',
    'y': 'log_forks',
    'title': 'Forks',
    'text': 'forks',
}
fig = px.bar(df_forks, **bar_options)
fig.update_traces(texttemplate='%{text}', textposition='inside')
# hide the y axis title and tick labels
fig.update_layout(yaxis_title=None, yaxis_showticklabels=False)
fig.show()

### Open Issues

In [None]:
# Open Issues

# Count the open issues of each repository.
# assign() builds a new frame and the name df_repos is rebound to it, instead
# of writing a column into the existing object with .loc: df_repos comes from
# filtering df, and pandas may flag in-place writes to such a frame with
# SettingWithCopyWarning. df_repos still gains the 'issue_count' column.
df_repos = df_repos.assign(issue_count=df_repos['issues'].apply(len))

# Sort by issue count in descending order and add the log-scaled count that is
# used for the bar heights.
df_issues = (
    df_repos
    .sort_values(by='issue_count', ascending=False)
    .assign(log_issues=lambda frame: np.log1p(frame['issue_count']))
)

# Visualize data using a bar chart; the label inside each bar is the raw count
fig = px.bar(
    df_issues,
    x='repo',
    y='log_issues',
    title='Open Issues',
    text='issue_count',
)
fig.update_traces(
    texttemplate='%{text}',
    textposition='inside',
)
# hide the y axis title and tick labels
fig.update_layout(
    yaxis_title=None,
    yaxis_showticklabels=False,
)
fig.show()

### Open PRs

In [None]:
# Open PRs: per repository, count the open pull requests by category.

# Authors whose PRs are counted in the "Dependency" category.
dependency_bot_logins = {'dependabot[bot]', 'renovate[bot]'}
pr_categories = ['Draft', 'Ready for review', 'Dependency']

pr_data = []
for repo in df_repos.to_dict('records'):
    draft_prs = 0
    non_draft_prs = 0
    dependency_prs = 0

    for pr in repo['prs']:
        # The entries in repo['prs'] already carry the author, so bot PRs are
        # classified first and need no additional API request.
        if pr.user.login in dependency_bot_logins:
            dependency_prs += 1
            continue

        # Only the remaining PRs need the full pull request, for its draft flag.
        pr_details = repo['_repo'].get_pull(pr.number)
        if pr_details.draft:
            draft_prs += 1
        else:
            non_draft_prs += 1

    pr_data.append({
        "repo": repo['repo'],
        "Draft": draft_prs,
        "Ready for review": non_draft_prs,
        "Dependency": dependency_prs,
    })

# Naming the columns keeps them present even when pr_data is empty.
df_prs = pd.DataFrame(pr_data, columns=['repo', *pr_categories])
df_prs['total_prs'] = df_prs[pr_categories].sum(axis=1)

# Sort by total PRs in descending order
df_prs = df_prs.sort_values(by='total_prs', ascending=False)

# Visualize data using a stacked bar chart
fig = px.bar(
    df_prs,
    x='repo',
    y=pr_categories,
    title='Open Pull Requests',
    labels={'value': 'Count', 'variable': 'PR Type'},
    barmode='stack'
)
fig.update_layout(
    yaxis_title='Count of PRs',
    xaxis_title='Repository',
)
fig.show()

### License Distribution

In [None]:
# License distribution: one row per (license, repo) pair with its row count
grouped_by_license = df_repos.groupby(['license', 'repo'])
license_counts = grouped_by_license.size().reset_index(name='count')

# Nested treemap: licenses on the outer level, repositories inside each one.
# The hover shows the repo name and leaves out the count.
license_treemap_options = {
    'path': ['license', 'repo'],
    'values': 'count',
    'title': 'License Distribution',
    'hover_data': {'repo': True, 'count': False},
}
fig_treemap = px.treemap(license_counts, **license_treemap_options)
fig_treemap.show()

### Coverage

In [None]:
# Coverage

# The coverage values are taken from external JSON, so make the column numeric
# before it is sorted and used as marker size: anything missing or non-numeric
# becomes 0, the value already used for repositories without a coverage report.
df_coverage = (
    df_repos
    .assign(
        coverage=lambda frame: pd.to_numeric(frame['coverage'], errors='coerce').fillna(0)
    )
    .sort_values(by='coverage', ascending=False)
)

# Marker size and color both encode the coverage value.
fig_scatter = px.scatter(
    df_coverage,
    x='repo',
    y='coverage',
    title='Coverage Percentage',
    size='coverage',
    color='coverage',
)
fig_scatter.update_layout(
    yaxis_title='Coverage Percentage',
    xaxis_title='Repository',
)
fig_scatter.show()

### Programming Languages

In [None]:
# Programming Languages by Bytes of Code
# Flatten each repository's {language: bytes} mapping into one row per pair.
language_data = [
    {
        "repo": record['repo'],
        "language": language,
        "bytes_of_code": bytes_of_code,
    }
    for record in df_repos.to_dict('records')
    for language, bytes_of_code in record['languages'].items()
]

df_languages = pd.DataFrame(language_data)

# Total bytes for every (language, repo) pair
bytes_per_pair = df_languages.groupby(['language', 'repo'])['bytes_of_code'].sum()
language_counts = bytes_per_pair.reset_index()

fig_treemap = px.treemap(
    language_counts,
    title='Programming Languages by Bytes of Code (Treemap)',
    path=['language', 'repo'],
    values='bytes_of_code',
    hover_data={'repo': True, 'bytes_of_code': True},
)
fig_treemap.show()

# Programming Languages by Repo Count
# Number of rows for every (language, repo) pair
rows_per_pair = df_languages.groupby(['language', 'repo']).size()
language_counts = rows_per_pair.reset_index(name='repo_count')

fig_treemap = px.treemap(
    language_counts,
    title='Programming Languages by Repo Count (Treemap)',
    path=['language', 'repo'],
    values='repo_count',
    hover_data={'repo_count': True},
)
fig_treemap.show()

### Documentation

In [None]:
# Docs

# Keep only the name and the two documentation flags of each repository
docs_data = [
    {
        "repo": record['repo'],
        "has_readme": record['has_readme'],
        "has_readthedocs": record['has_readthedocs'],
    }
    for record in df_repos.to_dict('records')
]

df_docs = pd.DataFrame(docs_data)

# README: group the repositories by whether a README was found
readme_groups = df_docs.groupby(['has_readme', 'repo'])
readme_counts = readme_groups.size().reset_index(name='repo_count')

fig_treemap_readme = px.treemap(
    readme_counts,
    title='Has README file',
    values='repo_count',
    path=['has_readme', 'repo'],
)
fig_treemap_readme.show()

# ReadTheDocs: group the repositories by whether a matching project was found
readthedocs_groups = df_docs.groupby(['has_readthedocs', 'repo'])
readthedocs_counts = readthedocs_groups.size().reset_index(name='repo_count')

fig_treemap_readthedocs = px.treemap(
    readthedocs_counts,
    title='Uses ReadTheDocs',
    values='repo_count',
    path=['has_readthedocs', 'repo'],
)
fig_treemap_readthedocs.show()