In [117]:
from dotenv import load_dotenv
import os
import requests
from tqdm import tqdm
import json
load_dotenv('../.env')

True

In [118]:
BEARER_TOKEN = os.getenv('GITHUB_TOKEN')
if BEARER_TOKEN :
    print('GITHUB TOKEN found in envirionment')
else:
   print('GITHUB TOKEN not found')

GITHUB TOKEN found in envirionment


In [119]:
USER = 'Kaggle' # will take input from the user instead hardcoded
BASE_URL = 'https://api.github.com/'


response = requests.get(
    f'{BASE_URL}/users/{USER}/repos',
    headers={
        'Authorization': f'Bearer {BEARER_TOKEN}'
    }
)
repositories = response.json()
print(f'Total public repositories found : {len(repositories)}')

Total public repositories found : 12


In [120]:
class Repository:
    name = ''
    stargazers_count = 0
    forks_count = 0
    
    # further api metrics

    commits_count = 0
    contributors_count = 0
    branches_count = 0
    tags_count = 0
    releases_count = 0
    closed_issues_count = 0
    enviroments_count = 0

    def __init__(self, name, stargazers_count, forks_count):
        self.name = name
        self.stargazers_count = stargazers_count
        self.forks_count = forks_count

        self.commits_count = self.__get_metrics_count('commits')
        self.contributors_count = self.__get_metrics_count('contributors')
        self.branches_count = self.__get_metrics_count('branches')
        self.tags_count = self.__get_metrics_count('tags')
        self.closed_issues_count = self.__get_metrics_count('issues')
        self.releases_count = self.__get_metrics_count('releases')    
        self.enviroments_count = self.__get_metrics_count('environments')    
  

    def __get_metrics_count(self, url_extension):
        total_count = 0
        page = 1
        per_page = 100  # Maximum allowed by GitHub API
        
        while True:
            url = f'{BASE_URL}/repos/{USER}/{self.name}/{url_extension}?per_page={per_page}&page={page}'
            if url_extension == 'issues':
                url = f'{url}&state=closed'
                
            response = requests.get(url, headers={
                'Authorization': f'Bearer {BEARER_TOKEN}'
            })
            
            if url_extension == 'environments':
                return response.json().get('total_count', 0)
                
            data = response.json()
            
            if not data:  # Empty response means we've reached the end
                break

            
            if url_extension == 'issues':
                issues_without_pull_request = 0
                for issue in data:
                    if not issue.get('pull_request'):  # If pull_request is None, it's a regular issue
                        issues_without_pull_request += 1
                total_count += issues_without_pull_request
            else:
                total_count += len(data)
            
            # Check if there are more pages
            if 'next' not in response.links:
                break
                
            page += 1
            
        return total_count
    
    def to_dict(self):
        return {
            'name': self.name,
            'stargazers_count': self.stargazers_count,
            'forks_count': self.forks_count,
            'commits_count': self.commits_count,
            'contributors_count': self.contributors_count,
            'branches_count': self.branches_count,
            'tags_count': self.tags_count,
            'releases_count': self.releases_count,
            'closed_issues_count': self.closed_issues_count,
            'environments_count' : self.enviroments_count
            
        }

In [121]:
repo = Repository(repositories[6]['name'], repositories[6]['stargazers_count'], repositories[6]['forks_count'])
repo.to_dict()

{'name': 'kaggle-api',
 'stargazers_count': 6615,
 'forks_count': 1165,
 'commits_count': 278,
 'contributors_count': 45,
 'branches_count': 17,
 'tags_count': 10,
 'releases_count': 10,
 'closed_issues_count': 377,
 'environments_count': 0}

In [122]:
repo_metrics = []
for repo in tqdm(repositories, desc='Fetching repository metrics'):
    repo_metrics.append(Repository(repo['name'], repo['stargazers_count'], repo['forks_count']).to_dict())
repo_metrics[:2]


Fetching repository metrics: 100%|██████████| 12/12 [02:20<00:00, 11.70s/it]


[{'name': '.allstar',
  'stargazers_count': 1,
  'forks_count': 0,
  'commits_count': 1,
  'contributors_count': 0,
  'branches_count': 1,
  'tags_count': 0,
  'releases_count': 0,
  'closed_issues_count': 0,
  'environments_count': 0},
 {'name': 'docker-julia',
  'stargazers_count': 36,
  'forks_count': 12,
  'commits_count': 84,
  'contributors_count': 6,
  'branches_count': 1,
  'tags_count': 0,
  'releases_count': 0,
  'closed_issues_count': 3,
  'environments_count': 0}]

In [123]:
class RepostioyStats:
    
    def __init__(self, repositories):
        self.repositories = repositories
        self.total_repos = len(repositories)
        self.user = USER

        self.total_stargazers_count = sum([repo.get('stargazers_count') for repo in repositories])
        self.median_stargazers_count = self.__get_median(self.total_stargazers_count)
        
        self.total_forks_count = sum([repo.get('forks_count') for repo in repositories])
        self.median_forks_count = self.__get_median(self.total_forks_count)
        
        self.total_commits_count = sum([repo.get('commits_count') for repo in repositories])
        self.median_commits_count = self.__get_median(self.total_commits_count)

        self.total_contributors_count = sum([repo.get('contributors_count') for repo in repositories])
        self.median_contributors_count = self.__get_median(self.total_contributors_count)

        self.total_branches_count = sum([repo.get('branches_count') for repo in repositories])
        self.median_branches_count = self.__get_median(self.total_branches_count)

        self.total_tags_count = sum([repo.get('tags_count') for repo in repositories])
        self.median_tags_count = self.__get_median(self.total_tags_count)

        self.total_releases_count = sum([repo.get('releases_count') for repo in repositories])
        self.median_releases_count = self.__get_median(self.total_releases_count)

        self.total_closed_issues_count = sum([repo.get('closed_issues_count') for repo in repositories])
        self.median_closed_issues_count = self.__get_median(self.total_closed_issues_count)
        
        self.total_enviroments_count = sum([repo.get('environments_count') for repo in repositories])
        self.median_enviroments_count = self.__get_median(self.total_enviroments_count)
    

    def __get_median(self, value):
        return round(value / self.total_repos, 2)
    
    def to_dict(self):
        return {
            'total_repos': self.total_repos,
            'total_stargazers_count': self.total_stargazers_count,
            'median_stargazers_count': self.median_stargazers_count,
            'total_forks_count': self.total_forks_count,
            'median_forks_count': self.median_forks_count,
            'total_commits_count': self.total_commits_count,
            'median_commits_count': self.median_commits_count,
            'total_contributors_count': self.total_contributors_count,
            'median_contributors_count': self.median_contributors_count,
            'total_branches_count': self.total_branches_count,
            'median_branches_count': self.median_branches_count,
            'total_tags_count': self.total_tags_count,
            'median_tags_count': self.median_tags_count,
            'total_releases_count': self.total_releases_count,
            'median_releases_count': self.median_releases_count,
            'total_closed_issues_count': self.total_closed_issues_count,
            'median_issues_count': self.median_closed_issues_count,
            'total_environments_count' : self.total_enviroments_count,
            'median_environments_count' : self.median_enviroments_count 
        }

In [124]:
repo_stats = RepostioyStats(repo_metrics)
repo_stats.to_dict()

{'total_repos': 12,
 'total_stargazers_count': 10469,
 'median_stargazers_count': 872.42,
 'total_forks_count': 2747,
 'median_forks_count': 228.92,
 'total_commits_count': 7162,
 'median_commits_count': 596.83,
 'total_contributors_count': 306,
 'median_contributors_count': 25.5,
 'total_branches_count': 232,
 'median_branches_count': 19.33,
 'total_tags_count': 436,
 'median_tags_count': 36.33,
 'total_releases_count': 316,
 'median_releases_count': 26.33,
 'total_closed_issues_count': 847,
 'median_issues_count': 70.58,
 'total_environments_count': 0,
 'median_environments_count': 0.0}

In [125]:
final_stats_summary = {
    'user' : repo_stats.user,
    'stats' : repo_stats.to_dict(),
}

final_stats_detailed = {
    'user' : repo_stats.user,
    'stats' : repo_stats.to_dict(),
    'repositories' : repo_stats.repositories
}


In [126]:
with open('repo_stats_summary.json', 'w') as f:
    json.dump(final_stats_summary, f)

with open('repo_stats_detailed.json', 'w') as f:
    json.dump(final_stats_detailed, f)
