In [1]:
import requests
response = requests.get('https://api.github.com/repos/octocat/hello-world')

In [2]:
print(response.text)

{"id":1296269,"node_id":"MDEwOlJlcG9zaXRvcnkxMjk2MjY5","name":"Hello-World","full_name":"octocat/Hello-World","private":false,"owner":{"login":"octocat","id":583231,"node_id":"MDQ6VXNlcjU4MzIzMQ==","avatar_url":"https://avatars.githubusercontent.com/u/583231?v=4","gravatar_id":"","url":"https://api.github.com/users/octocat","html_url":"https://github.com/octocat","followers_url":"https://api.github.com/users/octocat/followers","following_url":"https://api.github.com/users/octocat/following{/other_user}","gists_url":"https://api.github.com/users/octocat/gists{/gist_id}","starred_url":"https://api.github.com/users/octocat/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/octocat/subscriptions","organizations_url":"https://api.github.com/users/octocat/orgs","repos_url":"https://api.github.com/users/octocat/repos","events_url":"https://api.github.com/users/octocat/events{/privacy}","received_events_url":"https://api.github.com/users/octocat/received_events","type":"

In [3]:
import json

In [4]:
data_dict = json.loads(response.text)

In [5]:
data_dict

{'id': 1296269,
 'node_id': 'MDEwOlJlcG9zaXRvcnkxMjk2MjY5',
 'name': 'Hello-World',
 'full_name': 'octocat/Hello-World',
 'private': False,
 'owner': {'login': 'octocat',
  'id': 583231,
  'node_id': 'MDQ6VXNlcjU4MzIzMQ==',
  'avatar_url': 'https://avatars.githubusercontent.com/u/583231?v=4',
  'gravatar_id': '',
  'url': 'https://api.github.com/users/octocat',
  'html_url': 'https://github.com/octocat',
  'followers_url': 'https://api.github.com/users/octocat/followers',
  'following_url': 'https://api.github.com/users/octocat/following{/other_user}',
  'gists_url': 'https://api.github.com/users/octocat/gists{/gist_id}',
  'starred_url': 'https://api.github.com/users/octocat/starred{/owner}{/repo}',
  'subscriptions_url': 'https://api.github.com/users/octocat/subscriptions',
  'organizations_url': 'https://api.github.com/users/octocat/orgs',
  'repos_url': 'https://api.github.com/users/octocat/repos',
  'events_url': 'https://api.github.com/users/octocat/events{/privacy}',
  'received

In [6]:
def get_repo_details(username, repo_name):
    print('Fetching information for {}/{}'.format(username, repo_name))
    repo_details_url = "https://api.github.com/repos/" + username + "/" + repo_name
    response = requests.get(repo_details_url)
    if not response.ok:
        print("Failed to fetch!")
        return {}
    repo_data = json.loads(response.text)
    return {
        'description': repo_data['description'],
        'watchers': repo_data['watchers_count'],
        'open_issues': repo_data['open_issues_count'],
        'created_at': repo_data['created_at'],
        'updated_at': repo_data['updated_at']
    }

In [7]:
get_repo_details('tensorflow', 'tensorflow')

Fetching information for tensorflow/tensorflow


{'description': 'An Open Source Machine Learning Framework for Everyone',
 'watchers': 183392,
 'open_issues': 3002,
 'created_at': '2015-11-07T01:19:20Z',
 'updated_at': '2024-06-12T05:17:56Z'}

In [8]:
from bs4 import BeautifulSoup
def get_topic_page(topic):
    # Construct the URL
    topic_repos_url = 'https://github.com/topics/' + topic
    
    # Get the HTML page content using requests
    response = requests.get(topic_repos_url)
    
    # Ensure that the reponse is valid
    # if not response.ok:
    
    if response.status_code != 200:
        print('Status code:', response.status_code)
        raise Exception('Failed to fetch web page ' + topic_repos_url)
    
    # Construct a beautiful soup document
    doc = BeautifulSoup(response.text)
    
    return doc

In [9]:
doc = get_topic_page('Machine Learning')

In [10]:
article_tags = doc.find_all('article', class_='border rounded color-shadow-small color-bg-subtle my-4')

In [11]:
base_url = 'https:\\github.com'

def parse_repository(article_tag):
    # <a> tags containing username, repository name and URL
    a_tags = article_tag.h3.find_all('a')
    # Owner's username
    username = a_tags[0].text.strip()
    # Repository name
    repo_name = a_tags[1].text.strip()
    # Repository URL
    repo_url = base_url + a_tags[1]['href'].strip()
    # Star count
    stars_tag = article_tag.find('span', class_='Counter js-social-count')
    star_count = parse_star_count(stars_tag.text.strip())
    # Return a dictionary
    return {
        'repository_name': repo_name,
        'owner_username': username,        
        'stars': star_count,
        'repository_url': repo_url
    }

In [12]:
def parse_star_count(stars_str):
    stars_str = stars_str.strip()
    if stars_str[-1] == 'k':
        return int(float(stars_str[:-1]) * 1000)
    else:
        return int(stars_str)

In [13]:
top_repositories = [parse_repository(tag) for tag in article_tags]

In [14]:
def add_repo_details(repos):
    return [dict(**get_repo_details(repo['owner_username'], repo['repository_name']), **repo) for repo in repos]

In [15]:
add_repo_details(top_repositories[:5])

Fetching information for tensorflow/tensorflow
Fetching information for huggingface/transformers
Fetching information for pytorch/pytorch
Fetching information for netdata/netdata
Fetching information for microsoft/ML-For-Beginners


[{'description': 'An Open Source Machine Learning Framework for Everyone',
  'watchers': 183392,
  'open_issues': 3002,
  'created_at': '2015-11-07T01:19:20Z',
  'updated_at': '2024-06-12T05:17:56Z',
  'repository_name': 'tensorflow',
  'owner_username': 'tensorflow',
  'stars': 183000,
  'repository_url': 'https:\\github.com/tensorflow/tensorflow'},
 {'description': '🤗 Transformers: State-of-the-art Machine Learning for Pytorch, TensorFlow, and JAX.',
  'watchers': 127448,
  'open_issues': 1115,
  'created_at': '2018-10-29T13:56:00Z',
  'updated_at': '2024-06-12T05:14:33Z',
  'repository_name': 'transformers',
  'owner_username': 'huggingface',
  'stars': 127000,
  'repository_url': 'https:\\github.com/huggingface/transformers'},
 {'description': 'Tensors and Dynamic neural networks in Python with strong GPU acceleration',
  'watchers': 79366,
  'open_issues': 14146,
  'created_at': '2016-08-13T05:26:41Z',
  'updated_at': '2024-06-12T05:34:01Z',
  'repository_name': 'pytorch',
  'owne