# GitHub Repository Data Exploration

## Setup 

Ensure you are in the `coding-smart-github` conda environment and that the packages imported by this notebook are installed in it:
```
pandas
numpy
requests
PyGithub
tqdm
matplotlib
```

### GitHub Authentication

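Create a classic personal access token by following [GitHub's instructions for creating a personal access token (classic)](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token#creating-a-personal-access-token-classic), then create a file called `config.cfg` with the content below. The notebook reads it from `../config.cfg`, i.e. one directory above the notebook's working directory.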
```
[ACCESS]
token = <your-access-token>
```
Replace `<your-access-token>` with your own token, but keep the `[ACCESS]` header and the `token = ` prefix exactly as shown. Do not commit this file to version control.

In [1]:
import pandas as pd
pd.set_option('max_colwidth', 1000)

import numpy as np

import os

import configparser

import requests

from github import Github

import datetime

from tqdm import tqdm

import matplotlib.pyplot as plt

In [2]:
# Set up GitHub API access: read the personal access token from the config
# file and create an authenticated PyGithub client (`g`).

config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check it so a missing config file fails with a
# clear message instead of an opaque KeyError on the lookup below.
if not config.read('../config.cfg'):
    raise FileNotFoundError(
        "Could not read '../config.cfg'. Create it with an [ACCESS] section "
        "containing 'token = <your-access-token>' (see Setup above)."
    )

access_token = config['ACCESS']['token']
g = Github(access_token)

In [4]:
# PARSE GITHUB REPO URL INTO USERNAME AND REPO NAME

 # via @karacolada 
def parse_github_repo_url(url):
    """Split a GitHub repository URL into its owner and repository name.

    The URL must have the form 'https://github.com/<username>/<repo_name>',
    so that splitting it on '/' yields exactly five segments. Trailing '/'
    characters are ignored.

    Args:
        url (str): Full repository URL, starting with 'https://'.

    Returns:
        tuple: (username, repo_name), where 'username' is the user or
        organisation that owns the repository.

    Raises:
        AssertionError: If 'url' is not a string.
        ValueError: If 'url' does not split into exactly five segments.

    Examples:
    >>> parse_github_repo_url("https://github.com/riboviz/riboviz")
    ('riboviz', 'riboviz')

    >>> parse_github_repo_url("https://github.com/FlicAnderson/20230215-JournalClub-BestPractices")
    ('FlicAnderson', '20230215-JournalClub-BestPractices')

    >>> parse_github_repo_url("https://github.com/riboviz/riboviz/")
    ('riboviz', 'riboviz')
    """
    assert isinstance(url, str), 'Ensure input url is a string'

    try:
        # Segments are: 'https:', '', 'github.com', username, repo_name.
        _, _, _, username, repo_name = url.rstrip('/').split('/')
    except ValueError:
        # Raise instead of printing: falling through to the return statement
        # would fail with UnboundLocalError and hide the real problem.
        raise ValueError(
            f"Could not unpack URL {url} into 5 segments. Confirm input is correct (and starts 'https://github.com/') or refactor function parse_github_repo_url."
        ) from None

    return username, repo_name

In [9]:
# Sanity check: a well-formed repository URL is split into (username, repo_name).
parse_github_repo_url("https://github.com/riboviz/riboviz")
#parse_github_repo_url("https://github.com/FlicAnderson/20230215-JournalClub-BestPractices")  # extra example
#parse_github_repo_url(123)  # non-string input: triggers the AssertionError in the function.

('riboviz', 'riboviz')

In [10]:
# GET REPO JSON DATA 

def get_repo_json(username, repo_name, timeout=10):
    """Fetch the GitHub REST API metadata for a repository.

    Sends a GET request to 'https://api.github.com/repos/<username>/<repo_name>'
    and returns the decoded JSON body. The request carries no authentication
    headers, so GitHub's limits for unauthenticated requests apply.

    Args:
        username (str): User or organisation that owns the repository.
        repo_name (str): Name of the repository.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout, requests may block indefinitely.

    Returns:
        dict: 'json_content', the repository metadata.

    Raises:
        requests.exceptions.HTTPError: If GitHub responds with an error
            status (e.g. 404 for an unknown repository, 403 when rate limited).
        requests.exceptions.Timeout: If no response arrives within 'timeout'.

    Examples:

    >>> get_repo_json('FlicAnderson', '20230215-JournalClub-BestPractices')
    {'id': 595202904,
     'node_id': 'R_kgDOI3oTWA',
     'name': '20230215-JournalClub-BestPractices',
     'full_name': 'FlicAnderson/20230215-JournalClub-BestPractices',
     'private': False,
     ... lots more content ...}

    # To access the value for a given key, save output to a variable and index the key name:
    >>> content = get_repo_json('FlicAnderson', '20230215-JournalClub-BestPractices')
    >>> content['has_issues']
    True
    """
    response = requests.get(
        f'https://api.github.com/repos/{username}/{repo_name}',
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx; otherwise the error payload (e.g.
    # {'message': 'Not Found'}) would be returned as if it were repository data.
    response.raise_for_status()

    json_content = response.json()

    return json_content

In [13]:
# Fetch the metadata for the riboviz/riboviz repository; the returned dict is shown as the cell output.
get_repo_json('riboviz', 'riboviz')

{'id': 184749972,
 'node_id': 'MDEwOlJlcG9zaXRvcnkxODQ3NDk5NzI=',
 'name': 'riboviz',
 'full_name': 'riboviz/riboviz',
 'private': False,
 'owner': {'login': 'riboviz',
  'id': 50236323,
  'node_id': 'MDEyOk9yZ2FuaXphdGlvbjUwMjM2MzIz',
  'avatar_url': 'https://avatars.githubusercontent.com/u/50236323?v=4',
  'gravatar_id': '',
  'url': 'https://api.github.com/users/riboviz',
  'html_url': 'https://github.com/riboviz',
  'followers_url': 'https://api.github.com/users/riboviz/followers',
  'following_url': 'https://api.github.com/users/riboviz/following{/other_user}',
  'gists_url': 'https://api.github.com/users/riboviz/gists{/gist_id}',
  'starred_url': 'https://api.github.com/users/riboviz/starred{/owner}{/repo}',
  'subscriptions_url': 'https://api.github.com/users/riboviz/subscriptions',
  'organizations_url': 'https://api.github.com/users/riboviz/orgs',
  'repos_url': 'https://api.github.com/users/riboviz/repos',
  'events_url': 'https://api.github.com/users/riboviz/events{/privacy}