## github api

In [1]:
import requests
from bs4 import BeautifulSoup

### 1. get_user_info(username)

возвращает словарь с информацией о юзере. Ключи: имя (которое НЕ никнейм), организация, местоположение, число подписчиков, число репозиториев.

In [60]:
def get_user_info(username):
    """
    This function accept GitHub username and parses from it Name, Organization, Location, number of public repos
    and number of followers
    @ return: user_info, dict
    """
    url = 'https://github.com/' + username
    req = requests.get(url)
    soup = BeautifulSoup(req.content)
    user_info = {'Name':None, 'Organization':None, 'Location':None, 'n_repos':None, 'n_subscribers':None}
    try:
        user_info['Name'] = soup.find('span', itemprop = 'name').text.strip()
    except:
        pass
    try:
        user_info['Organization'] = soup.find("span", class_="p-org").text
    except:
        pass
    try:
        user_info['Location'] = soup.find('span',class_="p-label").text
    except:
        pass
    try:
        user_info['n_repos'] = int(soup.find('span', class_='Counter').text)
    except:
        pass
    try:
        user_info['n_subscribers'] = int(soup.find('span', class_ = "text-bold color-fg-default").text)
    except:
        pass
    
    return user_info

### 2. get_user_repositories(username)
get_user_repositories(username) - возвращает список публичных репозиториев пользователя в виде списка словарей. Ключи: юзер, имя репозитория, язык.

In [46]:
def get_user_repositories(username):
    """
    This function accepts GitHub username and parses his repos names and languages.
    @ return: repos, list of dict
    """
    url = 'https://github.com/' + username + '?tab=repositories'
    req = requests.get(url)
    soup = BeautifulSoup(req.content)
    repo_soup = soup.find('div', id="user-repositories-list").find_all("li")
    repos = []
    for i in range(len(repo_soup)):
        repo_dict = {'User':username, "repo_name": None, 'lang':None}
        try:
            repo_dict["repo_name"] = repo_soup[i].find('a', itemprop="name codeRepository").text.strip()
        except:
            pass
        try:
            repo_dict['lang'] = repo_soup[i].find('span', itemprop="programmingLanguage").text
        except:
            pass
        repos.append(repo_dict)
    return repos

[{'User': 'MShtol', 'repo_name': 'BI_2021_Python', 'lang': 'Jupyter Notebook'},
 {'User': 'MShtol', 'repo_name': 'BI_ML_2021', 'lang': 'Jupyter Notebook'},
 {'User': 'MShtol', 'repo_name': 'BI_Stat_2021', 'lang': None},
 {'User': 'MShtol', 'repo_name': 'ml-mipt', 'lang': 'Jupyter Notebook'},
 {'User': 'MShtol',
  'repo_name': 'multi-omics-hackathon',
  'lang': 'Jupyter Notebook'},
 {'User': 'MShtol', 'repo_name': 'repseq-annotation-tutorial', 'lang': 'TeX'}]

### 3. list_repository_contents(username, repository, repository_path) 
возвращает список файлов и папок в репозитории repository пользователя username по пути repository_path. По сути это аналогично использованию "ls repository_path" локально, мы просто выводим список файлов по указанному пути.

In [54]:
def list_repository_contents(username, repository, repository_path = None):
    """
    This function accepts GitHub username, and repo name and returns it's content list.
    @ return: repos, list of dict
    """
    # repository_path - spare variable
    url = 'https://github.com/' + username + '/' + repository
    req = requests.get(url)
    soup = BeautifulSoup(req.content)
    rep_cont = []
    rowheads = soup.find_all('div', role='rowheader')
    for rowhead in rowheads:
        rep_cont.append(rowhead.text.strip())
    return rep_cont

['.github/workflows',
 'numpy_challenge',
 're',
 'Ci_Bq_converter.py',
 'README.md',
 'fastq_filtrator.py',
 'random.ipynb',
 'seq_magic.py']

### 4. download_file(username, repository, remote_file_path, local_file_path) 

скачивание файла, который имеет путь remote_file_path в репозитории и сохранение локально по пути local_file_path. При попытке скачать папку можно бросить исключение или же скачать всю папку.

In [56]:
def download_file(username, repository, remote_file_path, local_file_path):
    """
    This function accepts GitHub username, and repo name, remote file path (down from the level of repo name)
    and local path file. All paths should start with '/',
    Downoloads file to the local path
    """
    # repository_path - spare variable
    url = 'https://raw.githubusercontent.com/'+username+'/'+repository+remote_file_path
    req = requests.get(url)
    name = remote_file_path.split('/')[-1]
    if not req.ok:
        print("Wrong path or it's a directory")
    else:
        with open(local_file_path+name, 'wb') as f:
                f.write(req.content)
                


**Tests**

In [57]:
username = 'MShtol'
repository = 'BI_2021_Python'

In [61]:
get_user_info(username)

{'Name': 'Mikhail',
 'Organization': 'IPR RAS',
 'Location': 'Moscow',
 'n_repos': 6,
 'n_subscribers': 0}

In [59]:
get_user_repositories(username)

[{'User': 'MShtol', 'repo_name': 'BI_2021_Python', 'lang': 'Jupyter Notebook'},
 {'User': 'MShtol', 'repo_name': 'BI_ML_2021', 'lang': 'Jupyter Notebook'},
 {'User': 'MShtol', 'repo_name': 'BI_Stat_2021', 'lang': None},
 {'User': 'MShtol', 'repo_name': 'ml-mipt', 'lang': 'Jupyter Notebook'},
 {'User': 'MShtol',
  'repo_name': 'multi-omics-hackathon',
  'lang': 'Jupyter Notebook'},
 {'User': 'MShtol', 'repo_name': 'repseq-annotation-tutorial', 'lang': 'TeX'}]

In [62]:
list_repository_contents(username, repository)

['.github/workflows',
 'numpy_challenge',
 're',
 'Ci_Bq_converter.py',
 'README.md',
 'fastq_filtrator.py',
 'random.ipynb',
 'seq_magic.py']

In [63]:
download_file(username, repository, '/main/seq_magic.py', './')

In [64]:
import requests
from bs4 import BeautifulSoup

In [67]:
bs4.__version__

NameError: name 'bs4' is not defined

requests==2.27.1
beautifulsoup4==4.10.0