In [16]:
# to get API token
import os
from dotenv import load_dotenv

# to get information in URL
import requests
from bs4 import BeautifulSoup
import re

# load .env file
load_dotenv()
# GitHub API 사용하기 위한 변수 선언

GITHUB_API_URL = "https://api.github.com"
ACCESS_TOKEN = os.getenv('GITHUB_ACCESS_TOKEN')

# 헤더 설정
headers = {
    'Authorization': f'token {ACCESS_TOKEN}',
    'Accept': 'application/vnd.github.v3+json',
    'User-Agent': 'JHyuk2'
}

In [30]:
# api 사용법
!curl https://api.github.com

{
  "current_user_url": "https://api.github.com/user",
  "current_user_authorizations_html_url": "https://github.com/settings/connections/applications{/client_id}",
  "authorizations_url": "https://api.github.com/authorizations",
  "code_search_url": "https://api.github.com/search/code?q={query}{&page,per_page,sort,order}",
  "commit_search_url": "https://api.github.com/search/commits?q={query}{&page,per_page,sort,order}",
  "emails_url": "https://api.github.com/user/emails",
  "emojis_url": "https://api.github.com/emojis",
  "events_url": "https://api.github.com/events",
  "feeds_url": "https://api.github.com/feeds",
  "followers_url": "https://api.github.com/user/followers",
  "following_url": "https://api.github.com/user/following{/target}",
  "gists_url": "https://api.github.com/gists{/gist_id}",
  "hub_url": "https://api.github.com/hub",
  "issue_search_url": "https://api.github.com/search/issues?q={query}{&page,per_page,sort,order}",
  "issues_url": "https://api.github.com/issues

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  2396  100  2396    0     0  83376      0 --:--:-- --:--:-- --:--:-- 85571


In [32]:
# 사용자 인증 정보
!curl https://api.github.com/user -H "Authorization: Bearer $ACCESS_TOKEN"

{
  "login": "JHyuk2",
  "id": 60080684,
  "node_id": "MDQ6VXNlcjYwMDgwNjg0",
  "avatar_url": "https://avatars.githubusercontent.com/u/60080684?v=4",
  "gravatar_id": "",
  "url": "https://api.github.com/users/JHyuk2",
  "html_url": "https://github.com/JHyuk2",
  "followers_url": "https://api.github.com/users/JHyuk2/followers",
  "following_url": "https://api.github.com/users/JHyuk2/following{/other_user}",
  "gists_url": "https://api.github.com/users/JHyuk2/gists{/gist_id}",
  "starred_url": "https://api.github.com/users/JHyuk2/starred{/owner}{/repo}",
  "subscriptions_url": "https://api.github.com/users/JHyuk2/subscriptions",
  "organizations_url": "https://api.github.com/users/JHyuk2/orgs",
  "repos_url": "https://api.github.com/users/JHyuk2/repos",
  "events_url": "https://api.github.com/users/JHyuk2/events{/privacy}",
  "received_events_url": "https://api.github.com/users/JHyuk2/received_events",
  "type": "User",
  "site_admin": false,
  "name": null,
  "company": null,
  "blog":

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  1288  100  1288    0     0   3432      0 --:--:-- --:--:-- --:--:--  3443


In [33]:
# 그리고 귀여운 고양이.
!curl https://api.github.com/octocat


               MMM.           .MMM
               MMMMMMMMMMMMMMMMMMM
               MMMMMMMMMMMMMMMMMMM      _________________________________________
              MMMMMMMMMMMMMMMMMMMMM    |                                         |
             MMMMMMMMMMMMMMMMMMMMMMM   | Anything added dilutes everything else. |
            MMMMMMMMMMMMMMMMMMMMMMMM   |_   _____________________________________|
            MMMM::- -:::::::- -::MMMM    |/
             MM~:~ 00~:::::~ 00~:~MM
        .. MMMMM::.00:::+:::.00::MMMMM ..
              .MM::::: ._. :::::MM.
                 MMMM;:::::;MMMM
          -MM        MMMMMMM
          ^  M+     MMMMMMMMM
              MMMMMMM MM MM MM
                   MM MM MM MM
                   MM MM MM MM
                .~~MM~MM~MM~MM~~.
             ~~~~MM:~MM~~~MM~:MM~~~~
            ~~~~~~==~==~~~==~==~~~~~~
             ~~~~~~==~==~==~==~~~~~~
                 :~==~==~==~==~~


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100   925  100   925    0     0   2982      0 --:--:-- --:--:-- --:--:--  2993


### REST API - search repo and select language
- 우리가 사용할 건 repository search와 repo 안의 .py 파일을 찾는 것.  

세 가지 중 가장 잘 사용할만한 것은
> `"repository_search_url"`: "https://api.github.com/search/repositories?q={query}{&page,per_page,sort,order}",  
> `"issue_search_url"`: "https://api.github.com/search/issues?q={query}{&page,per_page,sort,order}",  
> `"topic_search_url"`: "https://api.github.com/search/topics?q={query}{&page,per_page}",  

<br>
이슈와 토픽은 잘 모르겠지만 레포는 확실하다.


In [35]:
# GitHub API를 사용하여 트렌딩 레포지토리를 검색하는 함수.
def get_trending_repositories(language='python', sort='stars', order='desc', per_page=10):
    url = "https://api.github.com/search/repositories"
    query = f"language:{language}"
    params = {
        'q': query,
        'sort': sort,
        'order': order,
        'per_page': per_page
    }
    # getter_url = 'https://api.github.com/search/repositories?q=language:{language}&sort=stars&order=desc&per_page=10'
    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        return response.json()['items']
    else:
        raise Exception(f"Error fetching trending repositories: {response.status_code} - {response.text}")
    
repos = get_trending_repositories()

KeyError: 'items'

In [42]:
# 리포지토리의 코드 파일을 수집
def get_repository_files(repo_url):
    files = []
    repo_name = repo_url.split('/')[-1]
    url = f'https://api.github.com/repos/{repo_name}/contents'
    response = requests.get(url)
    print(response)
    data = response.json()
    
    for item in data:
        if item['type'] == 'file' and item['name'].endswith('.py'):
            files.append(item['download_url'])
    return files
    

In [13]:
# 리포지토리의 코드 파일을 수집
def get_repository_files(repo_url):
    files = []
    repo_name = repo_url.split('/')[-1]
    url = f'https://api.github.com/repos/{repo_name}/contents'
    response = requests.get(url)
    print(response)
    data = response.json()
    
    for item in data:
        if item['type'] == 'file' and item['name'].endswith('.py'):
            files.append(item['download_url'])
    return files

In [43]:
files = get_repository_files(repos[0]['html_url'])

<Response [404]>


TypeError: string indices must be integers

In [5]:
i = 0
for repo in repos:
    i += 1
    temp = repo['html_url']
    if i == 2:
        break

In [12]:
print(temp)
temp_url = temp

https://github.com/donnemartin/system-design-primer


In [14]:

files = get_repository_files(temp_url)

{'message': 'Not Found', 'documentation_url': 'https://docs.github.com/rest/repos/repos#get-a-repository'}


TypeError: string indices must be integers

In [11]:
# 코드 파일을 다운로드하고 변수명, 함수명, 클래스명을 추출
def extract_names_from_code(file_url):
    response = requests.get(file_url)
    code = response.text
    variables = re.findall(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b', code)
    return variables

TypeError: string indices must be integers