Conhecendo a requests

- Primeira requisição

In [1]:
import requests

In [2]:
r = requests.get('https://api.github.com/events')

In [3]:
r

<Response [200]>

Explorando a biblioteca

In [4]:
r.status_code

200

In [5]:
r.url

'https://api.github.com/events'

In [6]:
r.text



In [7]:
r.json()

[{'id': '34927169768',
  'type': 'PushEvent',
  'actor': {'id': 56163584,
   'login': 'dandibot',
   'display_login': 'dandibot',
   'gravatar_id': '',
   'url': 'https://api.github.com/users/dandibot',
   'avatar_url': 'https://avatars.githubusercontent.com/u/56163584?'},
  'repo': {'id': 651646863,
   'name': 'dandisets/000559',
   'url': 'https://api.github.com/repos/dandisets/000559'},
  'payload': {'repository_id': 651646863,
   'push_id': 16692805321,
   'size': 2,
   'distinct_size': 2,
   'ref': 'refs/heads/git-annex',
   'head': '849033687192a041d199221c30933b0b75fcd76d',
   'before': '95a6f2ef6b7fd2661407d11bee80244067185668',
   'commits': [{'sha': 'f31b1b282730268a4685ea53b4a1c46c5b35373a',
     'author': {'email': 'team@dandiarchive.org', 'name': 'DANDI Team'},
     'message': 'update',
     'distinct': True,
     'url': 'https://api.github.com/repos/dandisets/000559/commits/f31b1b282730268a4685ea53b4a1c46c5b35373a'},
    {'sha': '849033687192a041d199221c30933b0b75fcd76d',

Utilizando outro endpoint

In [8]:
r = requests.get('https://api.github.com/versions')
r.status_code

200

In [9]:
r.json()

['2022-11-28']

Extração de dados

In [10]:
headers = {'X-GitHub-Api-Version': '2022-11-28'}

In [11]:
api_base_url = 'https://api.github.com'
owner = 'amzn' # username de quem vamos extrair os dados
url = f'{api_base_url}/users/{owner}/repos'

In [12]:
url

'https://api.github.com/users/amzn/repos'

In [13]:
response = requests.get(url, headers=headers)
response.status_code

200

In [14]:
response.json()

[{'id': 171339259,
  'node_id': 'MDEwOlJlcG9zaXRvcnkxNzEzMzkyNTk=',
  'name': '.github',
  'full_name': 'amzn/.github',
  'private': False,
  'owner': {'login': 'amzn',
   'id': 8594673,
   'node_id': 'MDEyOk9yZ2FuaXphdGlvbjg1OTQ2NzM=',
   'avatar_url': 'https://avatars.githubusercontent.com/u/8594673?v=4',
   'gravatar_id': '',
   'url': 'https://api.github.com/users/amzn',
   'html_url': 'https://github.com/amzn',
   'followers_url': 'https://api.github.com/users/amzn/followers',
   'following_url': 'https://api.github.com/users/amzn/following{/other_user}',
   'gists_url': 'https://api.github.com/users/amzn/gists{/gist_id}',
   'starred_url': 'https://api.github.com/users/amzn/starred{/owner}{/repo}',
   'subscriptions_url': 'https://api.github.com/users/amzn/subscriptions',
   'organizations_url': 'https://api.github.com/users/amzn/orgs',
   'repos_url': 'https://api.github.com/users/amzn/repos',
   'events_url': 'https://api.github.com/users/amzn/events{/privacy}',
   'received_ev

In [15]:
len(response.json())

30

Autenticação

In [16]:
access_token = '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'
headers = {'Authorization' : 'Bearer ' + access_token,
            'X-GitHub-Api-Version': '2022-11-28'}

Paginação

In [17]:
api_base_url = 'https://api.github.com'
owner = 'amzn' # username de quem vamos extrair os dados
url = f'{api_base_url}/users/{owner}/repos'
url

'https://api.github.com/users/amzn/repos'

In [18]:
repos_list = []

for num_page in range(1,6):
    try:
        url_page = f'{url}?page={num_page}'
        response = requests.get(url_page, headers=headers)
        repos_list.append(response.json())
    except:
        repos_list.append(None)

In [19]:
len(repos_list)

5

In [20]:
len(repos_list[0])

30

Nome dos repositórios

In [21]:
repos_list[0][1]['name']

'ads-advanced-tools-docs'

In [24]:
repos_name = []
for page in repos_list:
    for repo in page:
        repos_name.append(repo['name'])
repos_name[:15]

['.github',
 'ads-advanced-tools-docs',
 'ads-pao-amznjs-gtm-template',
 'alexa-coho',
 'alexa-skills-kit-js',
 'amazon-ads-advertiser-audience-normalization-sdk-py',
 'amazon-advertising-api-php-sdk',
 'amazon-codeguru-profiler-for-spark',
 'amazon-frustration-free-setup-certification-tool',
 'amazon-hub-counter-api-docs',
 'amazon-hub-counter-api-samples',
 'amazon-hub-counter-sdk-java',
 'amazon-hub-support',
 'amazon-instant-access-sdk-java',
 'amazon-instant-access-sdk-net']

In [23]:
len(repos_name)

148

Linguagem do repositório

In [25]:
repos_list[1][1]['language']

'PHP'

In [26]:
repos_language = []
for page in repos_list:
    for repo in page:
        repos_language.append(repo['language'])
repos_language[:15]

[None,
 None,
 'Smarty',
 'JavaScript',
 None,
 'Python',
 'PHP',
 'Java',
 'Python',
 'CSS',
 'Java',
 'Java',
 'PowerShell',
 'Java',
 'C#']

In [27]:
len(repos_language)

148

Criando um DataFrame com Pandas

In [28]:
import pandas as pd 

In [29]:
dados_amz = pd.DataFrame()
dados_amz['repository_name'] = repos_name
dados_amz['language'] = repos_language
dados_amz

Unnamed: 0,repository_name,language
0,.github,
1,ads-advanced-tools-docs,
2,ads-pao-amznjs-gtm-template,Smarty
3,alexa-coho,JavaScript
4,alexa-skills-kit-js,
...,...,...
143,zeek-plugin-enip,Zeek
144,zeek-plugin-profinet,Zeek
145,zeek-plugin-s7comm,Zeek
146,zeek-plugin-tds,Zeek


Salvando o DataFrame

In [30]:
dados_amz.to_csv('amazon.csv')

Criando repositório com POST

In [31]:
api_base_url = 'https://api.github.com'
url = f'{api_base_url}/user/repos'

url

'https://api.github.com/user/repos'

In [32]:
data = {
    'name': 'linguagens-utilizadas',
    'description': 'Repositorio com as linguagens utilizadas na Amazon, utilizando requests e pandas',
    'private': False
}

response = requests.post(url, json=data, headers=headers)
response.status_code

201

Formato do arquivo

In [33]:
import base64

In [34]:
with open('amazon.csv', 'rb') as file:
    file_content = file.read()

encoded_content = base64.b64encode(file_content)

Upload do arquivo com PUT


In [35]:
api_base_url = 'https://api.github.com'
username = 'ChangCarlos'
repo = 'linguagens-utilizadas'
path = 'amazon.csv'

url = f'{api_base_url}/repos/{username}/{repo}/contents/{path}'
url

'https://api.github.com/repos/ChangCarlos/linguagens-utilizadas/contents/amazon.csv'

In [37]:
data = {
    'message': 'Adicionando um novo arquivo no formato .csv',
    'content': encoded_content.decode('utf-8')
}

response = requests.put(url, json=data, headers=headers)
response.status_code

422