# Pesquisando por iniciativas/projetos com Dados Abertos Governamentais no Github

In [67]:
import requests
import pandas as pd
import time

TODO:

**Mapeamento das principais bibliotecas**

- [API da Câmara dos deputados](https://dadosabertos.camara.leg.br/swagger/api.html)

In [68]:
search_strings_libraries = []

Devido a estrutura de pesquisa no github as palavras chaves utilizadas para o 
news não foram tão eficiente, visto que preciso adicionar palavras de busca ou 
de forma conter uma das palavras ou conter todas as palavras na sequencia especificada.

Sendo assim:
- '"projeto" and "dados governo"',
- '"projeto" and "dados governamentais"',
- '"projeto" and "monitora" and "dados" and "governo"'

Não foram buscas efetivas, porém as palavras chaves inicias sim.

In [69]:
search_strings = [
            'dados abertos',
            'dados abertos brasil',
            'dados abertos governo',
            'dados abertos governamentais',
            'dados governamentais',
            'dados publicos abertos',
            'dados do governo',
            'analise de dados do governo',
            'analise de dados governamentais',
            'portal de dados do governo',
            'portal de dados governamentais',
            'portal publico do governo',
            'portal de dados abertos do governo',
        ]

'consumir' e 'publicação' não retornaram resultados e por isso foram retirados a fim de adicionar palavras que retornem resultados melhores
- 'consumir dados abertos do governo'
- 'consumir dados abertos governamentais'
- 'publicação de dados abertos do governo',
- 'publicação de dados governamentais',

In [74]:
sort = '&sort=stars&order=desc'
url_base = 'https://api.github.com/search/repositories?q='
credentials=('lorenaps','')

Verificando o limite de requisições

In [75]:
t = requests.get('https://api.github.com/rate_limit', auth=credentials)
t.json()

{'rate': {'limit': 5000, 'remaining': 5000, 'reset': 1573219538},
 'resources': {'core': {'limit': 5000, 'remaining': 5000, 'reset': 1573219538},
  'graphql': {'limit': 5000, 'remaining': 5000, 'reset': 1573219538},
  'integration_manifest': {'limit': 5000,
   'remaining': 5000,
   'reset': 1573219538},
  'search': {'limit': 30, 'remaining': 30, 'reset': 1573215998}}}

In [76]:
columns=[
        'id',
        'full_name',
        'description',
        'owner_type', 
        'owner_api_url',
        'owner_url',
        'url',
        'api_url',
        'fork',
        'created_at',
        'updated_at',
        'size',
        'stargazers_count',
        'language',
        'has_issues',
        'has_wiki',
        'forks_count',
        'forks',
        'open_issues',
        'watchers',
        'timestamp_extract'
    ]


In [77]:
def add_result(item):
    
    df = pd.DataFrame([[
                        item.get('id'),
                        item.get('full_name', None),
                        item.get('description', None),      
                        item.get('owner').get('type', None),
                        item.get('owner').get('url', None),
                        item.get('owner').get('html_url', None),
                        item.get('html_url', None),
                        item.get('url', None),
                        item.get('fork', None),
                        item.get('created_at', None),
                        item.get('updated_at', None),
                        item.get('size', None),
                        item.get('stargazers_count', None),
                        item.get('language', None),
                        item.get('has_issues', None),
                        item.get('has_wiki', None),
                        item.get('forks_count', None),
                        item.get('forks', None),
                        item.get('open_issues', None),
                        item.get('watchers', None),
                        str(time.time()).split('.')[0]]], columns=columns)

    return df    

In [78]:
def extract_results(data, results):
    
    for item in data.get('items', None):
        
        results = pd.concat([results, add_result(item)], ignore_index=True, sort=False)

    return results

Verificando limitação de extração de dados da API

In [79]:
page_35 = 'https://api.github.com/search/repositories?q=stars%3A%3E1&sort=stars&order=desc&page=35'
t = requests.get(page_35, auth=credentials)
t.json()

{'documentation_url': 'https://developer.github.com/v3/search/',
 'message': 'Only the first 1000 search results are available'}

In [80]:
repositories_df = pd.DataFrame(columns=['id',
                                    'full_name',
                                    'description',
                                    'owner_type', 
                                    'owner_api_url',
                                    'owner_url',
                                    'url',
                                    'api_url',
                                    'fork',
                                    'created_at',
                                    'updated_at',
                                    'size',
                                    'stargazers_count',
                                    'language',
                                    'has_issues',
                                    'has_wiki',
                                    'forks_count',
                                    'forks',
                                    'open_issues',
                                    'watchers',
                                    'timestamp_extract',
                                    'commits',
                                    'contributors',]) 

In [81]:
def scroll_pages(url, repositories_df):
    
    print('\nPrimeira requisição')
    
    results = requests.get(url, auth=credentials)    
    data = dict(results.json())
    total = data.get('total_count', None)
        
    print(">>> Foram encontrados {0} resultados. Extraindo...".format(total))

    repositories_df = extract_results(data, repositories_df)
    
    iterations = total // 30 
    
    for iteracao in range(0, iterations):        
        header = dict(results.links)
        
        if header.get('next', False):
            next_url = header.get('next').get('url')
            
            print("\nNext url: {0}".format(next_url))
            
            results = requests.get(next_url, auth=credentials)
            data = dict(results.json())
            repositories_df = extract_results(data, repositories_df)
        
    return repositories_df

In [82]:
%%time

for string in search_strings:
    url = url_base + string + sort
    print("\nExtraindo repositórios para a string: '{0}'".format(string))
    repositories_df = scroll_pages(url, repositories_df)


Extraindo repositórios para a string: 'dados abertos'

Primeira requisição
>>> Foram encontrados 370 resultados. Extraindo...

Next url: https://api.github.com/search/repositories?q=dados+abertos&sort=stars&order=desc&page=2

Next url: https://api.github.com/search/repositories?q=dados+abertos&sort=stars&order=desc&page=3

Next url: https://api.github.com/search/repositories?q=dados+abertos&sort=stars&order=desc&page=4

Next url: https://api.github.com/search/repositories?q=dados+abertos&sort=stars&order=desc&page=5

Next url: https://api.github.com/search/repositories?q=dados+abertos&sort=stars&order=desc&page=6

Next url: https://api.github.com/search/repositories?q=dados+abertos&sort=stars&order=desc&page=7

Next url: https://api.github.com/search/repositories?q=dados+abertos&sort=stars&order=desc&page=8

Next url: https://api.github.com/search/repositories?q=dados+abertos&sort=stars&order=desc&page=9

Next url: https://api.github.com/search/repositories?q=dados+abertos&sort=stars&

In [83]:
repositories_df.describe()

Unnamed: 0,id,full_name,description,owner_type,owner_api_url,owner_url,url,api_url,fork,created_at,...,language,has_issues,has_wiki,forks_count,forks,open_issues,watchers,timestamp_extract,commits,contributors
count,570,570,534,570,570,570,570,570,570,570,...,424,570,570,570,570,570,570,570,0.0,0.0
unique,404,404,370,2,354,354,404,404,1,404,...,22,2,2,16,16,14,25,28,0.0,0.0
top,195294598,edsonmottac/go-to-doc,Plugin/Tema do Portal de Dados Abertos do Gove...,User,https://api.github.com/users/dadosgovbr,https://github.com/dadosgovbr,https://github.com/edsonmottac/go-to-doc,https://api.github.com/repos/edsonmottac/go-to...,False,2017-08-23T19:05:48Z,...,Python,True,True,0,0,0,0,1573215966,,
freq,6,6,6,466,16,16,6,6,570,6,...,92,555,553,424,424,488,395,30,,


## Extraindo Commits e Contributors

In [84]:
def extract_commits(url_repo):
    
    commits_url = url_repo + '/commits'  
    results = requests.get(commits_url, auth=credentials)
    
    # No caso do repositório estar vazio
    if results.status_code == 409:
        return None
    
    commits = len(results.json())

    header = dict(results.links)
    
    while header.get('next', False):
        next_url = header.get('next').get('url')        
        results = requests.get(next_url, auth=credentials)
        commits = commits + len(results.json())    
        header = dict(results.links)


    return commits

In [85]:
def extract_contributors(url_repo):
    
    contributors_url = url_repo + '/contributors'
    results = requests.get(contributors_url, auth=credentials)
    
    # No caso do repositório estar vazio
    if results.status_code == 204:
        return None
    
    contributors = len(results.json())

    header = dict(results.links)
    
    while header.get('next', False):
        next_url = header.get('next').get('url')
        results = requests.get(next_url, auth=credentials)
        contributors = contributors + len(results.json())
        header = dict(results.links)
    
    return contributors

In [86]:
%%time
urls = repositories_df['api_url']

for url in urls:
        
    print('\n>>> ', url)
    
    repo = requests.get(url, auth=credentials)
    repo = dict(repo.json())
        
    commits = extract_commits(url)
    contributors = extract_contributors(url)

    print("Tem {0} Commits - {1} Contributors".format(commits,contributors))

    repositories_df.loc[repositories_df["api_url"] == url, 'commits'] = commits
    repositories_df.loc[repositories_df["api_url"] == url, 'contributors'] = contributors


>>>  https://api.github.com/repos/CamaraDosDeputados/dados-abertos
Tem 28 Commits - 4 Contributors

>>>  https://api.github.com/repos/dadosgovbr/catalogos-dados-brasil
Tem 42 Commits - 4 Contributors

>>>  https://api.github.com/repos/prefeiturasp/dados-educacao
Tem 18 Commits - 2 Contributors

>>>  https://api.github.com/repos/dadosgovbr/aplicativos-dados-brasil
Tem 40 Commits - 5 Contributors

>>>  https://api.github.com/repos/dadosgovbr/kit
Tem 361 Commits - 6 Contributors

>>>  https://api.github.com/repos/mapaslivres/localidades
Tem 69 Commits - 5 Contributors

>>>  https://api.github.com/repos/odufrn/odufrn-downloader
Tem 237 Commits - 6 Contributors

>>>  https://api.github.com/repos/brasilopen/brasilopen
Tem 10 Commits - 2 Contributors

>>>  https://api.github.com/repos/vitorbaptista/dados-abertos-camara.gov.br
Tem 31 Commits - 1 Contributors

>>>  https://api.github.com/repos/jonny-data/conheca-seu-vereador
Tem 70 Commits - 6 Contributors

>>>  https://api.github.com/repos/da

Tem 85 Commits - 2 Contributors

>>>  https://api.github.com/repos/vgeorge/estados-brasileiros
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/paraibatransparente/dados
Tem 95 Commits - 2 Contributors

>>>  https://api.github.com/repos/CodeForCuritiba/Analises-Jupyter-Notebook
Tem 2 Commits - 1 Contributors

>>>  https://api.github.com/repos/grstavares/DadosAbertosTSE
Tem 5 Commits - 1 Contributors

>>>  https://api.github.com/repos/lucassouzamatos/projetos-ufsc
Tem 42 Commits - 1 Contributors

>>>  https://api.github.com/repos/pr-snas/nosmapas
Tem 18 Commits - 1 Contributors

>>>  https://api.github.com/repos/msramos/lupa
Tem 24 Commits - 1 Contributors

>>>  https://api.github.com/repos/sergiosvieira/cerificados
Tem 5 Commits - 1 Contributors

>>>  https://api.github.com/repos/fhpimenta/AviacaoOpenData
Tem 14 Commits - 1 Contributors

>>>  https://api.github.com/repos/VRPazdeJesus/multas-ibama-firebase
Tem 2 Commits - 1 Contributors

>>>  https://api.github.com/repo

Tem 8 Commits - 1 Contributors

>>>  https://api.github.com/repos/georgemaia/dadosabertos
Tem 31 Commits - 1 Contributors

>>>  https://api.github.com/repos/petersonjr/dados_abertos
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/transparencia-mg/blog-dados-abertos
Tem 4 Commits - 1 Contributors

>>>  https://api.github.com/repos/GabrielLimaSnT/ProjetoDadosAbertosImpacta
Tem 74 Commits - 3 Contributors

>>>  https://api.github.com/repos/mhbsti/dados-abertos-curitiba
Tem 28 Commits - 1 Contributors

>>>  https://api.github.com/repos/natalialionel/Dados-Abertos-Desafios-e-Oportunidades
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/alexvlima/MTur-Dados-Turismo-por-UF
Tem 2 Commits - 1 Contributors

>>>  https://api.github.com/repos/thyall/Dados_UFRN
Tem 23 Commits - 3 Contributors

>>>  https://api.github.com/repos/dcarvalho/analise_dados_abertos
Tem 7 Commits - 2 Contributors

>>>  https://api.github.com/repos/comprasgovbr/API-dados-abertos
Tem 1 Com

Tem 47 Commits - 1 Contributors

>>>  https://api.github.com/repos/bozoh/meu-legislador
Tem 25 Commits - 1 Contributors

>>>  https://api.github.com/repos/marceloandriolli/dados_solarimetricos_brasil
Tem 3 Commits - 1 Contributors

>>>  https://api.github.com/repos/ggdrn/ClimMapView
Tem 10 Commits - 1 Contributors

>>>  https://api.github.com/repos/crislanio/Dados_abertos_PortalDaTransparencia
Tem 5 Commits - 1 Contributors

>>>  https://api.github.com/repos/mauriciovll/Dados-Abertos-SC-versao-TESTE-
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/mateushtoledo/sysvis
Tem 3 Commits - 1 Contributors

>>>  https://api.github.com/repos/DeehSlash/EstruturaDeDadosHash
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/danielCavalcanti553/angular-api-dados-iffar
Tem 5 Commits - 1 Contributors

>>>  https://api.github.com/repos/georgemaia/dadosabertos-mobilidade-rn-natal
Tem 18 Commits - 1 Contributors

>>>  https://api.github.com/repos/FabianoOLima/1_Dados-e-

Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/juliafealves/leda-tabela-hash-aberto
Tem 5 Commits - 0 Contributors

>>>  https://api.github.com/repos/BrunoMoriB/stockmarket-manager
Tem 23 Commits - 1 Contributors

>>>  https://api.github.com/repos/hiperorganicos/openlab_datavis
Tem 5 Commits - 1 Contributors

>>>  https://api.github.com/repos/code-like-a-girl/hacker-cidadao-3.0
Tem 2 Commits - 1 Contributors

>>>  https://api.github.com/repos/NastyaCodingBunny/PYTHON---SIMULADOR-DE-CADASTRO
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/wharborges/analiseES
Tem None Commits - None Contributors

>>>  https://api.github.com/repos/VRPazdeJesus/multas-ibama
Tem 2 Commits - 1 Contributors

>>>  https://api.github.com/repos/decosoares/diariasservidoresalagoas
Tem 18911 Commits - 315 Contributors

>>>  https://api.github.com/repos/leonardocouy/MaisCopa
Tem 9 Commits - 1 Contributors

>>>  https://api.github.com/repos/Idiego2/Dashboard-PHP-e-MySQL
Tem 6 Co

Tem 19 Commits - 1 Contributors

>>>  https://api.github.com/repos/gabrielborgesdm/lunetagov
Tem 19 Commits - 1 Contributors

>>>  https://api.github.com/repos/leilton/PJIN
Tem 4 Commits - 1 Contributors

>>>  https://api.github.com/repos/desireesantos/plataformaVisualizacao
Tem 35 Commits - 1 Contributors

>>>  https://api.github.com/repos/Lorenaps/caracterizacao-ds-dados-abertos
Tem 30 Commits - 1 Contributors

>>>  https://api.github.com/repos/hitalosiqueira/DAG
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/diegoep/rotasaude
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/EnriqueSampaio/dag-parser
Tem 39 Commits - 1 Contributors

>>>  https://api.github.com/repos/edsonmottac/go-to-doc
Tem 5 Commits - 0 Contributors

>>>  https://api.github.com/repos/narallynne/mineracao-ogd
Tem 3 Commits - 2 Contributors

>>>  https://api.github.com/repos/mauriciovll/Dados-Abertos-SC-versao-TESTE-
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos

Tem None Commits - None Contributors

>>>  https://api.github.com/repos/paulohubert/resultado_primario
Tem 4 Commits - 1 Contributors

>>>  https://api.github.com/repos/Panizzo/transparencia
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/marcotmotta/ibd-query-project
Tem 7 Commits - 1 Contributors

>>>  https://api.github.com/repos/antonivargas/desafio-camara
Tem 13 Commits - 1 Contributors

>>>  https://api.github.com/repos/ppKrauss/getcsv_stdOpenGov
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/ces-jf/PSPA
Tem 34 Commits - 2 Contributors

>>>  https://api.github.com/repos/VitorRoque/Analise-Fila-o-Politica-BR
Tem 1 Commits - 1 Contributors

>>>  https://api.github.com/repos/danielmarcelino/rRAIS
Tem 2 Commits - 1 Contributors

>>>  https://api.github.com/repos/nosdeedson/Projeto-leitura-de-API-com-Docker
Tem 10 Commits - 1 Contributors

>>>  https://api.github.com/repos/chrmorais/scrapygdf
Tem 6 Commits - 1 Contributors

>>>  https://api.github.

In [87]:
repositories_df.describe()

Unnamed: 0,id,full_name,description,owner_type,owner_api_url,owner_url,url,api_url,fork,created_at,...,language,has_issues,has_wiki,forks_count,forks,open_issues,watchers,timestamp_extract,commits,contributors
count,570,570,534,570,570,570,570,570,570,570,...,424,570,570,570,570,570,570,570,549,549
unique,404,404,370,2,354,354,404,404,1,404,...,22,2,2,16,16,14,25,28,83,9
top,195294598,edsonmottac/go-to-doc,Plugin/Tema do Portal de Dados Abertos do Gove...,User,https://api.github.com/users/dadosgovbr,https://github.com/dadosgovbr,https://github.com/edsonmottac/go-to-doc,https://api.github.com/repos/edsonmottac/go-to...,False,2017-08-23T19:05:48Z,...,Python,True,True,0,0,0,0,1573215966,1,1
freq,6,6,6,466,16,16,6,6,570,6,...,92,555,553,424,424,488,395,30,69,405


Conferindo valores nulos

In [88]:
repositories_df.loc[repositories_df['commits'].isnull()][['api_url', 'commits', 'contributors']]

Unnamed: 0,api_url,commits,contributors
146,https://api.github.com/repos/renatachagasc/Api...,,
152,https://api.github.com/repos/ccdpoa/ocida,,
159,https://api.github.com/repos/Kassio-Ferreira/p...,,
173,https://api.github.com/repos/GabrielLimaSnT/Pr...,,
203,https://api.github.com/repos/hugomsouto/minera...,,
211,https://api.github.com/repos/DINALVAGOMES/INSS...,,
213,https://api.github.com/repos/eduponto21/dados_...,,
226,https://api.github.com/repos/MarxSteel/MDIO-In...,,
230,https://api.github.com/repos/yelken/livecity,,
276,https://api.github.com/repos/danielmbicalho/Da...,,


In [89]:
repositories_df.loc[repositories_df['contributors'].isnull()][['api_url', 'commits', 'contributors']]

Unnamed: 0,api_url,commits,contributors
146,https://api.github.com/repos/renatachagasc/Api...,,
152,https://api.github.com/repos/ccdpoa/ocida,,
159,https://api.github.com/repos/Kassio-Ferreira/p...,,
173,https://api.github.com/repos/GabrielLimaSnT/Pr...,,
203,https://api.github.com/repos/hugomsouto/minera...,,
211,https://api.github.com/repos/DINALVAGOMES/INSS...,,
213,https://api.github.com/repos/eduponto21/dados_...,,
226,https://api.github.com/repos/MarxSteel/MDIO-In...,,
230,https://api.github.com/repos/yelken/livecity,,
276,https://api.github.com/repos/danielmbicalho/Da...,,


In [90]:
file_path = '../data/repositories_' + str(time.time()).split('.')[0] + '.csv'
file_path

'../data/repositories_1573218408.csv'

In [91]:
repositories_df.to_csv(file_path, index=False)

## Extraindo contribuidores dos repositórios

In [47]:
columns_contributors = [ 'repo_id',
                          'repo_name',
                          'repo_url',
                          'repo_api_url',
                          'contributor_id',
                          'contributor_login',
                          'contributor_type',
                          'contributor_url',
                          'contributor_api_url',
                          'timestamp_extract']

In [48]:
def scroll_results(results):
    
    list_contributors = []

    for result in results:
        contributor = {}
        
        contributor = {
            'contributor_id': result.get('id', None),
            'contributor_login': result.get('login', None),
            'contributor_type': result.get('type', None),
            'contributor_url': result.get('html_url', None),
            'contributor_api_url': result.get('url', None),
        }

        list_contributors.append(contributor)
        
    return list_contributors

In [49]:
def get_contributors(url):

    list_contributors = []
    results = requests.get(url, auth=credentials)
    
    # No caso do repositório estar vazio
    if results.status_code is 204:
        return None
    
    contributors_results = results.json()
    list_contributors = scroll_results(contributors_results)
    
    header = dict(results.links)
    
    while header.get('next', False):
        next_url = header.get('next').get('url')
        
        print('\t> Extraindo da próxima página: {0}'.format(next_url))
            
        results = requests.get(next_url, auth=credentials)
        contributors_results = results.json()
        list_contributors = list_contributors + scroll_results(contributors_results)    
        header = dict(results.links)
        
    return list_contributors

In [50]:
def add_contributor(repo, contributor):
            
    df = pd.DataFrame([[
                    repo.get('repo_id', None),
                    repo.get('repo_name', None),
                    repo.get('repo_url', None),      
                    repo.get('repo_api_url', None),
                    contributor.get('contributor_id', None),
                    contributor.get('contributor_login', None),
                    contributor.get('contributor_type', None),
                    contributor.get('contributor_url', None),
                    contributor.get('contributor_api_url', None),
                    str(time.time()).split('.')[0]]], columns=columns_contributors)

    return df  

In [51]:
def save_contributors(contributors_df, repo, contributors):
    
    for contributor in contributors:
                
        contributors_df = pd.concat([contributors_df, 
                                     add_contributor(repo, contributor)], 
                                    ignore_index=True, 
                                    sort=False)
    return contributors_df

In [53]:
def search_contributors(repositories_df):
    
    contributors_df = pd.DataFrame(columns = columns_contributors)
    urls = repositories_df['api_url']
   
    for url in urls:
        print('\nExtraindo contribuidores de: {0}'.format(url))

        repo = {
            'repo_id': repositories_df.loc[repositories_df["api_url"] == url, 'id'].values[0],
            'repo_name': repositories_df.loc[repositories_df["api_url"] == url, 'full_name'].values[0],
            'repo_url': repositories_df.loc[repositories_df["api_url"] == url, 'url'].values[0],
            'repo_api_url': url,
        }
        
        url_contributors = url + '/contributors'        
        contributors = get_contributors(url_contributors)
        
        if contributors:
            contributors_df = save_contributors(contributors_df, repo, contributors)
        
    
    return contributors_df

In [55]:
%%time
result_contributors = search_contributors(repositories_df)


Extraindo contribuidores de: https://api.github.com/repos/CamaraDosDeputados/dados-abertos

Extraindo contribuidores de: https://api.github.com/repos/dadosgovbr/catalogos-dados-brasil

Extraindo contribuidores de: https://api.github.com/repos/prefeiturasp/dados-educacao

Extraindo contribuidores de: https://api.github.com/repos/dadosgovbr/aplicativos-dados-brasil

Extraindo contribuidores de: https://api.github.com/repos/dadosgovbr/kit

Extraindo contribuidores de: https://api.github.com/repos/mapaslivres/localidades

Extraindo contribuidores de: https://api.github.com/repos/odufrn/odufrn-downloader

Extraindo contribuidores de: https://api.github.com/repos/brasilopen/brasilopen

Extraindo contribuidores de: https://api.github.com/repos/vitorbaptista/dados-abertos-camara.gov.br

Extraindo contribuidores de: https://api.github.com/repos/jonny-data/conheca-seu-vereador

Extraindo contribuidores de: https://api.github.com/repos/dadosgovbr/processo-participacao-social-inda

Extraindo cont


Extraindo contribuidores de: https://api.github.com/repos/fredbortolato/dados-abertos-alesp

Extraindo contribuidores de: https://api.github.com/repos/erickos/Case_BibliotecaUFRN_DadosAbertos

Extraindo contribuidores de: https://api.github.com/repos/Ermesoml/Dados-Abertos-Camara-Legislativa

Extraindo contribuidores de: https://api.github.com/repos/pbaesse/plataforma-livre-dados-abertos

Extraindo contribuidores de: https://api.github.com/repos/paulochf/anp-dados-abertos

Extraindo contribuidores de: https://api.github.com/repos/sjcdigital/PlanoDadosAbertos

Extraindo contribuidores de: https://api.github.com/repos/CodeForCuritiba/ds_curitiba_dados_abertos

Extraindo contribuidores de: https://api.github.com/repos/codigourbano/distritos-sp

Extraindo contribuidores de: https://api.github.com/repos/dfalbel/cota-parlamentar

Extraindo contribuidores de: https://api.github.com/repos/MarcosTrajano/ionicTabelaFipe

Extraindo contribuidores de: https://api.github.com/repos/matheusjacobina/


Extraindo contribuidores de: https://api.github.com/repos/chrislucas/explorando-dados-abertos-gov

Extraindo contribuidores de: https://api.github.com/repos/ptcampos/dados-abertos-graphql

Extraindo contribuidores de: https://api.github.com/repos/henriquepgomide/dados-abertos-educacao

Extraindo contribuidores de: https://api.github.com/repos/baiana/Explorando-dados-abertos

Extraindo contribuidores de: https://api.github.com/repos/pedroboaron/ConsultaDadosAbertos

Extraindo contribuidores de: https://api.github.com/repos/pstwh/curitiba-dados-abertos-api

Extraindo contribuidores de: https://api.github.com/repos/gcianfarani/jogo-dados-abertos

Extraindo contribuidores de: https://api.github.com/repos/jaraujouerj/Estruturas-de-Dados-Abertos

Extraindo contribuidores de: https://api.github.com/repos/ricardopoppi/Consulta-INDA

Extraindo contribuidores de: https://api.github.com/repos/vpcsilva/dados-abertos-cd

Extraindo contribuidores de: https://api.github.com/repos/franklinmatheus/dad


Extraindo contribuidores de: https://api.github.com/repos/hirobs/cnj-bi

Extraindo contribuidores de: https://api.github.com/repos/FrancinaldoCabral/projetocientifico

Extraindo contribuidores de: https://api.github.com/repos/prodest/ckanext-data_es_theme

Extraindo contribuidores de: https://api.github.com/repos/FABdadosabertos/EMAER

Extraindo contribuidores de: https://api.github.com/repos/macecchi/monografia-dcc

Extraindo contribuidores de: https://api.github.com/repos/hitalosiqueira/DAG

Extraindo contribuidores de: https://api.github.com/repos/jukabarros/denguezone-api

Extraindo contribuidores de: https://api.github.com/repos/ProjetosDeBD-2018-2/tce-pe

Extraindo contribuidores de: https://api.github.com/repos/Fabriciobarbosa/pesquisajupyter

Extraindo contribuidores de: https://api.github.com/repos/hdusantos/opencam

Extraindo contribuidores de: https://api.github.com/repos/campagnucci/exercitando_pandas

Extraindo contribuidores de: https://api.github.com/repos/posGraduacaoB


Extraindo contribuidores de: https://api.github.com/repos/rafabrayner/importador_licitacoes_municipais_sagres

Extraindo contribuidores de: https://api.github.com/repos/Gabriellm2003/Coffee-CNN

Extraindo contribuidores de: https://api.github.com/repos/Pinalli/ibm-atm-challenge

Extraindo contribuidores de: https://api.github.com/repos/warSantos/REDES02

Extraindo contribuidores de: https://api.github.com/repos/KassioVieira/starships

Extraindo contribuidores de: https://api.github.com/repos/ThomasMGuarnieri/GameReviewGender

Extraindo contribuidores de: https://api.github.com/repos/ronaldosampaio/testefrontend

Extraindo contribuidores de: https://api.github.com/repos/nathaliaSilva/PDFRelat-rios

Extraindo contribuidores de: https://api.github.com/repos/dadosgovbr/catalogos-dados-brasil

Extraindo contribuidores de: https://api.github.com/repos/dadosgovbr/aplicativos-dados-brasil

Extraindo contribuidores de: https://api.github.com/repos/mapaslivres/localidades

Extraindo contribuido


Extraindo contribuidores de: https://api.github.com/repos/Jucojuco/midia_social

Extraindo contribuidores de: https://api.github.com/repos/hitalosiqueira/DAG

Extraindo contribuidores de: https://api.github.com/repos/narallynne/mineracao-ogd

Extraindo contribuidores de: https://api.github.com/repos/rafaelodon/detector-fraudes-compras-gov

Extraindo contribuidores de: https://api.github.com/repos/mauriciovll/Dados-Abertos-SC-versao-TESTE-

Extraindo contribuidores de: https://api.github.com/repos/augusto-herrmann/transparencia-dados-abertos-brasil

Extraindo contribuidores de: https://api.github.com/repos/paraibatransparente/portal-symfony

Extraindo contribuidores de: https://api.github.com/repos/gilsondev/awesome-projects

Extraindo contribuidores de: https://api.github.com/repos/othoncampos/PublicDataScience

Extraindo contribuidores de: https://api.github.com/repos/Yandson/transparencia-ma

Extraindo contribuidores de: https://api.github.com/repos/xsolium360/transparencia-ma

Extr


Extraindo contribuidores de: https://api.github.com/repos/BrunnaMaiaradaSilva/Analise-de-Dados-Com-R

Extraindo contribuidores de: https://api.github.com/repos/bovino/cidprojetoaplicado

Extraindo contribuidores de: https://api.github.com/repos/geraldo7junior/-chega-

Extraindo contribuidores de: https://api.github.com/repos/edsonmottac/go-to-doc

Extraindo contribuidores de: https://api.github.com/repos/dadosgovbr/ckanext-dadosgovbr

Extraindo contribuidores de: https://api.github.com/repos/thenets/ckanext-dadosabertos

Extraindo contribuidores de: https://api.github.com/repos/mtrpires/raspafamilia

Extraindo contribuidores de: https://api.github.com/repos/edsonlead/gastos-gov-federal

Extraindo contribuidores de: https://api.github.com/repos/Macmod/PortalTransparenciaBR

Extraindo contribuidores de: https://api.github.com/repos/prodest/dados.es.gov.br

Extraindo contribuidores de: https://api.github.com/repos/mateushtoledo/sysvis

Extraindo contribuidores de: https://api.github.com/

In [56]:
result_contributors.describe()

Unnamed: 0,repo_id,repo_name,repo_url,repo_api_url,contributor_id,contributor_login,contributor_type,contributor_url,contributor_api_url,timestamp_extract
count,1076,1076,1076,1076,1076,1076,1076,1076,1076,1076
unique,386,386,386,386,773,773,2,773,773,401
top,86646585,decosoares/diariasservidoresalagoas,https://github.com/decosoares/diariasservidore...,https://api.github.com/repos/decosoares/diaria...,1058414,augusto-herrmann,User,https://github.com/augusto-herrmann,https://api.github.com/users/augusto-herrmann,1573185522
freq,315,315,315,315,21,21,1073,21,21,232


In [57]:
result_contributors.head()

Unnamed: 0,repo_id,repo_name,repo_url,repo_api_url,contributor_id,contributor_login,contributor_type,contributor_url,contributor_api_url,timestamp_extract
0,29256552,CamaraDosDeputados/dados-abertos,https://github.com/CamaraDosDeputados/dados-ab...,https://api.github.com/repos/CamaraDosDeputado...,19875696,FabricioRocha,User,https://github.com/FabricioRocha,https://api.github.com/users/FabricioRocha,1573185219
1,29256552,CamaraDosDeputados/dados-abertos,https://github.com/CamaraDosDeputados/dados-ab...,https://api.github.com/repos/CamaraDosDeputado...,16920325,EquipeDadosAbertosCD,User,https://github.com/EquipeDadosAbertosCD,https://api.github.com/users/EquipeDadosAbertosCD,1573185219
2,29256552,CamaraDosDeputados/dados-abertos,https://github.com/CamaraDosDeputados/dados-ab...,https://api.github.com/repos/CamaraDosDeputado...,19963732,JoaoCarabetta,User,https://github.com/JoaoCarabetta,https://api.github.com/users/JoaoCarabetta,1573185219
3,29256552,CamaraDosDeputados/dados-abertos,https://github.com/CamaraDosDeputados/dados-ab...,https://api.github.com/repos/CamaraDosDeputado...,7976552,labhacker,User,https://github.com/labhacker,https://api.github.com/users/labhacker,1573185219
4,39256926,dadosgovbr/catalogos-dados-brasil,https://github.com/dadosgovbr/catalogos-dados-...,https://api.github.com/repos/dadosgovbr/catalo...,1058414,augusto-herrmann,User,https://github.com/augusto-herrmann,https://api.github.com/users/augusto-herrmann,1573185220


In [58]:
file_path = '../data/contributors_' + str(time.time()).split('.')[0] + '.csv'
file_path

'../data/contributors_1573185829.csv'

In [59]:
result_contributors.to_csv(file_path, index=False)