In [1]:
# This script requires github3.py version 0.9.6
# pip install github3.py

import os
import pandas as pd
from github3 import login

GITHUB_API_TOKEN = os.environ.get('GITHUB_API_TOKEN')
gh = login(token=GITHUB_API_TOKEN)
LOCATION = 'Recife'

def queries_list(location=LOCATION):
    return (
        'location:{} followers:>1'.format(location),
        'location:{} followers:1'.format(location),
        'location:{} followers:0'.format(location),
    )

def search_users(query):
    return [u.user for u in gh.search_users(query, sort='followers')]


def fetch_gh_users():
    return [search_users(q) for q in queries_list()]


def create_users_df(batches):
    return pd.concat(
        [pd.DataFrame({'User': u}) for u in batches],
        ignore_index=True)


def followers_iter(gh_user):
    return ','.join([str(u) for u in gh_user.iter_followers()])


def map_f_batch(map_fn, users_iterator):
    return pd.DataFrame({'Followers': [map_fn(u) for u in users_iterator]})


def create_followers_df(batches):
    return pd.concat(
        [map_f_batch(followers_iter, u) for u in batches],
        ignore_index=True)

In [3]:
def repositories_iter(gh_user, repo_type):
    return ','.join([str(u) for u in gh.iter_user_repos(gh_user, type=repo_type)])

def map_r_batch(map_fn, users_iterator):
    return pd.DataFrame({'Owner':[map_fn(u, 'owner') for u in users_iterator],
                         'Member': [map_fn(u, 'member') for u in users_iterator]})

def create_repositories_df(batches):
    return pd.concat(
        [map_r_batch(repositories_iter, u) for u in batches], ignore_index=True)

In [5]:
ghusers_batches = fetch_gh_users()

gh_users = create_users_df(ghusers_batches)

gh_followers = create_followers_df(ghusers_batches)
gh_followers.columns = ['All_Followers']

In [24]:
gh_repositories = create_repositories_df(ghusers_batches)
gh_repositories.columns = ['Owner', 'Member']
gh_repositories.to_csv('gh_repositories.csv', index=False)

In [7]:
gh_users.describe()

Unnamed: 0,User
count,1990
unique,1971
top,giovanebribeiro
freq,2


In [8]:
gh_followers.describe()

Unnamed: 0,All_Followers
count,1990.0
unique,1065.0
top,
freq,887.0


In [25]:
gh_repositories.describe()

Unnamed: 0,Owner,Member
count,1990.0,1990.0
unique,757.0,1616.0
top,,
freq,1208.0,357.0


In [11]:
gh_users.head()

Unnamed: 0,User
0,tarruda
1,mairatma
2,joselitojunior1
3,marcelcaraciolo
4,luanfonceca


In [12]:
gh_followers.head()

Unnamed: 0,All_Followers
0,"Sannis,danielmahon,csjaba,FergusRedican,Victor..."
1,"brunocoelho,henvic,eduardolundgren,aperrelli,a..."
2,"renatooliveira,jeffesonmaia,jotaefe,duartefq,J..."
3,"thiagoarrais,brunojm,henriquebastos,macndesign..."
4,"brunohenrique,luizvarela,gladson,lucasbibiano,..."


In [26]:
gh_repositories.head()

Unnamed: 0,Owner,Member
0,libmpack/libmpack,"tarruda/Algoritmos,tarruda/archdb,tarruda/back..."
1,"deprecate/steel-avatar,deprecate/steel-cell-de...","mairatma/alloy-ui,mairatma/alloyui.com,mairatm..."
2,"acmh/maecoruja,Cisneiros/projeto-anfa,demianbo...","joselitojunior1/abigobaldo-nunes-adventures,jo..."
3,"irgmedeiros/TCCRecommender,jg1141/Open-Allure-DS","marcelcaraciolo/apontador-api-libs,marcelcarac..."
4,"andrezap/analise_expressao_genica,andrezap/arv...","luanfonceca/168horas,luanfonceca/4stoq,luanfon..."


In [18]:
gh_users_followers = gh_users.join(gh_followers)

gh_users_followers.head()

Unnamed: 0,User,All_Followers
0,tarruda,"Sannis,danielmahon,csjaba,FergusRedican,Victor..."
1,mairatma,"brunocoelho,henvic,eduardolundgren,aperrelli,a..."
2,joselitojunior1,"renatooliveira,jeffesonmaia,jotaefe,duartefq,J..."
3,marcelcaraciolo,"thiagoarrais,brunojm,henriquebastos,macndesign..."
4,luanfonceca,"brunohenrique,luizvarela,gladson,lucasbibiano,..."


In [19]:
gh_users_followers['nF'] = gh_users_followers['All_Followers'].apply(lambda x: (len(x) and len(x.split(','))) or len(x))

gh_users_followers.head()

Unnamed: 0,User,All_Followers,nF
0,tarruda,"Sannis,danielmahon,csjaba,FergusRedican,Victor...",570
1,mairatma,"brunocoelho,henvic,eduardolundgren,aperrelli,a...",363
2,joselitojunior1,"renatooliveira,jeffesonmaia,jotaefe,duartefq,J...",350
3,marcelcaraciolo,"thiagoarrais,brunojm,henriquebastos,macndesign...",330
4,luanfonceca,"brunohenrique,luizvarela,gladson,lucasbibiano,...",301


In [22]:
gh_users_followers.describe()

Unnamed: 0,nF
count,1990.0
mean,6.076382
std,25.518735
min,0.0
25%,0.0
50%,1.0
75%,4.0
max,570.0


In [23]:
gh_users_followers.to_csv('temp_users_all-followers.csv', index=False) 