In [None]:
from urllib.request import urlopen
import pandas as pd
import requests
from pygitapi import HubAPI
from tqdm import tqdm

# API client shared by the cells below; they call `git.custom_query(...)`.
# NOTE(review): 'git_token' looks like a placeholder for a real access token —
# confirm, and prefer reading the token from the environment over committing it.
git = HubAPI('git_token')

### Read the CSV file

In [None]:
# Input table. Later cells read its 'sponsor_username' and
# 'number_of_sponsors' columns.
# NOTE(review): this path is relative to './data', while the final export
# writes under '../data' — confirm both resolve as intended.
data = pd.read_csv('./data/sponsored_user.csv')

### For each user, get the number of repositories they own, the number of followers they have, the number of organizations they belong to, the number of pull requests associated with them, and the number of users they are sponsoring

In [None]:
import time

# GraphQL template for the per-user counters. The literal "username" is
# replaced by the real login before each request. It is defined once because
# it does not change between iterations.
# NOTE(review): only the five `totalCount` fields are read below; the `nodes`,
# `edges` and `pageInfo` selections are fetched but never used and could
# probably be dropped to shrink each response — confirm before trimming.
USER_COUNTS_QUERY = """
        query{
            user(login: "username") {
                sponsoring(first: 100) {
                  totalCount
                }
                pullRequests(
                  first: 100
                  orderBy: {field: UPDATED_AT, direction: DESC}
                ) {
                  totalCount
                  nodes {
                    repository {
                      name
                      isPrivate
                    }
                  }
                }
                organizations(first: 100) {
                      totalCount
                    }
                followers(first: 100) {
                  totalCount
                  edges {
                    node {
                      login
                      followers(first: 0) {
                        totalCount
                      }
                    }
                    cursor
                  }
                  pageInfo {
                    endCursor
                    hasNextPage
                  }
                }
                repositories (first:100){
                  totalCount
                  nodes {
                    name
                    description
                    primaryLanguage {
                      name
                    }
                  }
                }
              }
        }"""

# Pause, in seconds, after each user's request.
PAUSE_BETWEEN_USERS = 1

repo = []
follower = []
sponsoring = []
joined_org = []
open_pr = []

# Each result list is paired with the connection whose `totalCount` feeds it.
# Driving the appends from this table means found and not-found users share
# one code path, and every list gets exactly one entry per row of `data`.
count_targets = (
    (repo, 'repositories'),
    (follower, 'followers'),
    (sponsoring, 'sponsoring'),
    (joined_org, 'organizations'),
    (open_pr, 'pullRequests'),
)

for user in tqdm(data['sponsor_username']):
    result = git.custom_query(USER_COUNTS_QUERY.replace('username', user))
    profile = result['user']
    for values, connection in count_targets:
        if profile is None:
            # The login did not resolve to a user: store the "None" string
            # placeholder so the lists stay aligned with the input rows.
            values.append("None")
        else:
            values.append(profile[connection]['totalCount'])
    time.sleep(PAUSE_BETWEEN_USERS)

### Get the number of pull request review contributions made by each user (the API returns the most recently submitted review for each PR reviewed by the user) and the user's primary language

In [None]:
import time

# Probe sent before each user to read the remaining API budget.
RATE_LIMIT_QUERY = """
        query {
          viewer {
            login
          }
          rateLimit {
            limit
            cost
            remaining
            resetAt
          }
    }
    """

# First request per user: review-contribution count plus the first page of
# repositories. The literal "username" is replaced by the real login.
USER_LANGUAGES_QUERY = """
        query{
            user(login: "username") {
                contributionsCollection {
                  pullRequestReviewContributions(first: 1, orderBy: {direction: ASC}) {
                    totalCount
                  }
                }
                repositories(first: 100) {
                  totalCount
                  edges {
                    node {
                      primaryLanguage {
                        name
                      }
                    }
                  }
                  pageInfo {
                    endCursor
                    hasNextPage
                    startCursor
                  }
                }
              }
        }"""

# Follow-up pages of repositories. The lowercase "endcursor" placeholder is
# replaced by the previous page's cursor; str.replace is case-sensitive, so
# the `endCursor` field name inside `pageInfo` is left untouched.
MORE_REPOSITORIES_QUERY = """
            query{
                user(login: "username") {
                    repositories(first: 100,after:"endcursor") {
                      totalCount
                      edges {
                        node {
                          primaryLanguage {
                            name
                          }
                        }
                      }
                      pageInfo {
                        endCursor
                        hasNextPage
                        startCursor
                      }
                    }
                  }
            }"""

# Pause when the remaining budget is at or below this many points.
RATE_LIMIT_FLOOR = 110
# Length of that pause in seconds.
# NOTE(review): this is a fixed one-hour wait; the probe also returns
# `resetAt`, which could be used to wait only as long as necessary.
RATE_LIMIT_PAUSE = 3600


def _primary_languages(repositories):
    """Return one language label per repository edge of a result page.

    Args:
        repositories: the `repositories` connection of a query result; its
            'edges' entries each hold a 'node' with a 'primaryLanguage' that
            is either None or a mapping with a 'name'.

    Returns:
        A list with the language name of every edge, using 'Undetermined'
        for repositories that have no primary language.
    """
    labels = []
    for edge in repositories['edges']:
        primary = edge['node']['primaryLanguage']
        labels.append('Undetermined' if primary is None else primary['name'])
    return labels


lang_use = []
review_pr = []

for user in tqdm(data['sponsor_username']):
    budget = git.custom_query(RATE_LIMIT_QUERY)
    if budget['rateLimit']['remaining'] <= RATE_LIMIT_FLOOR:
        time.sleep(RATE_LIMIT_PAUSE)

    result = git.custom_query(USER_LANGUAGES_QUERY.replace('username', user))
    profile = result['user']
    if profile is None:
        # The login did not resolve to a user: record placeholders in both
        # lists so they stay aligned with the input rows.
        lang_use.append('Not Found')
        review_pr.append('Not Found')
        continue

    review_pr.append(profile['contributionsCollection']
                     ['pullRequestReviewContributions']['totalCount'])

    # Collect the language labels of every repository, following the cursor
    # until the API reports that no further page exists.
    repositories = profile['repositories']
    user_languages = _primary_languages(repositories)
    while repositories['pageInfo']['hasNextPage']:
        cursor = repositories['pageInfo']['endCursor']
        next_query = (MORE_REPOSITORIES_QUERY
                      .replace('endcursor', cursor)
                      .replace('username', user))
        repositories = git.custom_query(next_query)['user']['repositories']
        user_languages.extend(_primary_languages(repositories))

    # Exactly one entry per user: the most frequent label, or 'Undetermined'
    # when the user has no repositories at all. Deciding this after paging
    # has finished prevents a second entry for the same user.
    if user_languages:
        lang_use.append(pd.Series(user_languages).mode()[0])
    else:
        lang_use.append('Undetermined')

In [None]:
# Start from an empty frame; the following cell adds one column per collected list.
df = pd.DataFrame()

### Append each list to the DataFrame

In [None]:
# Output column name -> values, listed in the order the columns should appear.
# The two `data` entries are columns of the input table; the remaining entries
# are the per-user lists filled by the collection loops.
output_columns = {
    'user': data['sponsor_username'],
    'sponsors': data['number_of_sponsors'],
    'repositories': repo,
    'sponsoring': sponsoring,
    'openedPRs': open_pr,
    'reviewedPRs': review_pr,
    'followers': follower,
    'organizations': joined_org,
    'language': lang_use,
}

# Dicts preserve insertion order, so the columns are added in the listed order.
for column_name, column_values in output_columns.items():
    df[column_name] = column_values

### Save as a CSV file

In [None]:
# Write the assembled table to disk without the index column.
# NOTE(review): the target is under '../data' whereas the input was read from
# './data' — confirm the relative paths are intended and that '../data/PSM' exists.
df.to_csv('../data/PSM/PSMdata.csv', index=False)