In [3]:
import pandas as pd
import numpy as np
from github import Auth
from github import Github
import json
import pickle
import os
import time
import shutil
from tqdm import tqdm

In [11]:
# GitHub token is read from a local JSON file with an "access_token" key, so the
# secret never appears in the notebook itself. The `with` block closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open("./config") as config_file:
    ACCESS_TOKEN = json.load(config_file)["access_token"]

# CSV listing the repositories to crawl.
REPO_METADATA_PATH = "./Data/BlockchainAppRepositories-Old.csv"

# Pickle target for collected Repository objects and CSV target for their issues.
REPOSITORIES_BIN_DATA_PATH = "./Data/Repositories.bin"
ISSUE_DATA_PATH = "./Data/AllIssues-New.csv"

# Directory that receives timestamped copies made by backup().
BACKUP_PATH = "./Data/Backup"

In [5]:
# Build one authenticated PyGithub client from the configured token; the cells
# below use this `github` object for every API lookup.
auth = Auth.Token(ACCESS_TOKEN)
github = Github(auth=auth)

In [6]:
def rate_limited(interval=600):
    """Build a decorator that lets the wrapped callable run at most once per `interval`.

    A call made less than `interval` seconds (wall clock, `time.time()`) after the
    last call that was let through is skipped: the wrapped callable is not invoked
    and the wrapper returns None. The very first call always runs.

    The timestamp lives in this factory call's scope, so every function decorated
    with the *same* decorator object shares one timer; each `rate_limited(...)`
    invocation gets its own.

    Args:
        interval: Minimum number of seconds between two executed calls.

    Returns:
        A decorator; the decorated function returns the original result when it
        runs and None when the call is skipped.
    """
    # Local import keeps this cell self-contained (functools is not imported at the top).
    import functools

    last_executed = 0

    def decorator(func):
        # wraps() preserves __name__/__doc__ so the decorated function stays introspectable.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            nonlocal last_executed
            current_time = time.time()
            if current_time - last_executed < interval:
                # Too soon since the last executed call: skip silently.
                return None
            # The timestamp is taken before the call, so a call that raises still
            # counts against the interval.
            last_executed = current_time
            return func(*args, **kwargs)
        return wrapper
    return decorator

@rate_limited(120)
def backup(paths):
    """Copy each existing file in `paths` into BACKUP_PATH with a timestamp suffix.

    Because of the `rate_limited(120)` decorator, a call made within 120 seconds
    of the previous executed call does nothing and returns None.

    Args:
        paths: Iterable of file paths. Entries that are not existing regular
            files are skipped.

    The copy is named `<basename><YYYYmmdd-HHMMSS>` (the timestamp is appended
    directly after the original file name, extension included).
    """
    # makedirs(exist_ok=True) also creates missing parent directories and does not
    # fail if the directory appears between a check and the creation.
    os.makedirs(BACKUP_PATH, exist_ok=True)

    # One timestamp per call so every file of the same snapshot shares a suffix.
    stamp = time.strftime("%Y%m%d-%H%M%S")
    for path in paths:
        # isfile() rather than exists(): copyfile cannot copy a directory.
        if os.path.isfile(path):
            shutil.copyfile(path, os.path.join(BACKUP_PATH, os.path.basename(path) + stamp))

class Repository:
    """Issue (and per-issue comment listing) collector for a single GitHub repository.

    The repository is addressed by its "owner/name" identifier, derived from the
    URL passed to the constructor. Nothing is requested from `github_client`
    until `collect_info()` is called.
    """

    # Explicit column layouts so that empty results still carry a header; a
    # column-less frame written with to_csv() cannot be read back by read_csv().
    ISSUE_COLUMNS = [
        "Repository", "IssueId", "Title", "Body",
        "State", "Label", "CreatedAt", "ClosedAt",
    ]
    COMMENT_COLUMNS = ["Repository", "IssueId", "CommentId", "Comment"]

    def __init__(self, github_client, repo_url):
        """Store the client and derive the "owner/name" identifier from `repo_url`."""
        self.github_client = github_client
        self.identifier = self.parse_repo_identifier(repo_url)
        self.repo = None          # set by collect_info()
        self.issues = []          # replaced by set_issues()
        self.issue_comments = {}  # issue number -> comments object from issue.get_comments()

    def parse_repo_identifier(self, repo_url):
        """Return "owner/name" built from the last two path segments of `repo_url`.

        A trailing "/" and a trailing ".git" are ignored, so
        "https://github.com/o/r", ".../o/r/" and ".../o/r.git" all give "o/r".

        Raises:
            IndexError: if `repo_url` has fewer than two "/"-separated segments.
        """
        segments = repo_url.rstrip("/").split("/")
        owner = segments[-2]
        repo_name = segments[-1]
        if repo_name.endswith(".git"):
            repo_name = repo_name[:-len(".git")]

        return owner + "/" + repo_name

    def get_identifier(self):
        """Return the "owner/name" identifier of this repository."""
        return self.identifier

    def collect_info(self):
        """Look the repository up through the client and load its issues (all states)."""
        self.repo = self.github_client.get_repo(self.identifier)
        self.set_issues(self.repo.get_issues(state="all"))

    def set_issues(self, issues):
        """Store `issues` and register the comments object of every issue.

        Iterates over all issues and calls `issue.get_comments()` on each one.
        NOTE(review): with PyGithub this presumably pages through the whole issue
        list here; whether get_comments() itself performs a request is not
        visible from this file.
        """
        self.issues = issues
        for issue in self.issues:
            self.set_comments(issue.number, issue.get_comments())

    def set_comments(self, issue_id, comments):
        """Remember `comments` under the issue number `issue_id`."""
        self.issue_comments[issue_id] = comments

    def get_issues(self):
        """Return the stored issues as a DataFrame with the ISSUE_COLUMNS layout.

        Label names are joined with "," into a single string. When there are no
        issues the frame has zero rows but still all columns.
        """
        records = [
            {
                "Repository": self.identifier,
                "IssueId": issue.number,
                "Title": issue.title,
                "Body": issue.body,
                "State": issue.state,
                "Label": ",".join([label.name for label in issue.labels]),
                "CreatedAt": issue.created_at,
                "ClosedAt": issue.closed_at
            }
            for issue in self.issues
        ]
        return pd.DataFrame(records, columns=self.ISSUE_COLUMNS)

    ## Not necessary and takes too much time
    def get_comments(self):
        """Return every stored comment as a DataFrame with the COMMENT_COLUMNS layout.

        Iterates each comments object registered by set_comments(). When there
        are no comments the frame has zero rows but still all columns.
        """
        records = [
            {
                "Repository": self.identifier,
                "IssueId": issue_id,
                "CommentId": comment.id,
                "Comment": comment.body
            }
            for issue_id, issue_comments in self.issue_comments.items()
            for comment in issue_comments
        ]
        return pd.DataFrame(records, columns=self.COMMENT_COLUMNS)

In [7]:
repositories = []

# Load the issues collected by earlier runs once; the frame is then kept in memory
# and extended per repository instead of re-reading the whole CSV on every iteration.
if os.path.exists(ISSUE_DATA_PATH):
    issues = pd.read_csv(ISSUE_DATA_PATH)
    # A set gives constant-time "already processed?" checks.
    processed_repos = set(issues["Repository"].unique())
else:
    issues = None
    processed_repos = set()

df = pd.read_csv(REPO_METADATA_PATH)

for url in tqdm(df["repo_url"].to_list()):
    try:
        repo = Repository(github, url)
        identifier = repo.get_identifier()
        if identifier in processed_repos:
            continue

        repo.collect_info()
        repositories.append(repo)
        # Marked as processed for this run so a URL listed twice in the metadata
        # is not collected (and its issues duplicated) a second time.
        processed_repos.add(identifier)

        new_issues = repo.get_issues()
        if new_issues.empty:
            # Nothing to add; also avoids ever writing a header-less CSV that
            # pd.read_csv could not load again.
            continue

        if issues is None:
            issues = new_issues
        else:
            issues = pd.concat([issues, new_issues], ignore_index=True, sort=False)

        # Snapshot the previous file (rate limited inside backup()) before overwriting it.
        backup([ISSUE_DATA_PATH, REPOSITORIES_BIN_DATA_PATH])

        issues.to_csv(ISSUE_DATA_PATH, index=False)
        #pickle.dump(repositories,  open(REPOSITORIES_BIN_DATA_PATH, 'wb'))
    except Exception as e:
        # Best effort: report which repository failed and carry on with the next one.
        print("Error: ", url, e)

  2%|▏         | 21/991 [01:52<34:29,  2.13s/it]  

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


  2%|▏         | 23/991 [01:53<21:14,  1.32s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


  2%|▏         | 24/991 [01:53<16:25,  1.02s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


  5%|▍         | 47/991 [03:32<28:21,  1.80s/it]  Following Github server redirection from /repos/Immutal0/solana-pumpfun-bundler-bot to /repositories/931381573
 11%|█         | 108/991 [06:20<18:16,  1.24s/it] 

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 11%|█         | 111/991 [08:16<7:05:37, 29.02s/it]Following Github server redirection from /repos/sivicstudio/starkludo to /repositories/826372536
 21%|██        | 210/991 [16:52<55:16,  4.25s/it]  

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 22%|██▏       | 222/991 [17:57<47:43,  3.72s/it]  

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 26%|██▌       | 259/991 [21:15<50:10,  4.11s/it]  

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 28%|██▊       | 275/991 [24:16<8:02:47, 40.46s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 32%|███▏      | 318/991 [28:55<1:25:41,  7.64s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 35%|███▍      | 343/991 [32:56<1:29:02,  8.24s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 41%|████      | 406/991 [42:21<1:15:23,  7.73s/it]Following Github server redirection from /repos/brightlystar0117/solana-sniper to /repositories/756151094
 43%|████▎     | 423/991 [44:10<41:07,  4.34s/it]  Following Github server redirection from /repos/vininha-carvalho/Solana-Algorithmic-Trading-Software to /repositories/753026967
 46%|████▌     | 453/991 [48:18<49:05,  5.47s/it]  

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 51%|█████     | 505/991 [57:21<4:00:22, 29.68s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 53%|█████▎    | 530/991 [1:02:09<2:16:36, 17.78s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 54%|█████▍    | 534/991 [1:04:40<4:11:41, 33.04s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 63%|██████▎   | 622/991 [1:23:59<45:27,  7.39s/it]  

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 63%|██████▎   | 628/991 [1:24:45<48:06,  7.95s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 65%|██████▍   | 643/991 [1:27:49<1:31:11, 15.72s/it]Following Github server redirection from /repos/heyxyz/hey to /repositories/471728341
 67%|██████▋   | 663/991 [1:33:49<47:53,  8.76s/it]  

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 68%|██████▊   | 676/991 [1:35:54<1:02:01, 11.82s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 69%|██████▉   | 684/991 [1:37:20<1:13:42, 14.40s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 72%|███████▏  | 711/991 [1:42:29<41:53,  8.98s/it]  Following Github server redirection from /repos/Immutal0/solana-raydium-volume-bot to /repositories/931378291
 82%|████████▏ | 817/991 [2:10:03<47:42, 16.45s/it]   Following Github server redirection from /repos/cardano-foundation/cf-lob-platform to /repositories/823012927
 83%|████████▎ | 826/991 [2:11:41<27:59, 10.18s/it]Following Github server redirection from /repos/cryptoking000/top-solana-arbitrage-bot to /repositories/921369048
 84%|████████▎ | 828/991 [2:12:02<27:49, 10.24s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 85%|████████▍ | 841/991 [2:14:16<26:54, 10.76s/it]

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


 97%|█████████▋| 962/991 [2:41:51<10:20, 21.38s/it]  

Error:  404 {"message": "Not Found", "documentation_url": "https://docs.github.com/rest", "status": "404"}


100%|██████████| 991/991 [2:49:44<00:00, 10.28s/it]


In [8]:
# Ad-hoc lookup of one hard-coded repository; note that this rebinds the
# notebook-level name `repo`, which the collection loop also assigns.
repo = github.get_repo("opexdev/core")

In [14]:
# Show the repository metadata frame.
# NOTE(review): the output below has a `url` column, while the issue-collection
# loop reads `repo_url` — confirm which file REPO_METADATA_PATH pointed to when
# each cell was run.
df

Unnamed: 0,url,name,description,language,Category,blockchain
0,https://api.github.com/repos/askmike/gekko,gekko,A bitcoin trading bot written in node,JavaScript,Crypto,bitcoin
1,https://api.github.com/repos/butor/blackbird,blackbird,Blackbird Bitcoin Arbitrage: a long/short mark...,C++,Crypto,bitcoin
2,https://api.github.com/repos/MetaMask/metamask...,metamask-extension,:globe_with_meridians: :electric_plug: metamas...,JavaScript,Crypto,ethereum
3,https://api.github.com/repos/bitcoin-wallet/bi...,bitcoin-wallet,Bitcoin Wallet app for your Android device. St...,Java,Crypto,bitcoin
4,https://api.github.com/repos/yasinkuyu/binance...,binance-trader,Bitcoin Trading Bot for Binance (Experimental),,Crypto,multi
...,...,...,...,...,...,...
1189,https://api.github.com/repos/simplestaking/tez...,tezos-wallet,,,Crypto,tezos
1190,https://api.github.com/repos/SJSU272LabF17/Org...,OrganDonation-Chain,Organ Waitlist ledger on a blockchain,JavaScript,Others,hyperledger fabric
1191,https://api.github.com/repos/dfoderick/bitshovel,bitshovel,Bitcoin message bus bridge to internal message...,,Others,bitcoin
1192,https://api.github.com/repos/ampleforth/market...,market-oracle,Set of smart contracts on Ethereum deal with e...,JavaScript,Crypto,ethereum


In [17]:

df = pd.read_csv(REPO_METADATA_PATH)

# One entry per row of `df`, in row order: the repository's `pushed_at` value,
# or None when the lookup failed.
updated_at = []
for url in tqdm(df["url"].to_list()):
    segments = url.split("/")
    owner = segments[-2]
    repo_name = segments[-1]
    try:
        repo = github.get_repo(f"{owner}/{repo_name}")
        updated_at.append(repo.pushed_at)
    except Exception as e:
        # `except Exception` (not a bare `except`) so Ctrl-C / kernel interrupt still
        # stops the loop; the failure is reported and a placeholder keeps the list
        # aligned with the rows of `df`.
        print("Error: ", url, e)
        updated_at.append(None)

  1%|          | 6/1194 [00:02<08:29,  2.33it/s]Following Github server redirection from /repos/TokenMarketNet/ico to /repositories/85634144
  1%|          | 9/1194 [00:03<08:24,  2.35it/s]Following Github server redirection from /repos/hyperledger/blockchain-explorer to /repositories/67946944
  1%|          | 11/1194 [00:04<09:02,  2.18it/s]Following Github server redirection from /repos/zkSNACKs/WalletWasabi to /repositories/55341469
  1%|          | 14/1194 [00:06<08:55,  2.21it/s]Following Github server redirection from /repos/aragon/aragon to /repositories/83571693
  2%|▏         | 20/1194 [00:08<09:04,  2.16it/s]Following Github server redirection from /repos/poanetwork/blockscout to /repositories/117294980
  2%|▏         | 21/1194 [00:09<09:24,  2.08it/s]Following Github server redirection from /repos/Sparklemobile/Sparkle-Proof-Of-Loyalty to /repositories/232508972
  2%|▏         | 28/1194 [00:12<09:31,  2.04it/s]Following Github server redirection from /repos/Sparklemobile/Spa

In [None]:
# Repositories whose recorded push time is older than this instant are dropped.
ACTIVE_SINCE = pd.Timestamp("2024-02-21", tz="UTC")

df["updated_at"] = updated_at

# Compare on an explicit UTC datetime view so the filter does not depend on how
# pandas inferred the column dtype (failed lookups are None and become NaT here,
# which never satisfies the comparison). The stored column itself is left as is.
pushed_at_utc = pd.to_datetime(df["updated_at"], utc=True)
df = df[pushed_at_utc >= ACTIVE_SINCE]
df.to_csv("./Data/BlockchainAppRepositories-Old-Actives.csv", index=False)

In [38]:
# "owner/name" identifier of every repository left in `df`, built from the last
# two "/"-separated segments of its URL.
actve_repo = [
    f"{segments[-2]}/{segments[-1]}"
    for segments in (url.split("/") for url in df["url"].to_list())
]

In [None]:
# Keep only the issues whose repository identifier is in `actve_repo`, then
# write that subset to its own CSV.
issues = pd.read_csv("./Data/AllIssues-Old.csv").pipe(
    lambda frame: frame[frame["Repository"].isin(actve_repo)]
)
issues.to_csv("./Data/AllIssues-Old-Actives.csv", index=False)

  issues = pd.read_csv("./Data/AllIssues-Old.csv")


Unnamed: 0,Repository,IssueId,Title,Body,State,Label,CreatedAt,ClosedAt
0,askmike/gekko,2892,"Sqlite issue, Gekko no finding sqlite on live ...","**Note: this is the technical bug tracker, ple...",closed,,2020-02-14 19:59:40+00:00,2020-02-16 14:15:40+00:00
1,askmike/gekko,2891,Unable to see the functionality of any page,![Screenshot (2)](https://user-images.githubus...,closed,,2020-02-13 04:46:29+00:00,2020-02-16 14:15:51+00:00
2,askmike/gekko,2890,Error: non-error thrown: ERROR: Talib is not e...,"**Note: this is the technical bug tracker, ple...",closed,,2020-01-28 15:51:00+00:00,2020-02-16 14:16:37+00:00
3,askmike/gekko,2889,Update dependencies for Kraken import,I had to add/update these packages to be able ...,closed,,2020-01-26 19:33:56+00:00,2020-02-16 14:18:45+00:00
4,askmike/gekko,2888,Cannot trade on coinfalcon,"**Note: this is the technical bug tracker, ple...",closed,,2020-01-21 23:44:54+00:00,2020-02-16 14:16:08+00:00
...,...,...,...,...,...,...,...,...
332670,thebeetoken/beenest-web,5,Upgrade webpack-dev-server,https://www.npmjs.com/advisories/725\r\n\r\n##...,closed,,2019-01-02 20:34:21+00:00,2019-01-02 23:40:14+00:00
332671,thebeetoken/beenest-web,4,Host onboarding redirect,## Description\r\nMinor Host onboarding tweaks...,closed,,2019-01-02 18:45:37+00:00,2019-01-02 19:50:50+00:00
332672,thebeetoken/beenest-web,3,admin ux tweaks,"## Description\r\nRemoves unneeded CSS, joins ...",closed,,2019-01-02 18:32:57+00:00,2019-01-02 19:37:39+00:00
332673,thebeetoken/beenest-web,2,Deploy,fix for photo and country code saving,closed,,2018-12-30 03:56:00+00:00,2018-12-30 04:17:11+00:00
