In [45]:
from langchain import LLMChain, PromptTemplate
from langchain.llms import OpenAI
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

import psycopg2 as pg
import os


with open('open_api_key.txt', 'r') as f:
    openai_key = f.read().strip()

#모델, 아웃풋 파싱 준비
model = ChatOpenAI(model="gpt-4", api_key=openai_key)
parser = StrOutputParser()

# Get Activity From Github, X, and Mainnet 
B0 ~ B7 on Diagram

In [48]:
# to take the github repo data
import requests
from datetime import datetime, timedelta
from pprint import pprint
import json
import psycopg2 as pg
from psycopg2 import sql

def github_api_request(owner, repo, endpoint, params={}):
    if endpoint:
        base_url = f'https://api.github.com/repos/{owner}/{repo}/{endpoint}'
    else:
        base_url = f'https://api.github.com/repos/{owner}/{repo}'
    response = requests.get(base_url, params=params)
    return response.json()

def count_commits(owner, repo, params={}):
    commits = github_api_request(owner, repo, 'commits', params)
    return len(commits)

def count_issues(owner, repo, params={}):
    issues_data = github_api_request(owner, repo, 'issues', params)
    issue_count = len([issue for issue in issues_data if 'pull_request' not in issue])
    return issue_count

def count_pull_requests(owner, repo, params={}):
    pulls = github_api_request(owner, repo, 'pulls', params)
    return len(pulls)

def count_watcher(owner, repo, parmas={}):
    repo = github_api_request(owner, repo, '')
    # pprint(repo)
    return repo['watchers_count']

def connect_to_db(host, port, user, passwd):
    conn = pg.connect(
        host=host,
        port=port,
        database='your_database',
        user=user,
        password=passwd
    )
    return conn


def insert_to_db(conn, table : str, data : dict):

    cur = conn.cursor()
    insert_query = sql.SQL("INSERT INTO {table} ({columns}) VALUES ({values})").format(
        table=sql.Identifier(table),
        columns=sql.SQL(', ').join(map(sql.Identifier, data.keys())),
        values=sql.SQL(', ').join(map(sql.Literal, data.values()))
    )
    cur.execute(insert_query)
    conn.commit()
    cur.close()

# Scoring Algorithm(s)
Counts Activity. Currently Measured Daily. 

In [49]:
# set the owner and repo
owner = 'octocat'
repo = 'Hello-World'

# get the diff between today and month
today = datetime.utcnow()
last_month = today - timedelta(days=30)

# change the datetime foramt as ISO 8601
since = last_month.isoformat() + 'Z'
until = today.isoformat() + 'Z'

# request the number of Commits
params = {'since': since, 'until': until}
commit_count = count_commits(owner, repo, params)
print(f"Commits in the last day: {commit_count}")

# request the number of Issues
issue_count = count_issues(owner, repo, params)
print(f"Issues in the last day: {issue_count}")

# request the number of Pull Requests
pull_count = count_pull_requests(owner, repo, params)
print(f"Pull Requests in the last day: {pull_count}")

# request the number of Watchers

before_month_watcher_params = {"since": since, "until": since}
before_month_watcher_count = count_watcher(owner, repo, before_month_watcher_params)

after_month_watcher_params = {"since": until, "until": until}
after_month_watcher_count = count_watcher(owner, repo, after_month_watcher_params)

watcher_diff = after_month_watcher_count - before_month_watcher_count
print(f"Watchers in the last day: {watcher_diff}")

Commits in the last day: 0
Issues in the last day: 19
Pull Requests in the last day: 30
Watchers in the last day: 0


# Beginning - SME's tweaking

# Rule Based Classification; LLM-As a Judge Prompt 
Based on the Activity, now we judge if project is dying. 
You can change the threshold in prompt, 'system_template'.

In [42]:
system_template = """
You should rate the project based on the following criteria, 
but at this time, only the github information will be suggested. 
and if the score is less than 15, you should consider the project as dead.
Unless, you should consider the project as alive.
and you should make the report based on your own opinion for the project. 
:

### scoring criteria

1. GitHub Score (30 points total)

Frequency of commits:

- more than 20 commits within last month : 12 points
- more than 10 commits within last month  : 9 points
- Less than three commits within last month: 3 points
- No commits for more than 1 months: 0 points

Number of increased watchers:

- 5 or more increased : 9 points
- 3-4 increased : 6 points
- 1-2 increased : 3 points
- Not increased : 0 points

Issues and pull request activity:

- Frequent activity (discussions, quick resolutions within 1-2 weeks): 9 points
- Moderate activity (some unresolved issues, resolutions taking longer): 6 points
- Minimal activity (many unresolved issues, responses taking months): 3 points
- No issue or pull request activity: 0 points

### data
{data}
"""

data_input = f"""
commits in last month: {commit_count}
issues in last month: {issue_count}
pull request activity: {pull_count}
increased watchers: {watcher_diff}

"""

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{data}")]
)
chain = prompt_template | model | parser

Execute below to see evaluation results. 

In [51]:
result = chain.invoke({"data": data_input})
print(result)

Based on the given data, I would score the project as follows:

Frequency of commits: No commits for more than 1 month, so 0 points.

Number of increased watchers: No increased watchers, so 0 points.

Issues and pull request activity: Given the number of issues and pull request activity, it seems there is frequent activity. However, without knowing the speed of resolution, it's difficult to accurately score this. If we assume quick resolutions, this would be 9 points. 

Therefore, the total GitHub score for this project is 9 points. Since this score is less than 15, I would consider the project as dead. However, the high degree of issues and pull request activity does suggest that there may still be interest and activity within the project, so it might be worth investigating further before making a final decision.


# Suggested Evaluation Threshold for Rule based approach: 
Based on 
- Data on average commit counts and contributor distributions are derived from **studies on GitHub’s public repositories**, including blockchain-specific projects.
- Social media engagement metrics are based on **typical ranges observed in crypto community analytics and social media benchmarking reports**.
- Mainnet transaction volumes and active address counts were informed by blockchain analytics from mid-tier to popular blockchain projects, as observed in platforms like Etherscan, Dune Analytics, and other blockchain explorer data


1. Github Metrics: 
Commit Frequency Thresholds:

High (> 50 commits/month): Projects with consistent, high commit rates are typically under active development. According to data on open-source repositories, the median commit count per month for actively maintained repositories is around 50 commits. Blockchain projects that surpass this are considered highly active.
Moderate (20-50 commits/month): This range captures projects that are steadily maintained but might not be under intense development. The 20-50 range represents the interquartile range for many active blockchain projects.
Low (< 20 commits/month): Projects with fewer than 20 commits per month are often in maintenance mode or seeing reduced activity. This threshold aligns with the lower quartile of commit activity seen across many GitHub repositories.
Contributor Count Thresholds:

High (> 10 contributors): Active blockchain projects often involve multiple developers. Data from open-source software (OSS) projects indicates that successful and actively maintained projects usually have at least 10 contributors.
Moderate (5-10 contributors): This range represents moderately collaborative projects, still maintaining sufficient contributor engagement.
Low (< 5 contributors): Projects with fewer than 5 active contributors often indicate limited engagement, which is common in smaller or declining projects.

Issue and Pull Request Management Thresholds:

High Responsiveness (80%+ resolved in 30 days): A study on OSS repositories shows that well-maintained projects resolve the majority of issues quickly, with 80% or more closed within a month.
Moderate Responsiveness (50-80% resolved): Represents adequate maintenance; still actively managed but not as promptly.
Low Responsiveness (< 50% resolved): Slow resolution rates often signify stalled or struggling projects.
Forks and Stars:

Growing, stable, and declining trends are based on changes observed over a 6-month period, with active projects generally showing a steady increase in forks and stars as proxies for interest and adoption.

2. Social Media (X/Twitter) Metrics:

Engagement Metrics:

High (> 1000 engagements/month): Top-performing blockchain projects tend to have high visibility, with notable engagement rates. For active blockchain projects, 1000 engagements per month (including likes, retweets, replies) represents a strong level of community interaction.
Moderate (500-1000 engagements/month): Reflects decent engagement and a still-active community.
Low (< 500 engagements/month): Often indicates limited reach or waning interest, common for smaller or less visible projects.

Sentiment Analysis:

Positive (> 0.3 average score): Sentiment analysis tools classify above 0.3 as generally positive, which is typical for thriving communities.
Neutral (-0.3 to 0.3): Mixed or balanced sentiment is indicative of a stable but not overly enthusiastic community.
Negative (< -0.3): Projects facing issues often attract negative sentiment, which can be a sign of underlying problems.

Follower Growth:

High Growth (> 5%/month): Reflects growing popularity and increasing community size, aligned with successful projects.
Moderate Growth (1-5%/month): Indicates steady growth, sufficient to suggest ongoing interest.
Low Growth (< 1%/month): Stagnant or declining follower counts can indicate declining project momentum.

3. Mainnet Transactions and On-Chain Activity Metrics:

Mainnet Transaction Volume: 

High (> 10,000 transactions/month): For blockchain projects with active usage, this figure represents the average seen in mid-tier blockchain networks. Larger projects often exceed this by a significant margin.
Moderate (5,000-10,000 transactions/month): This range captures moderate usage levels, typical for projects with a specific niche or less frequent interactions.
Low (< 5,000 transactions/month): Projects with fewer than 5,000 transactions per month are generally underused, often seen in struggling or less-adopted networks.

Active Addresses:

High (> 2,000 addresses/month): Represents a healthy user base engaging with the blockchain, as seen in moderately successful blockchain ecosystems.
Moderate (1,000-2,000 addresses/month): Indicates moderate engagement, sufficient to maintain activity but not indicative of high growth.
Low (< 1,000 addresses/month): Common in smaller or declining networks, signaling reduced user interaction.
Token Circulation and Transfers:

High (> 20,000 transfers/month): High token movement suggests active trading or usage within the ecosystem.
Moderate (10,000-20,000 transfers/month): Indicates ongoing but not extensive token usage.
Low (< 10,000 transfers/month): Often reflects a lack of user engagement or low network activity.


# Individual Project 조회 Dashboard

# Inference Evaluation for Rule Based Classification
How Well does model perform? Compare y and y'
(실 데이터가 갖추어진 뒤 실행)

# Suggest Analysis 1 

# Benchmarking Against Competitors: 
Compare the project’s metrics with those of similar projects or competitors to contextualize the performance. 
It might be 

# Historical Trend Analysis
Time series Analysis



In [None]:
deleteThis = "hello world"