In [54]:
import sys
from pathlib import Path
import warnings
warnings.filterwarnings("ignore", module="IPython")

def is_google_colab() -> bool:
    if "google.colab" in str(get_ipython()):
        return True
    return False

def install_dependencies() -> None:
    !pip install --upgrade uv
    !uv pip install --all-extras --system --requirement pyproject.toml

if is_google_colab():
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("Google Colab environment")
else:
    root_dir = Path().absolute()
    # Strip ~/notebooks/ccfraud from PYTHON_PATH if notebook started in one of these subdirectories
    if root_dir.parts[-1:] == ('airquality',):
        root_dir = Path(*root_dir.parts[:-1])
    if root_dir.parts[-1:] == ('notebooks',):
        root_dir = Path(*root_dir.parts[:-1])
    root_dir = str(root_dir) 
    print("Local environment")

print(f"Root dir: {root_dir}")

# Add the root directory to the `PYTHONPATH` 
if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Local environment
Root dir: /Users/cuiyuting/Desktop/release_popularity_prediction


In [55]:
import datetime
from datetime import datetime
import time
import requests
import pandas as pd
import hopsworks
import json
import warnings
warnings.filterwarnings("ignore")

In [56]:
# Retrieve feature groups
project = hopsworks.login()
fs = project.get_feature_store() 
trending_fg = fs.get_feature_group(
    name='trending_info',
    version=1,
)
release_fg = fs.get_feature_group(
    name='release_info',
    version=1,
)

2026-01-11 20:16:17,479 INFO: Closing external client and cleaning up certificates.


Connection closed.
2026-01-11 20:16:17,748 INFO: Initializing external client
2026-01-11 20:16:17,753 INFO: Base URL: https://c.app.hopsworks.ai:443






2026-01-11 20:16:19,358 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1342613


# Trending Info
## Download and parse data

In [57]:
# scrape github trending data
from bs4 import BeautifulSoup

today = datetime.now().strftime('%Y-%m-%d') 
print(f"Fetching {today} 's GitHub Trending Page...")
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}
try:
    response = requests.get("https://github.com/trending", headers=HEADERS, timeout=30)
    response.raise_for_status()
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")

soup = BeautifulSoup(response.text, 'html.parser')
repos = soup.find_all('article', {'class': 'Box-row'})
processed_data = []
for repo in repos:
    try:
        title_elem = repo.find('h2') or repo.find('h1')
        if not title_elem:
            continue
        link = title_elem.find('a')
        if not link:
            continue
        repo_full_name = link.get('href', '').lstrip('/')
        if not repo_full_name:
            continue
        
        processed_data.append({
            'date': today,
            'repo_full_name': repo_full_name
        })   
    except Exception as e:
        continue

trending_df = pd.DataFrame(processed_data)
trending_df

Fetching 2026-01-11 's GitHub Trending Page...


Unnamed: 0,date,repo_full_name
0,2026-01-11,anomalyco/opencode
1,2026-01-11,obra/superpowers
2,2026-01-11,frankbria/ralph-claude-code
3,2026-01-11,davila7/claude-code-templates
4,2026-01-11,makeplane/plane
5,2026-01-11,twitter/twemoji
6,2026-01-11,bytedance/UI-TARS-desktop
7,2026-01-11,twentyhq/twenty
8,2026-01-11,home-assistant/home-assistant.io
9,2026-01-11,anthropics/claude-code


In [58]:
import os
from dotenv import load_dotenv

load_dotenv()
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN', '')
HEADERS = {
    'Authorization': 'token ' + GITHUB_TOKEN,
    'Accept': 'application/vnd.github.v3+json'
}

trending_df_with_topics = trending_df.copy()
trending_df_with_topics['topics'] = [[] for _ in range(len(trending_df_with_topics))]

for i, row in trending_df_with_topics.iterrows():
        repo_name = row['repo_full_name']
        if pd.isna(repo_name):
            continue
        try:
            response = requests.get(
                f"https://api.github.com/repos/{repo_name}",
                headers=HEADERS,
                timeout=10
            )
            if response.status_code == 200:
                topics = response.json().get('topics', [])
                trending_df_with_topics.at[i, 'topics'] = topics
            else:
                print(f"  Error: {response.status_code}")
                trending_df_with_topics.at[i, 'topics'] = []
        except Exception as e:
            print(f"  Error: {e}")
            trending_df_with_topics.at[i, 'topics'] = []
        time.sleep(0.8 if GITHUB_TOKEN else 2)

trending_df_with_topics

Unnamed: 0,date,repo_full_name,topics
0,2026-01-11,anomalyco/opencode,[]
1,2026-01-11,obra/superpowers,[]
2,2026-01-11,frankbria/ralph-claude-code,"[ai, ai-agent, ai-agents, ai-development, ai-d..."
3,2026-01-11,davila7/claude-code-templates,"[anthropic, anthropic-claude, claude, claude-c..."
4,2026-01-11,makeplane/plane,"[boards, bug-tracker, django, docker, gantt, i..."
5,2026-01-11,twitter/twemoji,"[emoji, twemoji]"
6,2026-01-11,bytedance/UI-TARS-desktop,"[agent, agent-tars, browser-use, computer-use,..."
7,2026-01-11,twentyhq/twenty,"[crm, crm-system, customer, good-first-issue, ..."
8,2026-01-11,home-assistant/home-assistant.io,"[documentation, hacktoberfest, hass, hassio, h..."
9,2026-01-11,anthropics/claude-code,[]


In [59]:
trending_df_with_topics = trending_df_with_topics.drop(columns=['repo_full_name'])

def parse_topics(topic_str):
    if ',' in topic_str:
        return [t.strip() for t in topic_str.split(',')]
    else:
        return [topic_str.strip()]

# trending_df_with_topics['topics'] = trending_df_with_topics['topics'].apply(parse_topics)
trending_df_with_topics['date'] = pd.to_datetime(trending_df_with_topics['date'])

def aggregate_topics(group):
    all_topics = []
    for topics in group:
        all_topics.extend(topics)
    return list(set(all_topics))

daily_topics_df = trending_df_with_topics.groupby('date')['topics'].apply(aggregate_topics).reset_index()
daily_topics_df.columns = ['date', 'all_topics']

daily_topics_df

Unnamed: 0,date,all_topics
0,2026-01-11,"[anthropic-claude, work-management, pdf-conver..."


In [60]:
daily_topics_df['all_topics'] = daily_topics_df['all_topics'].apply(
    lambda x: json.dumps(x) if x else '[]'
)
daily_topics_df

Unnamed: 0,date,all_topics
0,2026-01-11,"[""anthropic-claude"", ""work-management"", ""pdf-c..."


## Uploading new data to the Feature Store

In [None]:
# Insert new data
trending_fg.insert(daily_topics_df)

# Release Info
## Download and parse data

In [61]:
import os, time, bisect, logging
import requests
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv

load_dotenv()
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN', '')
HEADERS = {
    'Authorization': 'token ' + GITHUB_TOKEN,
    'Accept': 'application/vnd.github.v3+json'
}
HEADERS_STAR = {
    'Authorization': 'token ' + GITHUB_TOKEN,
    'Accept': 'application/vnd.github.v3.star+json'
}

session = requests.Session()
session.headers.update(HEADERS)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def search_recent_repos(max_repos_per_page, days_back = 366):
    repos = []
    since_date = (datetime.now() - timedelta(days=days_back)).isoformat()
    seven_days_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
    url = f"https://api.github.com/search/repositories"
    params = {
        'q': f'created:>{since_date} stars:100..5000 pushed:<{seven_days_ago}',
        'sort': 'updated',
        'order': 'desc',
        'per_page': max_repos_per_page,
        'page': 1
    }
    try:
        response = session.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
        if 'items' not in data or not data['items']:
            return repos
        for repo in data['items']:
            if repo['size'] > 100:
                repos.append({
                    'full_name': repo['full_name'],
                    'stargazers_count': repo['stargazers_count'],
                    'forks_count': repo['forks_count'],
                    'watchers_count': repo['watchers_count'],
                    'language': repo.get('language', 'Unknown'),
                    'created_at': repo['created_at'],
                    'updated_at': repo['updated_at'],
                    'topics': repo.get('topics', []),
                    'owner': repo.get('owner', {}).get('login') if repo.get('owner') else None
                })
        logger.info(f"{len(repos)} repositories searched.")
        time.sleep(1)
        return repos
    except Exception as e:
        logger.error(f"Error: {e}")
        return repos

def get_repo_releases(owner: str, repo: str):
    url = f"https://api.github.com/repos/{owner}/{repo}/releases"
    try:
        response = session.get(url, timeout=10)
        response.raise_for_status()
        releases = response.json()
        if not isinstance(releases, list):
            return []
        return releases
    except Exception as e:
        logger.error(f"{owner}/{repo} Error: {e}")
        return []

def get_author_features(author_login: str):
    if not author_login:
        return {
            'author_followers': 0,
            'author_public_repos': 0,
            'author_type': 'Unknown'
        }
    try:
        url = f"https://api.github.com/users/{author_login}"
        response = session.get(url, timeout=5)
        if response.status_code == 404:
            return {
            'author_followers': 0,
            'author_public_repos': 0,
            'author_type': 'Unknown'
        }
        response.raise_for_status()
        user_data = response.json()
        return {
            'author_followers': user_data['followers'],
            'author_public_repos': user_data['public_repos'],
            'author_type': user_data['type']
        }
    except Exception as e:
        logger.error(f"Error getting {author_login}: {e}")
        return {
            'author_followers': 0,
            'author_public_repos': 0,
            'author_type': 'Unknown'
        }

def get_first_week_stars(owner, repo, published_at_str):
    published_at = datetime.strptime(published_at_str, "%Y-%m-%dT%H:%M:%SZ")
    cutoff_date = published_at + timedelta(days=7)
    total_stars = 0
    page = 1
    while True:
        url = f"https://api.github.com/repos/{owner}/{repo}/stargazers?per_page=100&page={page}"
        resp = requests.get(url, headers=HEADERS_STAR)
        if resp.status_code != 200:
            logger.error(f"Star Count Failed: {resp.status_code}")
            break
        data = resp.json()
        if not data:
            break
        star_times = [datetime.strptime(star['starred_at'], "%Y-%m-%dT%H:%M:%SZ") for star in data]
        start_idx = bisect.bisect_left(star_times, published_at)
        end_idx = bisect.bisect_right(star_times, cutoff_date)
        page_count = end_idx - start_idx
        total_stars += page_count
        if end_idx == 0:
            break
        if star_times[-1] > cutoff_date and page_count == 0:
            break
        page += 1
        time.sleep(0.5)
    return total_stars

def process_release(repo, release, owner, repo_name):
    try:
        author_login = repo['owner']
        author = get_author_features(author_login)
        first_week_star = get_first_week_stars(owner, repo_name, release['published_at'])
        release_data = {
            'full_name': repo['full_name'] + '/' + release.get('tag_name', ''),
            'repo_stars': repo['stargazers_count'],
            'repo_forks': repo['forks_count'],
            'repo_watchers': repo['watchers_count'],
            'language': repo['language'],
            'repo_created_at': repo['created_at'],
            'repo_updated_at': repo['updated_at'],
            'topics': repo['topics'],
            'release_name': release.get('name', ''),
            'release_body': release.get('body', ''),
            'author_followers': author['author_followers'],
            'author_public_repos': author['author_public_repos'],
            'author_type': author['author_type'],
            'published_at': release.get('published_at', ''),
            'first_week_star': first_week_star
        }
        return release_data
        
    except Exception as e:
        logger.error(f"Error: {e}")
        return None

def collect_release_data(max_repos_per_page = 30):
    logger.info("Start collecting...")
    repos = search_recent_repos(max_repos_per_page)
    all_release_data = []
    for i, repo in enumerate(repos):
        full_name = repo['full_name']
        owner, repo_name = full_name.split('/')
        logger.info(f"Collecting repo {i+1}/{len(repos[:max_repos_per_page])}: {full_name}")
        releases = get_repo_releases(owner, repo_name)
        if releases:
            try:
                release_data = process_release(repo, releases[0], owner, repo_name)
                if release_data:
                    all_release_data.append(release_data)
                    logger.info(f"  Release collected: {releases[0].get('tag_name', 'unknown')}")
            except Exception as e:
                logger.error(f"Error: {e}")
                continue
        time.sleep(0.5)
    logger.info(f"{len(repos)} repos in total collected.")
    logger.info(f"{len(all_release_data)} releases in total collected.")
    df = pd.DataFrame(all_release_data)
    return df

release_df_original = collect_release_data()
release_df_original

2026-01-11 20:17:02,527 INFO: Start collecting...
2026-01-11 20:17:03,445 INFO: 27 repositories searched.
2026-01-11 20:17:04,448 INFO: Collecting repo 1/27: BrandeisPatrick/blank-space
2026-01-11 20:17:06,582 INFO:   Release collected: v0.4.0
2026-01-11 20:17:07,083 INFO: Collecting repo 2/27: ysharma3501/FlashSR
2026-01-11 20:17:07,935 INFO: Collecting repo 3/27: MakeX-Corp/makex-web
2026-01-11 20:17:08,754 INFO: Collecting repo 4/27: supermemoryai/install-mcp
2026-01-11 20:17:11,234 INFO:   Release collected: v1.10.0
2026-01-11 20:17:11,736 INFO: Collecting repo 5/27: infiniV/VoiceFlow
2026-01-11 20:17:14,661 INFO:   Release collected: v1.3.1
2026-01-11 20:17:15,167 INFO: Collecting repo 6/27: rudrankriyam/Foundation-Models-Framework-Example
2026-01-11 20:17:19,739 INFO:   Release collected: 0.2.0
2026-01-11 20:17:20,240 INFO: Collecting repo 7/27: sameerasw/my-internet
2026-01-11 20:17:23,393 INFO:   Release collected: pre-release-07a1ecb932cefb23e49dc4959a55fbe15e7b038f
2026-01-11

Unnamed: 0,full_name,repo_stars,repo_forks,repo_watchers,language,repo_created_at,repo_updated_at,topics,release_name,release_body,author_followers,author_public_repos,author_type,published_at,first_week_star
0,BrandeisPatrick/blank-space/v0.4.0,104,15,104,JavaScript,2025-10-10T20:45:39Z,2026-01-07T00:52:40Z,"[ai-agent, antrophic, artificial-intelligence,...",v0.4.0 - Unlock the potential of vibe coding,We always prioritize the vibe coding experien...,9,23,User,2025-12-17T00:44:34Z,2
1,supermemoryai/install-mcp/v1.10.0,156,27,156,TypeScript,2025-04-14T23:01:16Z,2026-01-09T15:48:22Z,[],v1.10.0,# [1.10.0](https://github.com/supermemoryai/in...,661,17,Organization,2025-09-23T21:12:13Z,3
2,infiniV/VoiceFlow/v1.3.1,240,18,240,TypeScript,2025-12-13T18:28:09Z,2026-01-11T16:29:11Z,[],VoiceFlow v1.3.1,## What's New\n\n- **Fix:** Resolved crash whe...,12,19,User,2026-01-03T22:50:06Z,9
3,rudrankriyam/Foundation-Models-Framework-Examp...,868,56,868,Swift,2025-06-09T19:29:16Z,2026-01-11T16:20:40Z,[],Release 0.2.0,Bug fixes and improvements,542,132,User,2025-07-09T08:27:20Z,44
4,sameerasw/my-internet/pre-release-07a1ecb932ce...,366,96,366,CSS,2025-02-08T11:26:59Z,2026-01-07T10:11:14Z,"[css, custom, custom-css, customcss, firefox, ...",Pre-Release 07a1ecb932cefb23e49dc4959a55fbe15e...,Pre-release for commit 07a1ecb932cefb23e49dc49...,383,105,User,2025-07-05T09:47:48Z,2
5,sameerasw/airsync-android/v2.2.0,179,12,179,Kotlin,2025-07-28T18:18:40Z,2026-01-08T13:30:34Z,"[android, jetpack, jetpack-compose, kotlin, ma...","Calls, Mirroring over tailscale, Clipboard cha...",# What's New in this feature drop?\r\n- Call s...,383,105,User,2025-12-24T15:53:49Z,4
6,oliverbravery/PrintGuard/v1.0.0b3,195,16,195,Python,2025-05-10T23:31:02Z,2026-01-11T10:28:59Z,[],Beta 3 - Multiplatform docker builds & RTSP st...,Beta 3\r\n- Better camera support and manageme...,7,19,User,2025-07-18T11:50:24Z,20
7,ExtremeXT/ExtremeROM/v2.6.1,501,124,501,Shell,2025-03-28T10:22:50Z,2026-01-11T01:46:16Z,[],v2.6.1,"<a href=""https://github.com/ExtremeXT/ExtremeR...",217,74,User,2025-08-22T14:48:07Z,16
8,MoowGlax/ygg-helper-dl/v1.3,116,5,116,JavaScript,2025-12-21T21:44:39Z,2026-01-10T19:49:08Z,[],Release v1.3,,2,6,User,2026-01-03T22:28:53Z,4
9,Aeastr/SettingsKit/2.0.0,244,5,244,Swift,2025-11-16T15:58:48Z,2026-01-11T05:00:41Z,[],2.0.0,,680,47,User,2026-01-03T22:10:57Z,5


In [62]:
release_df_original.dtypes

full_name              object
repo_stars              int64
repo_forks              int64
repo_watchers           int64
language               object
repo_created_at        object
repo_updated_at        object
topics                 object
release_name           object
release_body           object
author_followers        int64
author_public_repos     int64
author_type            object
published_at           object
first_week_star         int64
dtype: object

In [63]:
date_cols = ['repo_created_at', 'repo_updated_at', 'published_at']
for col in date_cols:
    release_df_original[col] = pd.to_datetime(release_df_original[col])
release_df_original.dtypes

full_name                           object
repo_stars                           int64
repo_forks                           int64
repo_watchers                        int64
language                            object
repo_created_at        datetime64[ns, UTC]
repo_updated_at        datetime64[ns, UTC]
topics                              object
release_name                        object
release_body                        object
author_followers                     int64
author_public_repos                  int64
author_type                         object
published_at           datetime64[ns, UTC]
first_week_star                      int64
dtype: object

In [64]:
release_df = release_df_original.copy()
release_df['topics'] = release_df['topics'].apply(
    lambda x: json.dumps(x) if x else '[]'
)

def parse_topics(topic_str):
    if ',' in topic_str:
        return [t.strip() for t in topic_str.split(',')]
    else:
        return [topic_str.strip()]

release_df['topics'] = release_df['topics'].apply(parse_topics)
release_df

Unnamed: 0,full_name,repo_stars,repo_forks,repo_watchers,language,repo_created_at,repo_updated_at,topics,release_name,release_body,author_followers,author_public_repos,author_type,published_at,first_week_star
0,BrandeisPatrick/blank-space/v0.4.0,104,15,104,JavaScript,2025-10-10 20:45:39+00:00,2026-01-07 00:52:40+00:00,"[[""ai-agent"", ""antrophic"", ""artificial-intelli...",v0.4.0 - Unlock the potential of vibe coding,We always prioritize the vibe coding experien...,9,23,User,2025-12-17 00:44:34+00:00,2
1,supermemoryai/install-mcp/v1.10.0,156,27,156,TypeScript,2025-04-14 23:01:16+00:00,2026-01-09 15:48:22+00:00,[[]],v1.10.0,# [1.10.0](https://github.com/supermemoryai/in...,661,17,Organization,2025-09-23 21:12:13+00:00,3
2,infiniV/VoiceFlow/v1.3.1,240,18,240,TypeScript,2025-12-13 18:28:09+00:00,2026-01-11 16:29:11+00:00,[[]],VoiceFlow v1.3.1,## What's New\n\n- **Fix:** Resolved crash whe...,12,19,User,2026-01-03 22:50:06+00:00,9
3,rudrankriyam/Foundation-Models-Framework-Examp...,868,56,868,Swift,2025-06-09 19:29:16+00:00,2026-01-11 16:20:40+00:00,[[]],Release 0.2.0,Bug fixes and improvements,542,132,User,2025-07-09 08:27:20+00:00,44
4,sameerasw/my-internet/pre-release-07a1ecb932ce...,366,96,366,CSS,2025-02-08 11:26:59+00:00,2026-01-07 10:11:14+00:00,"[[""css"", ""custom"", ""custom-css"", ""customcss"", ...",Pre-Release 07a1ecb932cefb23e49dc4959a55fbe15e...,Pre-release for commit 07a1ecb932cefb23e49dc49...,383,105,User,2025-07-05 09:47:48+00:00,2
5,sameerasw/airsync-android/v2.2.0,179,12,179,Kotlin,2025-07-28 18:18:40+00:00,2026-01-08 13:30:34+00:00,"[[""android"", ""jetpack"", ""jetpack-compose"", ""ko...","Calls, Mirroring over tailscale, Clipboard cha...",# What's New in this feature drop?\r\n- Call s...,383,105,User,2025-12-24 15:53:49+00:00,4
6,oliverbravery/PrintGuard/v1.0.0b3,195,16,195,Python,2025-05-10 23:31:02+00:00,2026-01-11 10:28:59+00:00,[[]],Beta 3 - Multiplatform docker builds & RTSP st...,Beta 3\r\n- Better camera support and manageme...,7,19,User,2025-07-18 11:50:24+00:00,20
7,ExtremeXT/ExtremeROM/v2.6.1,501,124,501,Shell,2025-03-28 10:22:50+00:00,2026-01-11 01:46:16+00:00,[[]],v2.6.1,"<a href=""https://github.com/ExtremeXT/ExtremeR...",217,74,User,2025-08-22 14:48:07+00:00,16
8,MoowGlax/ygg-helper-dl/v1.3,116,5,116,JavaScript,2025-12-21 21:44:39+00:00,2026-01-10 19:49:08+00:00,[[]],Release v1.3,,2,6,User,2026-01-03 22:28:53+00:00,4
9,Aeastr/SettingsKit/2.0.0,244,5,244,Swift,2025-11-16 15:58:48+00:00,2026-01-11 05:00:41+00:00,[[]],2.0.0,,680,47,User,2026-01-03 22:10:57+00:00,5


In [65]:
trending_df = trending_fg.read()
trending_df

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.14s) 


Unnamed: 0,date,all_topics
0,2025-05-09 00:00:00+00:00,"['chatgpt', 'domain', 'github', [], 'node-base..."
1,2025-09-16 00:00:00+00:00,"['spec', 'tts', 'chatgpt', 'generative-ai', 'd..."
2,2025-01-07 00:00:00+00:00,"['obsidian', 'artificial-intelligence', 'image..."
3,2025-06-19 00:00:00+00:00,"['shadcn-ui', 'etl-framework', 'claude-usage',..."
4,2025-10-03 00:00:00+00:00,"['decentralized', 'enterprise', 'workflow-auto..."
...,...,...
370,2025-10-20 00:00:00+00:00,"['chineseocr', 'pp-ocr', 'spec', 'insomnia-alt..."
371,2025-12-17 00:00:00+00:00,"['artificial-intelligence', 'daisydisk', 'phon..."
372,2025-06-27 00:00:00+00:00,"['hacktoberfest', 'web'], 'chatgpt', 'browser'..."
373,2025-09-29 00:00:00+00:00,"['spec', 'chatgpt', 'windows'], 'dba-roadmap',..."


In [66]:
release_topics_df = release_df.copy()
trending_topics_df = trending_df.copy()
release_topics_df['date_only'] = pd.to_datetime(release_df['published_at']).dt.date
trending_topics_df['date'] = pd.to_datetime(trending_topics_df['date']).dt.date

topic_map = {row['date']: set(row['all_topics']) for _, row in trending_topics_df.iterrows()}

release_topics_df['is_trending'] = release_topics_df.apply(
    lambda row: bool(set(row['topics']) & topic_map.get(row['date_only'], set())),
    axis=1
)

print(f"Success Matching: {release_topics_df['is_trending'].sum()} / {len(release_topics_df)}")
release_topics_df

Success Matching: 10 / 17


Unnamed: 0,full_name,repo_stars,repo_forks,repo_watchers,language,repo_created_at,repo_updated_at,topics,release_name,release_body,author_followers,author_public_repos,author_type,published_at,first_week_star,date_only,is_trending
0,BrandeisPatrick/blank-space/v0.4.0,104,15,104,JavaScript,2025-10-10 20:45:39+00:00,2026-01-07 00:52:40+00:00,"[[""ai-agent"", ""antrophic"", ""artificial-intelli...",v0.4.0 - Unlock the potential of vibe coding,We always prioritize the vibe coding experien...,9,23,User,2025-12-17 00:44:34+00:00,2,2025-12-17,False
1,supermemoryai/install-mcp/v1.10.0,156,27,156,TypeScript,2025-04-14 23:01:16+00:00,2026-01-09 15:48:22+00:00,[[]],v1.10.0,# [1.10.0](https://github.com/supermemoryai/in...,661,17,Organization,2025-09-23 21:12:13+00:00,3,2025-09-23,True
2,infiniV/VoiceFlow/v1.3.1,240,18,240,TypeScript,2025-12-13 18:28:09+00:00,2026-01-11 16:29:11+00:00,[[]],VoiceFlow v1.3.1,## What's New\n\n- **Fix:** Resolved crash whe...,12,19,User,2026-01-03 22:50:06+00:00,9,2026-01-03,True
3,rudrankriyam/Foundation-Models-Framework-Examp...,868,56,868,Swift,2025-06-09 19:29:16+00:00,2026-01-11 16:20:40+00:00,[[]],Release 0.2.0,Bug fixes and improvements,542,132,User,2025-07-09 08:27:20+00:00,44,2025-07-09,True
4,sameerasw/my-internet/pre-release-07a1ecb932ce...,366,96,366,CSS,2025-02-08 11:26:59+00:00,2026-01-07 10:11:14+00:00,"[[""css"", ""custom"", ""custom-css"", ""customcss"", ...",Pre-Release 07a1ecb932cefb23e49dc4959a55fbe15e...,Pre-release for commit 07a1ecb932cefb23e49dc49...,383,105,User,2025-07-05 09:47:48+00:00,2,2025-07-05,False
5,sameerasw/airsync-android/v2.2.0,179,12,179,Kotlin,2025-07-28 18:18:40+00:00,2026-01-08 13:30:34+00:00,"[[""android"", ""jetpack"", ""jetpack-compose"", ""ko...","Calls, Mirroring over tailscale, Clipboard cha...",# What's New in this feature drop?\r\n- Call s...,383,105,User,2025-12-24 15:53:49+00:00,4,2025-12-24,False
6,oliverbravery/PrintGuard/v1.0.0b3,195,16,195,Python,2025-05-10 23:31:02+00:00,2026-01-11 10:28:59+00:00,[[]],Beta 3 - Multiplatform docker builds & RTSP st...,Beta 3\r\n- Better camera support and manageme...,7,19,User,2025-07-18 11:50:24+00:00,20,2025-07-18,True
7,ExtremeXT/ExtremeROM/v2.6.1,501,124,501,Shell,2025-03-28 10:22:50+00:00,2026-01-11 01:46:16+00:00,[[]],v2.6.1,"<a href=""https://github.com/ExtremeXT/ExtremeR...",217,74,User,2025-08-22 14:48:07+00:00,16,2025-08-22,True
8,MoowGlax/ygg-helper-dl/v1.3,116,5,116,JavaScript,2025-12-21 21:44:39+00:00,2026-01-10 19:49:08+00:00,[[]],Release v1.3,,2,6,User,2026-01-03 22:28:53+00:00,4,2026-01-03,True
9,Aeastr/SettingsKit/2.0.0,244,5,244,Swift,2025-11-16 15:58:48+00:00,2026-01-11 05:00:41+00:00,[[]],2.0.0,,680,47,User,2026-01-03 22:10:57+00:00,5,2026-01-03,True


In [67]:
release_topics_df = release_topics_df.drop(columns=['topics'])
release_df = release_topics_df
release_df

Unnamed: 0,full_name,repo_stars,repo_forks,repo_watchers,language,repo_created_at,repo_updated_at,release_name,release_body,author_followers,author_public_repos,author_type,published_at,first_week_star,date_only,is_trending
0,BrandeisPatrick/blank-space/v0.4.0,104,15,104,JavaScript,2025-10-10 20:45:39+00:00,2026-01-07 00:52:40+00:00,v0.4.0 - Unlock the potential of vibe coding,We always prioritize the vibe coding experien...,9,23,User,2025-12-17 00:44:34+00:00,2,2025-12-17,False
1,supermemoryai/install-mcp/v1.10.0,156,27,156,TypeScript,2025-04-14 23:01:16+00:00,2026-01-09 15:48:22+00:00,v1.10.0,# [1.10.0](https://github.com/supermemoryai/in...,661,17,Organization,2025-09-23 21:12:13+00:00,3,2025-09-23,True
2,infiniV/VoiceFlow/v1.3.1,240,18,240,TypeScript,2025-12-13 18:28:09+00:00,2026-01-11 16:29:11+00:00,VoiceFlow v1.3.1,## What's New\n\n- **Fix:** Resolved crash whe...,12,19,User,2026-01-03 22:50:06+00:00,9,2026-01-03,True
3,rudrankriyam/Foundation-Models-Framework-Examp...,868,56,868,Swift,2025-06-09 19:29:16+00:00,2026-01-11 16:20:40+00:00,Release 0.2.0,Bug fixes and improvements,542,132,User,2025-07-09 08:27:20+00:00,44,2025-07-09,True
4,sameerasw/my-internet/pre-release-07a1ecb932ce...,366,96,366,CSS,2025-02-08 11:26:59+00:00,2026-01-07 10:11:14+00:00,Pre-Release 07a1ecb932cefb23e49dc4959a55fbe15e...,Pre-release for commit 07a1ecb932cefb23e49dc49...,383,105,User,2025-07-05 09:47:48+00:00,2,2025-07-05,False
5,sameerasw/airsync-android/v2.2.0,179,12,179,Kotlin,2025-07-28 18:18:40+00:00,2026-01-08 13:30:34+00:00,"Calls, Mirroring over tailscale, Clipboard cha...",# What's New in this feature drop?\r\n- Call s...,383,105,User,2025-12-24 15:53:49+00:00,4,2025-12-24,False
6,oliverbravery/PrintGuard/v1.0.0b3,195,16,195,Python,2025-05-10 23:31:02+00:00,2026-01-11 10:28:59+00:00,Beta 3 - Multiplatform docker builds & RTSP st...,Beta 3\r\n- Better camera support and manageme...,7,19,User,2025-07-18 11:50:24+00:00,20,2025-07-18,True
7,ExtremeXT/ExtremeROM/v2.6.1,501,124,501,Shell,2025-03-28 10:22:50+00:00,2026-01-11 01:46:16+00:00,v2.6.1,"<a href=""https://github.com/ExtremeXT/ExtremeR...",217,74,User,2025-08-22 14:48:07+00:00,16,2025-08-22,True
8,MoowGlax/ygg-helper-dl/v1.3,116,5,116,JavaScript,2025-12-21 21:44:39+00:00,2026-01-10 19:49:08+00:00,Release v1.3,,2,6,User,2026-01-03 22:28:53+00:00,4,2026-01-03,True
9,Aeastr/SettingsKit/2.0.0,244,5,244,Swift,2025-11-16 15:58:48+00:00,2026-01-11 05:00:41+00:00,2.0.0,,680,47,User,2026-01-03 22:10:57+00:00,5,2026-01-03,True


In [68]:
# Calculate the duration of a repository
release_df['repo_duration'] = (release_df['repo_updated_at'] - release_df['repo_created_at']).dt.days
release_df = release_df.drop(columns=['repo_updated_at', 'repo_created_at'])
release_df

Unnamed: 0,full_name,repo_stars,repo_forks,repo_watchers,language,release_name,release_body,author_followers,author_public_repos,author_type,published_at,first_week_star,date_only,is_trending,repo_duration
0,BrandeisPatrick/blank-space/v0.4.0,104,15,104,JavaScript,v0.4.0 - Unlock the potential of vibe coding,We always prioritize the vibe coding experien...,9,23,User,2025-12-17 00:44:34+00:00,2,2025-12-17,False,88
1,supermemoryai/install-mcp/v1.10.0,156,27,156,TypeScript,v1.10.0,# [1.10.0](https://github.com/supermemoryai/in...,661,17,Organization,2025-09-23 21:12:13+00:00,3,2025-09-23,True,269
2,infiniV/VoiceFlow/v1.3.1,240,18,240,TypeScript,VoiceFlow v1.3.1,## What's New\n\n- **Fix:** Resolved crash whe...,12,19,User,2026-01-03 22:50:06+00:00,9,2026-01-03,True,28
3,rudrankriyam/Foundation-Models-Framework-Examp...,868,56,868,Swift,Release 0.2.0,Bug fixes and improvements,542,132,User,2025-07-09 08:27:20+00:00,44,2025-07-09,True,215
4,sameerasw/my-internet/pre-release-07a1ecb932ce...,366,96,366,CSS,Pre-Release 07a1ecb932cefb23e49dc4959a55fbe15e...,Pre-release for commit 07a1ecb932cefb23e49dc49...,383,105,User,2025-07-05 09:47:48+00:00,2,2025-07-05,False,332
5,sameerasw/airsync-android/v2.2.0,179,12,179,Kotlin,"Calls, Mirroring over tailscale, Clipboard cha...",# What's New in this feature drop?\r\n- Call s...,383,105,User,2025-12-24 15:53:49+00:00,4,2025-12-24,False,163
6,oliverbravery/PrintGuard/v1.0.0b3,195,16,195,Python,Beta 3 - Multiplatform docker builds & RTSP st...,Beta 3\r\n- Better camera support and manageme...,7,19,User,2025-07-18 11:50:24+00:00,20,2025-07-18,True,245
7,ExtremeXT/ExtremeROM/v2.6.1,501,124,501,Shell,v2.6.1,"<a href=""https://github.com/ExtremeXT/ExtremeR...",217,74,User,2025-08-22 14:48:07+00:00,16,2025-08-22,True,288
8,MoowGlax/ygg-helper-dl/v1.3,116,5,116,JavaScript,Release v1.3,,2,6,User,2026-01-03 22:28:53+00:00,4,2026-01-03,True,19
9,Aeastr/SettingsKit/2.0.0,244,5,244,Swift,2.0.0,,680,47,User,2026-01-03 22:10:57+00:00,5,2026-01-03,True,55


In [69]:
# one-hot author type
release_df['org_author'] = (release_df['author_type'] == 'Organization').astype(int)
release_df['user_author'] = (release_df['author_type'] == 'User').astype(int)
release_df = release_df.drop(columns=['author_type'])
release_df

Unnamed: 0,full_name,repo_stars,repo_forks,repo_watchers,language,release_name,release_body,author_followers,author_public_repos,published_at,first_week_star,date_only,is_trending,repo_duration,org_author,user_author
0,BrandeisPatrick/blank-space/v0.4.0,104,15,104,JavaScript,v0.4.0 - Unlock the potential of vibe coding,We always prioritize the vibe coding experien...,9,23,2025-12-17 00:44:34+00:00,2,2025-12-17,False,88,0,1
1,supermemoryai/install-mcp/v1.10.0,156,27,156,TypeScript,v1.10.0,# [1.10.0](https://github.com/supermemoryai/in...,661,17,2025-09-23 21:12:13+00:00,3,2025-09-23,True,269,1,0
2,infiniV/VoiceFlow/v1.3.1,240,18,240,TypeScript,VoiceFlow v1.3.1,## What's New\n\n- **Fix:** Resolved crash whe...,12,19,2026-01-03 22:50:06+00:00,9,2026-01-03,True,28,0,1
3,rudrankriyam/Foundation-Models-Framework-Examp...,868,56,868,Swift,Release 0.2.0,Bug fixes and improvements,542,132,2025-07-09 08:27:20+00:00,44,2025-07-09,True,215,0,1
4,sameerasw/my-internet/pre-release-07a1ecb932ce...,366,96,366,CSS,Pre-Release 07a1ecb932cefb23e49dc4959a55fbe15e...,Pre-release for commit 07a1ecb932cefb23e49dc49...,383,105,2025-07-05 09:47:48+00:00,2,2025-07-05,False,332,0,1
5,sameerasw/airsync-android/v2.2.0,179,12,179,Kotlin,"Calls, Mirroring over tailscale, Clipboard cha...",# What's New in this feature drop?\r\n- Call s...,383,105,2025-12-24 15:53:49+00:00,4,2025-12-24,False,163,0,1
6,oliverbravery/PrintGuard/v1.0.0b3,195,16,195,Python,Beta 3 - Multiplatform docker builds & RTSP st...,Beta 3\r\n- Better camera support and manageme...,7,19,2025-07-18 11:50:24+00:00,20,2025-07-18,True,245,0,1
7,ExtremeXT/ExtremeROM/v2.6.1,501,124,501,Shell,v2.6.1,"<a href=""https://github.com/ExtremeXT/ExtremeR...",217,74,2025-08-22 14:48:07+00:00,16,2025-08-22,True,288,0,1
8,MoowGlax/ygg-helper-dl/v1.3,116,5,116,JavaScript,Release v1.3,,2,6,2026-01-03 22:28:53+00:00,4,2026-01-03,True,19,0,1
9,Aeastr/SettingsKit/2.0.0,244,5,244,Swift,2.0.0,,680,47,2026-01-03 22:10:57+00:00,5,2026-01-03,True,55,0,1


In [70]:
# whether release publish date is weekday or weekend
release_df['publish_is_weekday'] = (release_df['published_at'].dt.dayofweek <= 4).astype(int)
release_df

Unnamed: 0,full_name,repo_stars,repo_forks,repo_watchers,language,release_name,release_body,author_followers,author_public_repos,published_at,first_week_star,date_only,is_trending,repo_duration,org_author,user_author,publish_is_weekday
0,BrandeisPatrick/blank-space/v0.4.0,104,15,104,JavaScript,v0.4.0 - Unlock the potential of vibe coding,We always prioritize the vibe coding experien...,9,23,2025-12-17 00:44:34+00:00,2,2025-12-17,False,88,0,1,1
1,supermemoryai/install-mcp/v1.10.0,156,27,156,TypeScript,v1.10.0,# [1.10.0](https://github.com/supermemoryai/in...,661,17,2025-09-23 21:12:13+00:00,3,2025-09-23,True,269,1,0,1
2,infiniV/VoiceFlow/v1.3.1,240,18,240,TypeScript,VoiceFlow v1.3.1,## What's New\n\n- **Fix:** Resolved crash whe...,12,19,2026-01-03 22:50:06+00:00,9,2026-01-03,True,28,0,1,0
3,rudrankriyam/Foundation-Models-Framework-Examp...,868,56,868,Swift,Release 0.2.0,Bug fixes and improvements,542,132,2025-07-09 08:27:20+00:00,44,2025-07-09,True,215,0,1,1
4,sameerasw/my-internet/pre-release-07a1ecb932ce...,366,96,366,CSS,Pre-Release 07a1ecb932cefb23e49dc4959a55fbe15e...,Pre-release for commit 07a1ecb932cefb23e49dc49...,383,105,2025-07-05 09:47:48+00:00,2,2025-07-05,False,332,0,1,0
5,sameerasw/airsync-android/v2.2.0,179,12,179,Kotlin,"Calls, Mirroring over tailscale, Clipboard cha...",# What's New in this feature drop?\r\n- Call s...,383,105,2025-12-24 15:53:49+00:00,4,2025-12-24,False,163,0,1,1
6,oliverbravery/PrintGuard/v1.0.0b3,195,16,195,Python,Beta 3 - Multiplatform docker builds & RTSP st...,Beta 3\r\n- Better camera support and manageme...,7,19,2025-07-18 11:50:24+00:00,20,2025-07-18,True,245,0,1,1
7,ExtremeXT/ExtremeROM/v2.6.1,501,124,501,Shell,v2.6.1,"<a href=""https://github.com/ExtremeXT/ExtremeR...",217,74,2025-08-22 14:48:07+00:00,16,2025-08-22,True,288,0,1,1
8,MoowGlax/ygg-helper-dl/v1.3,116,5,116,JavaScript,Release v1.3,,2,6,2026-01-03 22:28:53+00:00,4,2026-01-03,True,19,0,1,0
9,Aeastr/SettingsKit/2.0.0,244,5,244,Swift,2.0.0,,680,47,2026-01-03 22:10:57+00:00,5,2026-01-03,True,55,0,1,0


In [71]:
# one-hot programming language
ALL_LANGUAGES = [
    'C', 'Csharp', 'Cplusplus', 'CSS', 'Dart', 'Go', 'HTML', 'Java',
    'JavaScript', 'Kotlin', 'Lua', 'Other', 'PHP', 'Python', 'QML',
    'Rust', 'Shell', 'Svelte', 'Swift', 'TypeScript', 'Vue'
]

LANGUAGE_MAPPING = {
    'C': 'C',
    'C#': 'Csharp',
    'C++': 'Cplusplus',
    'CSS': 'CSS',
    'Dart': 'Dart',
    'Go': 'Go',
    'HTML': 'HTML',
    'Java': 'Java',
    'javaScript': 'javaScript',
    'Kotlin': 'Kotlin',
    'Lua': 'Lua',
    'PHP': 'PHP',
    'Python': 'Python',
    'qml': 'qml',
    'Rust': 'Rust',
    'Shell': 'Shell',
    'Svelte': 'Svelte',
    'Swift': 'Swift',
    'TypeScript': 'TypeScript',
    'Vue': 'Vue'
}

def one_hot_language(df):
    expected_columns = [f'language__{lang}' for lang in ALL_LANGUAGES]
    for col in expected_columns:
        df[col] = 0
    for idx, lang in enumerate(df['language']):
        if pd.isna(lang):
            col_name = 'language__Other'
        else:
            col_name = f'language__{LANGUAGE_MAPPING.get(lang, "Other")}'
        df.at[idx, col_name] = 1
    return df

release_df = one_hot_language(release_df)
release_df = release_df.drop(columns=['language'])
release_df

Unnamed: 0,full_name,repo_stars,repo_forks,repo_watchers,release_name,release_body,author_followers,author_public_repos,published_at,first_week_star,...,language__Other,language__PHP,language__Python,language__QML,language__Rust,language__Shell,language__Svelte,language__Swift,language__TypeScript,language__Vue
0,BrandeisPatrick/blank-space/v0.4.0,104,15,104,v0.4.0 - Unlock the potential of vibe coding,We always prioritize the vibe coding experien...,9,23,2025-12-17 00:44:34+00:00,2,...,1,0,0,0,0,0,0,0,0,0
1,supermemoryai/install-mcp/v1.10.0,156,27,156,v1.10.0,# [1.10.0](https://github.com/supermemoryai/in...,661,17,2025-09-23 21:12:13+00:00,3,...,0,0,0,0,0,0,0,0,1,0
2,infiniV/VoiceFlow/v1.3.1,240,18,240,VoiceFlow v1.3.1,## What's New\n\n- **Fix:** Resolved crash whe...,12,19,2026-01-03 22:50:06+00:00,9,...,0,0,0,0,0,0,0,0,1,0
3,rudrankriyam/Foundation-Models-Framework-Examp...,868,56,868,Release 0.2.0,Bug fixes and improvements,542,132,2025-07-09 08:27:20+00:00,44,...,0,0,0,0,0,0,0,1,0,0
4,sameerasw/my-internet/pre-release-07a1ecb932ce...,366,96,366,Pre-Release 07a1ecb932cefb23e49dc4959a55fbe15e...,Pre-release for commit 07a1ecb932cefb23e49dc49...,383,105,2025-07-05 09:47:48+00:00,2,...,0,0,0,0,0,0,0,0,0,0
5,sameerasw/airsync-android/v2.2.0,179,12,179,"Calls, Mirroring over tailscale, Clipboard cha...",# What's New in this feature drop?\r\n- Call s...,383,105,2025-12-24 15:53:49+00:00,4,...,0,0,0,0,0,0,0,0,0,0
6,oliverbravery/PrintGuard/v1.0.0b3,195,16,195,Beta 3 - Multiplatform docker builds & RTSP st...,Beta 3\r\n- Better camera support and manageme...,7,19,2025-07-18 11:50:24+00:00,20,...,0,0,1,0,0,0,0,0,0,0
7,ExtremeXT/ExtremeROM/v2.6.1,501,124,501,v2.6.1,"<a href=""https://github.com/ExtremeXT/ExtremeR...",217,74,2025-08-22 14:48:07+00:00,16,...,0,0,0,0,0,1,0,0,0,0
8,MoowGlax/ygg-helper-dl/v1.3,116,5,116,Release v1.3,,2,6,2026-01-03 22:28:53+00:00,4,...,1,0,0,0,0,0,0,0,0,0
9,Aeastr/SettingsKit/2.0.0,244,5,244,2.0.0,,680,47,2026-01-03 22:10:57+00:00,5,...,0,0,0,0,0,0,0,1,0,0


In [72]:
# Missing values
missing_values = release_df.isnull().sum()
missing_info = pd.DataFrame({
    'Missing nums': missing_values
})
print(missing_info[missing_info['Missing nums'] > 0])
release_df_filled = release_df.copy()
release_df_filled['release_name'] = release_df_filled['release_name'].fillna('')
release_df_filled['release_body'] = release_df_filled['release_body'].fillna('')
release_df_filled

Empty DataFrame
Columns: [Missing nums]
Index: []


Unnamed: 0,full_name,repo_stars,repo_forks,repo_watchers,release_name,release_body,author_followers,author_public_repos,published_at,first_week_star,...,language__Other,language__PHP,language__Python,language__QML,language__Rust,language__Shell,language__Svelte,language__Swift,language__TypeScript,language__Vue
0,BrandeisPatrick/blank-space/v0.4.0,104,15,104,v0.4.0 - Unlock the potential of vibe coding,We always prioritize the vibe coding experien...,9,23,2025-12-17 00:44:34+00:00,2,...,1,0,0,0,0,0,0,0,0,0
1,supermemoryai/install-mcp/v1.10.0,156,27,156,v1.10.0,# [1.10.0](https://github.com/supermemoryai/in...,661,17,2025-09-23 21:12:13+00:00,3,...,0,0,0,0,0,0,0,0,1,0
2,infiniV/VoiceFlow/v1.3.1,240,18,240,VoiceFlow v1.3.1,## What's New\n\n- **Fix:** Resolved crash whe...,12,19,2026-01-03 22:50:06+00:00,9,...,0,0,0,0,0,0,0,0,1,0
3,rudrankriyam/Foundation-Models-Framework-Examp...,868,56,868,Release 0.2.0,Bug fixes and improvements,542,132,2025-07-09 08:27:20+00:00,44,...,0,0,0,0,0,0,0,1,0,0
4,sameerasw/my-internet/pre-release-07a1ecb932ce...,366,96,366,Pre-Release 07a1ecb932cefb23e49dc4959a55fbe15e...,Pre-release for commit 07a1ecb932cefb23e49dc49...,383,105,2025-07-05 09:47:48+00:00,2,...,0,0,0,0,0,0,0,0,0,0
5,sameerasw/airsync-android/v2.2.0,179,12,179,"Calls, Mirroring over tailscale, Clipboard cha...",# What's New in this feature drop?\r\n- Call s...,383,105,2025-12-24 15:53:49+00:00,4,...,0,0,0,0,0,0,0,0,0,0
6,oliverbravery/PrintGuard/v1.0.0b3,195,16,195,Beta 3 - Multiplatform docker builds & RTSP st...,Beta 3\r\n- Better camera support and manageme...,7,19,2025-07-18 11:50:24+00:00,20,...,0,0,1,0,0,0,0,0,0,0
7,ExtremeXT/ExtremeROM/v2.6.1,501,124,501,v2.6.1,"<a href=""https://github.com/ExtremeXT/ExtremeR...",217,74,2025-08-22 14:48:07+00:00,16,...,0,0,0,0,0,1,0,0,0,0
8,MoowGlax/ygg-helper-dl/v1.3,116,5,116,Release v1.3,,2,6,2026-01-03 22:28:53+00:00,4,...,1,0,0,0,0,0,0,0,0,0
9,Aeastr/SettingsKit/2.0.0,244,5,244,2.0.0,,680,47,2026-01-03 22:10:57+00:00,5,...,0,0,0,0,0,0,0,1,0,0


## Uploading new data to the Feature Store

In [None]:
# Insert new data
release_fg.insert(release_df_filled, overwrite=False)