In [1]:
# %pip installs into the environment of the running kernel, whereas !pip uses
# whichever pip happens to be first on PATH. python-dotenv provides the
# `dotenv` module that this notebook imports.
%pip install requests pymongo tqdm python-dotenv




[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os

In [4]:
# Read the variables defined in the local "password.env" file into the process
# environment so that later cells can fetch them with os.getenv().
from dotenv import load_dotenv
load_dotenv(dotenv_path="password.env")

True

In [6]:
import os

GITHUB_API_KEY = os.getenv("GITHUB_API_KEY")
# Report only whether a token was found. Printing the value itself would store
# the secret in the notebook's saved output.
print("Token loaded:", bool(GITHUB_API_KEY))

# Request headers sent with every GitHub API call: token auth + v3 JSON media type.
HEADERS = {
    "Authorization": f"token {GITHUB_API_KEY}",
    "Accept": "application/vnd.github.v3+json"
}


Token: ***REMOVED***


In [7]:
def get_json(url):
    """Fetch ``url`` from the GitHub API and return the decoded JSON body.

    The request is sent with the module-level ``HEADERS``. When GitHub answers
    403 with ``X-RateLimit-Remaining: 0`` the call sleeps until the time given
    in ``X-RateLimit-Reset`` (60 seconds if that header is absent) and then
    retries. Every other error status is raised.

    Args:
        url: Absolute URL of the endpoint to query.

    Returns:
        The parsed JSON payload (``response.json()``).

    Raises:
        Exception: On HTTP 401, i.e. the token was rejected.
        requests.HTTPError: On any other 4xx/5xx status, including a 403 that
            is not a rate-limit response.
    """
    import time  # local import: `time` is not imported before this cell

    while True:
        response = requests.get(url, headers=HEADERS)  # headers must be here!
        if response.status_code == 401:
            raise Exception("Unauthorized: Check your GitHub token!")
        if (response.status_code == 403
                and response.headers.get("X-RateLimit-Remaining") == "0"):
            # This branch used to be a bare `pass`, so raise_for_status() below
            # turned every rate-limit hit into an immediate HTTPError and the
            # surrounding loop never retried.
            reset_header = response.headers.get("X-RateLimit-Reset")
            if reset_header is not None:
                # The reset time is an epoch timestamp; +1s absorbs clock skew.
                wait_seconds = max(int(reset_header) - time.time(), 0) + 1
            else:
                wait_seconds = 60
            time.sleep(wait_seconds)
            continue
        response.raise_for_status()
        return response.json()


In [8]:
import os
import requests
from dotenv import load_dotenv

load_dotenv()  # or load_dotenv(dotenv_path="password.env")

token = os.getenv("GITHUB_API_KEY")
# Report only whether a token was found. Printing the value itself would store
# the secret in the notebook's saved output.
print("Token loaded:", bool(token))

# One-user request as a smoke test that the token is accepted by the API.
headers = {"Authorization": f"token {token}"}
response = requests.get(
    "https://api.github.com/users?per_page=1",
    headers=headers,
    timeout=30,  # fail instead of hanging the cell if the API is unreachable
)

print("Status code:", response.status_code)
print("Response:", response.json())


Token: ***REMOVED***
Status code: 200
Response: [{'login': 'mojombo', 'id': 1, 'node_id': 'MDQ6VXNlcjE=', 'avatar_url': 'https://avatars.githubusercontent.com/u/1?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/mojombo', 'html_url': 'https://github.com/mojombo', 'followers_url': 'https://api.github.com/users/mojombo/followers', 'following_url': 'https://api.github.com/users/mojombo/following{/other_user}', 'gists_url': 'https://api.github.com/users/mojombo/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/mojombo/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/mojombo/subscriptions', 'organizations_url': 'https://api.github.com/users/mojombo/orgs', 'repos_url': 'https://api.github.com/users/mojombo/repos', 'events_url': 'https://api.github.com/users/mojombo/events{/privacy}', 'received_events_url': 'https://api.github.com/users/mojombo/received_events', 'type': 'User', 'user_view_type': 'public', 'site_admin': False}]


In [9]:
import os
from dotenv import load_dotenv
import requests
import time
from pymongo import MongoClient
from tqdm import tqdm

# Pull variables from the dotenv file into the process environment.
load_dotenv()

# --- GitHub credentials -----------------------------------------------------
GITHUB_TOKEN = os.getenv("GITHUB_API_KEY")
if GITHUB_TOKEN in (None, ""):
    # Stop here rather than sending unauthenticated requests later on.
    raise Exception("GitHub API key is missing!")

# Headers attached to every GitHub request made by the helpers below.
HEADERS = dict(
    Authorization=f"token {GITHUB_TOKEN}",
    Accept="application/vnd.github.v3+json",
)

# --- MongoDB handles --------------------------------------------------------
MONGO_URI = os.getenv("MONGO_URI")
client = MongoClient(MONGO_URI)
db = client["github_db"]
collection = db["user_data"]  # one document per GitHub user

# ... rest of your code ...


# === Helper Functions ===
def get_json(url):
    """GET ``url`` with the shared ``HEADERS`` and return the decoded JSON.

    A response counts as rate-limited when it is a 429, or a 403 carrying
    either ``X-RateLimit-Remaining: 0`` or a ``Retry-After`` header. In that
    case the call waits as instructed by the server and retries, up to five
    attempts in total. A 403 without those markers is raised straight away
    instead of being retried after a blind 60-second sleep.

    Args:
        url: Absolute URL of the endpoint to query.

    Returns:
        The parsed JSON payload (``response.json()``).

    Raises:
        requests.HTTPError: If the final response has a 4xx/5xx status.
        requests.Timeout: If the server does not answer within 30 seconds.
    """
    max_attempts = 5  # bounded so a persistent limit cannot loop forever
    for attempt in range(1, max_attempts + 1):
        response = requests.get(url, headers=HEADERS, timeout=30)
        status = response.status_code
        retry_after = response.headers.get("Retry-After")
        rate_limited = status == 429 or (
            status == 403
            and (response.headers.get("X-RateLimit-Remaining") == "0"
                 or retry_after is not None)
        )
        if not rate_limited or attempt == max_attempts:
            break

        reset_at = response.headers.get("X-RateLimit-Reset")
        if retry_after is not None:
            # Server states the pause in seconds.
            wait_seconds = int(retry_after)
        elif reset_at is not None:
            # Reset time is an epoch timestamp; +1s absorbs clock skew.
            wait_seconds = max(int(reset_at) - time.time(), 0) + 1
        else:
            wait_seconds = 60  # no hint from the server: keep the old fixed pause
        time.sleep(wait_seconds)

    response.raise_for_status()
    return response.json()

def get_users_batch(since=0, per_page=100):
    """Fetch one page of the GitHub ``/users`` listing.

    Args:
        since: Value sent as the ``since`` query parameter.
        per_page: Value sent as the ``per_page`` query parameter.

    Returns:
        Whatever ``get_json`` returns for the assembled URL.
    """
    endpoint = "https://api.github.com/users"
    query = "since={}&per_page={}".format(since, per_page)
    return get_json(endpoint + "?" + query)

def get_user_details(username):
    """Fetch the profile payload for ``username``.

    Args:
        username: GitHub login appended to the ``/users/`` endpoint.

    Returns:
        Whatever ``get_json`` returns for that profile URL.
    """
    profile_url = "https://api.github.com/users/{}".format(username)
    return get_json(profile_url)

def get_list(url, key='login'):
    """Fetch ``url`` and pull one field out of every returned item.

    Args:
        url: Endpoint passed to ``get_json``; its result is iterated over.
        key: Field to read from each item (``'login'`` by default).

    Returns:
        A list with ``item[key]`` for every item, in response order.
    """
    values = []
    for entry in get_json(url):
        values.append(entry[key])
    return values

def get_starred_or_subs(url):
    """Fetch ``url`` and return the ``html_url`` of every returned item.

    Args:
        url: Endpoint passed to ``get_json``; its result is iterated over.

    Returns:
        A list of ``item['html_url']`` values, in response order.
    """
    page_links = []
    for entry in get_json(url):
        page_links.append(entry['html_url'])
    return page_links

def get_languages(username):
    """Sum the per-language figures over the repos listed for ``username``.

    The user's repo listing is fetched first; then each repo's
    ``languages_url`` is fetched and its ``{language: number}`` mapping is
    added into a running total.

    Args:
        username: GitHub login whose ``/repos`` listing is read.

    Returns:
        Dict mapping language name to the summed number, keyed in order of
        first appearance.
    """
    repo_listing = get_json(f"https://api.github.com/users/{username}/repos")
    totals = {}
    # Generator keeps the fetches interleaved with the summing, one repo at a time.
    for languages_url in (entry['languages_url'] for entry in repo_listing):
        for name, amount in get_json(languages_url).items():
            totals.setdefault(name, 0)
            totals[name] += amount
    return totals

def get_total_commits(username):
    """Add up the commit counts of the non-fork repos listed for ``username``.

    Each repo's commits endpoint is queried with ``per_page=1``. With one
    commit per page, the page number in the response's ``rel="last"`` link
    equals the number of commits, so a single request per repo is enough.
    The old code added ``len(commits)`` of that one-item page, which counted
    at most 1 per repo.

    The figure covers every commit returned by the endpoint for the repo, not
    only commits authored by ``username``.

    Args:
        username: GitHub login whose ``/repos`` listing is read.

    Returns:
        Integer sum of the per-repo commit counts.
    """
    from urllib.parse import parse_qs, urlparse

    repos = get_json(f"https://api.github.com/users/{username}/repos")
    total = 0
    for repo in repos:
        if repo.get('fork'):
            continue
        url = repo['commits_url'].replace("{/sha}", "")
        response = requests.get(url, headers=HEADERS, params={'per_page': 1})
        commits = response.json()
        if not isinstance(commits, list):
            # Non-list body (e.g. an error object): nothing to count.
            continue

        last_url = response.links.get("last", {}).get("url")
        page_values = parse_qs(urlparse(last_url).query).get("page") if last_url else None
        if page_values:
            total += int(page_values[0])
        else:
            # No "last" link means everything fit on this page (0 or 1 commit).
            total += len(commits)
    return total

# === Core Function to Fetch and Store User Data ===
def collect_and_store_user(username):
    """Gather everything known about ``username`` and upsert it into MongoDB.

    Profile fields are copied from ``get_user_details``; the list, language
    and commit fields come from the other helper functions. The finished
    document is written to ``collection`` keyed by ``"Login"``.

    Any exception raised along the way is caught and reported with ``print``,
    so one failing user does not stop the caller.

    Args:
        username: GitHub login to collect.

    Returns:
        None.
    """
    try:
        profile = get_user_details(username)
        user_api = f"https://api.github.com/users/{username}"

        # Scalar fields: (document key, key in the profile payload).
        record = {"Login": username}
        for doc_key, api_key in (
            ("Name", "name"),
            ("Bio", "bio"),
            ("Public Repositories", "public_repos"),
            ("Followers Count", "followers"),
            ("Following Count", "following"),
            ("Created At", "created_at"),
            ("Updated At", "updated_at"),
            ("Avatar URL", "avatar_url"),
            ("Profile URL", "html_url"),
        ):
            record[doc_key] = profile.get(api_key)

        # Derived fields, each backed by its own API call(s).
        record["Followers List"] = get_list(f"{user_api}/followers")
        record["Following List"] = get_list(f"{user_api}/following")
        record["Starred Repositories"] = get_starred_or_subs(f"{user_api}/starred")
        record["Subscriptions"] = get_starred_or_subs(f"{user_api}/subscriptions")
        record["Organizations"] = get_list(f"{user_api}/orgs")
        record["Languages"] = get_languages(username)
        record["Total Commits"] = get_total_commits(username)

        collection.update_one({"Login": username}, {"$set": record}, upsert=True)
    except Exception as err:
        print(f"❌ Error for {username}: {err}")

# === Batch Processing ===
def batch_process_users(start_since=0, batches=1, per_page=10):
    """Walk the GitHub user listing page by page and store every user found.

    Args:
        start_since: ``since`` value used for the first page.
        batches: Maximum number of pages to fetch.
        per_page: Number of users requested per page.

    Returns:
        None. Stops early as soon as a page comes back empty.
    """
    cursor = start_since
    for _batch_number in range(batches):
        page = get_users_batch(cursor, per_page)
        if not page:
            return
        for entry in tqdm(page, desc="Processing batch"):
            collect_and_store_user(entry['login'])
        # The next page starts after the last user id seen on this one.
        cursor = page[-1]['id']

# === Fallback for Missing Username ===
def fetch_user_if_not_exists(username):
    """Collect ``username`` only when no document for it is stored yet.

    Args:
        username: GitHub login looked up in ``collection`` by ``"Login"``.

    Returns:
        None. A status line is printed in either case.
    """
    already_stored = collection.find_one({"Login": username})
    if not already_stored:
        print(f"🔍 Fetching missing user: {username}")
        collect_and_store_user(username)
        return
    print(f"✅ User {username} already exists in DB.")

# === Example Usage ===
# Process the first 2 batches. per_page is left at its default of 10, so each
# batch handles 10 users.
batch_process_users(start_since=0, batches=2)

Processing batch: 100%|██████████| 10/10 [03:38<00:00, 21.90s/it]
Processing batch: 100%|██████████| 10/10 [03:13<00:00, 19.32s/it]
