In [None]:
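# Uncomment if the dependencies are missing (%pip installs into the running kernel's environment):
# %pip install requests pandas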

In [None]:
import os

import requests
import pandas as pd

# The personal access token is read from the GITHUB_TOKEN environment variable
# so that it is never stored in the notebook itself.
# NOTE(review): a token was previously hardcoded on this line; treat it as
# leaked and revoke it in GitHub's settings.
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')

GITHUB_API_URL = 'https://api.github.com'

# Only attach an Authorization header when a token is configured; without one
# the requests go out unauthenticated (GitHub applies a lower rate limit).
headers = {'Authorization': f'token {GITHUB_TOKEN}'} if GITHUB_TOKEN else {}

In [None]:
# Fetch users in Seattle with more than `min_followers` followers
def fetch_users_in_seattle(min_followers=200):
    """Search GitHub for users located in Seattle above a follower threshold.

    Pages through the ``/search/users`` endpoint 100 results at a time. Paging
    stops at the first non-200 response or the first page with no items, and
    whatever was collected up to that point is returned (best effort).

    Args:
        min_followers: Exclusive lower bound on the follower count; only users
            with strictly more followers are requested. Defaults to 200.

    Returns:
        A list of the raw ``items`` dicts returned by the search endpoint.
    """
    search_url = f'{GITHUB_API_URL}/search/users'
    # The threshold comes from the argument rather than a hardcoded 200.
    query = f'location:Seattle followers:>{min_followers}'
    users = []
    page = 1

    while True:
        # Passing the query through `params` lets requests URL-encode the
        # space and the '>' qualifier instead of embedding them raw.
        response = requests.get(
            search_url,
            headers=headers,
            params={'q': query, 'per_page': 100, 'page': page},
        )
        if response.status_code != 200:
            break

        items = response.json().get('items', [])
        if not items:
            break

        users.extend(items)
        page += 1

    return users

In [None]:
# Clean company names by trimming whitespace, removing leading '@' symbols, and converting to uppercase
def clean_company_name(company_name):
    """Normalize a company name for grouping and comparison.

    Args:
        company_name: The raw company string, or a falsy value (``None``/``''``)
            when no company is set.

    Returns:
        The name with surrounding whitespace trimmed, any leading ``'@'``
        characters removed, and converted to uppercase; ``''`` for falsy input.
    """
    if not company_name:
        return ''
    # Strip a second time: removing the '@' can expose whitespace that followed
    # it (e.g. '@ Acme'), which would otherwise survive as a leading space.
    return company_name.strip().lstrip('@').strip().upper()

In [None]:
# Look up the full profile for a single user
def fetch_user_details(login):
    """Return the decoded JSON profile for ``login``, or ``None`` on any non-200 response."""
    profile_url = f'{GITHUB_API_URL}/users/{login}'
    resp = requests.get(profile_url, headers=headers)
    if resp.status_code != 200:
        return None
    return resp.json()

In [None]:
# Collect a user's repositories, most recently pushed first (at most 500)
def fetch_user_repos(login):
    """Return up to five pages (100 per page) of repositories for ``login``.

    Repositories are requested sorted by push time. Paging ends early at the
    first non-200 response or the first empty page; anything gathered before
    that point is still returned.
    """
    base_url = f'{GITHUB_API_URL}/users/{login}/repos?per_page=100&sort=pushed'
    collected = []

    # Pages 1..5 inclusive: 5 pages x 100 per page caps the result at 500.
    for page_number in range(1, 6):
        resp = requests.get(f'{base_url}&page={page_number}', headers=headers)
        if resp.status_code != 200:
            break

        batch = resp.json()
        if not batch:
            break

        collected.extend(batch)

    return collected

In [None]:
# Save user data to users.csv
def save_users_to_csv(users):
    """Fetch each user's profile and write the selected fields to ``users.csv``.

    Args:
        users: Iterable of dicts that each carry a ``'login'`` key (for example
            the items returned by the user search).

    Users whose profile lookup returns nothing are skipped. The file is written
    to the current working directory without an index column.
    """
    # Fixing the column list keeps the header row present (and in a stable
    # order) even when no profile could be fetched; without it an empty result
    # produces a CSV with no header at all.
    columns = [
        'login', 'name', 'company', 'location', 'email', 'hireable', 'bio',
        'public_repos', 'followers', 'following', 'created_at',
    ]

    user_data = []
    for user in users:
        details = fetch_user_details(user['login'])
        if not details:
            continue
        user_data.append({
            'login': details['login'],
            'name': details.get('name', ''),
            'company': clean_company_name(details.get('company', '')),
            'location': details.get('location', ''),
            'email': details.get('email', ''),
            'hireable': details.get('hireable', ''),
            'bio': details.get('bio', ''),
            'public_repos': details.get('public_repos', 0),
            'followers': details.get('followers', 0),
            'following': details.get('following', 0),
            'created_at': details.get('created_at', '')
        })

    df_users = pd.DataFrame(user_data, columns=columns)
    df_users.to_csv('users.csv', index=False)

In [None]:
def save_repos_to_csv(users):
    """Fetch each user's repositories and write them to ``repositories.csv``.

    Args:
        users: Iterable of dicts that each carry a ``'login'`` key.

    One row is written per repository, tagged with the owning user's login.
    The ``license_name`` column holds the license's ``key`` value, or ``''``
    when the repository has no license. The file is written to the current
    working directory without an index column.
    """
    # Fixing the column list keeps the header row present (and in a stable
    # order) even when no repositories were found; without it an empty result
    # produces a CSV with no header at all.
    columns = [
        'login', 'full_name', 'created_at', 'stargazers_count',
        'watchers_count', 'language', 'has_projects', 'has_wiki',
        'license_name',
    ]

    repo_data = []
    for user in users:
        login = user['login']
        for repo in fetch_user_repos(login):
            repo_data.append({
                'login': login,
                'full_name': repo['full_name'],
                'created_at': repo['created_at'],
                'stargazers_count': repo.get('stargazers_count', 0),
                'watchers_count': repo.get('watchers_count', 0),
                'language': repo.get('language', ''),
                'has_projects': repo.get('has_projects', False),
                'has_wiki': repo.get('has_wiki', False),
                'license_name': repo['license']['key'] if repo.get('license') else ''
            })

    df_repos = pd.DataFrame(repo_data, columns=columns)
    df_repos.to_csv('repositories.csv', index=False)

In [None]:
# Entry point: gather the Seattle users once, then produce both CSV exports
def main():
    """Fetch the matching users and write the user and repository CSV files."""
    seattle_users = fetch_users_in_seattle()
    save_users_to_csv(seattle_users)
    save_repos_to_csv(seattle_users)


# Run the pipeline when executed as a script or as a notebook cell.
if __name__ == '__main__':
    main()