In [65]:
# Set up GitHub token
# Prefer the GITHUB_TOKEN environment variable so a real credential never has
# to be pasted into (and saved with) this file; the placeholder is kept as the
# fallback so the name is always defined.
import os

GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', 'set_your_token')


In [66]:
import requests
import pandas as pd

# Headers sent with every GitHub API call below: token authentication plus
# an explicit request for the v3 JSON media type.
HEADERS = {
    'Authorization': f'token {GITHUB_TOKEN}',
    'Accept': 'application/vnd.github.v3+json',
}

# Row accumulators: one list per output table, filled by the scraping loop
# (one inner list per user / per repository).
user_data, repository_data = [], []

# Function to fetch users with pagination
def fetch_users(page, query='location:Chicago+followers:>100', per_page=100, timeout=30):
    """Fetch one page of GitHub user-search results.

    Args:
        page: Page number to request.
        query: Search qualifiers, inserted into the URL exactly as given
            (qualifiers already joined with '+').
        per_page: Number of results requested per page.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked here, so an error response is returned as its decoded body;
        callers should not assume an 'items' key is present.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    url = f'https://api.github.com/search/users?q={query}&per_page={per_page}&page={page}'
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    return response.json()

# Loop through pages to get all users, and for each user collect a profile row
# (into user_data) and one row per repository (into repository_data).

# Seconds to wait on any single API call before giving up.
_REQUEST_TIMEOUT = 30
# Page size requested from the repository listing endpoint.
_REPOS_PER_PAGE = 100


def _tri_state(flag):
    """Render an optional boolean as "true", "false", or "" when it is None."""
    if flag is None:
        return ""
    return "true" if flag else "false"


def _clean_company(raw):
    """Normalize a company name: trim whitespace, drop '@' at the ends, upper-case.

    Whitespace is trimmed before the '@' so that a value such as " @acme"
    loses its '@' as well; a missing (None) company becomes "".
    """
    return (raw or "").strip().strip('@').strip().upper()


page = 1
while True:
    users_response = fetch_users(page)
    users = users_response.get('items', [])

    if not users:  # Stop if there are no more users
        break

    # Process each user
    for user in users:
        login = user['login']
        user_response = requests.get(
            f'https://api.github.com/users/{login}',
            headers=HEADERS,
            timeout=_REQUEST_TIMEOUT,
        )
        # Fail loudly instead of recording a row of blanks built from an
        # error payload.
        user_response.raise_for_status()
        userdetail = user_response.json()

        name = userdetail.get('name', "")
        company = _clean_company(userdetail.get('company'))
        location = userdetail.get('location', "")
        email = userdetail.get('email', "")
        hireable = _tri_state(userdetail.get('hireable'))
        bio = userdetail.get('bio', "")
        public_repos = userdetail.get('public_repos', 0)
        followers = userdetail.get('followers', 0)
        following = userdetail.get('following', 0)
        created_at = userdetail.get('created_at', "")

        user_data.append([login, name, company, location, email, hireable, bio, public_repos, followers, following, created_at])

        # Fetch repositories for the user with pagination
        repo_page = 1
        while True:
            repos_response = requests.get(
                f'https://api.github.com/users/{login}/repos?per_page={_REPOS_PER_PAGE}&page={repo_page}',
                headers=HEADERS,
                timeout=_REQUEST_TIMEOUT,
            )
            # An error payload is a dict rather than a list of repositories;
            # raise a clear HTTPError here instead of a TypeError when the
            # loop below would index into its keys.
            repos_response.raise_for_status()
            repos = repos_response.json()

            if not repos:  # Stop if there are no more repositories
                break

            for repo in repos:
                repo_login = repo['owner']['login']
                full_name = repo['full_name']
                repo_created_at = repo['created_at']
                stargazers_count = repo['stargazers_count']
                watchers_count = repo['watchers_count']
                language = repo['language']
                has_projects = _tri_state(repo.get('has_projects'))
                has_wiki = _tri_state(repo.get('has_wiki'))
                # 'license' may be absent or null; only then is the name blank.
                license_name = repo['license']['name'] if repo.get('license') else ""

                repository_data.append([repo_login, full_name, repo_created_at, stargazers_count, watchers_count, language, has_projects, has_wiki, license_name])

            # A page shorter than the requested size is the last one, so the
            # extra request that would only return an empty list is skipped.
            if len(repos) < _REPOS_PER_PAGE:
                break

            repo_page += 1  # Move to the next page of repositories

    page += 1  # Move to the next page of users

# Column layouts; the order matches the rows appended to user_data and
# repository_data.
user_columns = [
    'login', 'name', 'company', 'location', 'email', 'hireable', 'bio',
    'public_repos', 'followers', 'following', 'created_at',
]
repo_columns = [
    'login', 'full_name', 'created_at', 'stargazers_count', 'watchers_count',
    'language', 'has_projects', 'has_wiki', 'license_name',
]

# Build one DataFrame per table from the collected rows.
users_df = pd.DataFrame(data=user_data, columns=user_columns)
repos_df = pd.DataFrame(data=repository_data, columns=repo_columns)

# Write each table to its CSV file, without the index column.
for frame, csv_path in ((users_df, 'users.csv'), (repos_df, 'repositories.csv')):
    frame.to_csv(csv_path, index=False)

print("User data extracted and saved to users.csv and repositories.csv.")


User data extracted and saved to users.csv and repositories.csv.
