In [1]:
# Mount Google Drive at /content/drive; the output files produced later in this
# notebook are written beneath that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import csv
import os

import requests

# GitHub personal access token, read from the GITHUB_TOKEN environment variable
# so the secret never lives in the notebook or its version history. Set it
# before running, e.g. os.environ["GITHUB_TOKEN"] = "<your token>".
# NOTE: any token that was previously pasted into this file must be treated as
# compromised and revoked in the GitHub settings.
YOUR_ACCESS_TOKEN = os.environ.get("GITHUB_TOKEN", "")

# Base URL for API calls
base_url = "https://api.github.com"

# Location to search
location = "Austin"

# Minimum followers count
min_followers = 100

# Maximum repositories to fetch per user
max_repos_per_user = 500


def get_users(location, min_followers):
    """
    Fetches users from a specific location with more than min_followers.

    Pages through the GitHub user search endpoint, 100 results per request,
    and returns the accumulated list of search-result dicts.

    Args:
        location: Value for the ``location:`` search qualifier.
        min_followers: Threshold for the ``followers:>`` search qualifier.

    Returns:
        A list of user dicts as returned in the search response's ``items``.

    Raises:
        requests.HTTPError: If GitHub answers with an error status (bad
            credentials, rate limit, invalid query, ...).
    """
    per_page = 100
    # The Search API only exposes the first 1000 matches; asking for a page
    # beyond that returns an error payload without an "items" key.
    search_cap = 1000
    headers = {"Authorization": f"token {YOUR_ACCESS_TOKEN}"}
    query = f"location:{location} followers:>{min_followers}"

    users = []
    page = 1
    while len(users) < search_cap:
        # Passing the query via params lets requests URL-encode it correctly.
        response = requests.get(
            f"{base_url}/search/users",
            headers=headers,
            params={"q": query, "page": page, "per_page": per_page},
            timeout=30,
        )
        response.raise_for_status()
        items = response.json().get("items") or []

        if not items:
            break

        users.extend(items)
        if len(items) < per_page:
            # A short page is the last page; skip the extra empty request.
            break
        page += 1

    return users


def get_user_details(user):
    """
    Gets detailed information for a single user.

    Args:
        user: Dict containing at least a ``login`` key.

    Returns:
        The user's profile dict from the API, with ``company`` normalised:
        surrounding whitespace and "@" characters removed, upper-cased, and
        an empty string when no company is set.

    Raises:
        requests.HTTPError: If GitHub answers with an error status.
    """
    url = f"{base_url}/users/{user['login']}"
    headers = {"Authorization": f"token {YOUR_ACCESS_TOKEN}"}
    response = requests.get(url, headers=headers, timeout=30)
    # Fail with the real HTTP error instead of a KeyError on the error payload.
    response.raise_for_status()
    data = response.json()

    # Clean up company name; the second strip() removes whitespace that was
    # hidden behind a leading "@" (e.g. "@ acme" -> "ACME").
    company = data.get("company") or ""
    data["company"] = company.strip().strip("@").strip().upper()

    return data


def get_user_repositories(user):
    """
    Gets repositories (up to max_repos_per_user) for a user.

    Pages through the user's repository listing, 100 per request, until
    max_repos_per_user repositories have been collected or the listing ends.
    Repositories are requested most-recently-pushed first, which is how this
    code interprets the README's "most recent" wording.

    Args:
        user: Dict containing at least a ``login`` key.

    Returns:
        A list of repository dicts, each with an added ``login`` key holding
        the owning user's login.

    Raises:
        requests.HTTPError: If GitHub answers with an error status.
    """
    per_page = 100
    url = f"{base_url}/users/{user['login']}/repos"
    headers = {"Authorization": f"token {YOUR_ACCESS_TOKEN}"}

    repositories = []
    page = 1
    while len(repositories) < max_repos_per_user:
        response = requests.get(
            url,
            headers=headers,
            params={"sort": "pushed", "page": page, "per_page": per_page},
            timeout=30,
        )
        response.raise_for_status()
        batch = response.json()

        if not batch:
            break

        # Only take as many as are still needed to reach the cap.
        remaining = max_repos_per_user - len(repositories)
        for repo in batch[:remaining]:
            repo["login"] = user["login"]  # Add user login for reference
            repositories.append(repo)

        if len(batch) < per_page:
            # A short page is the last page; skip the extra empty request.
            break
        page += 1

    return repositories


def write_csv(data, filename):
    """
    Writes a list of dicts to a CSV file.

    The header is the union of all row keys in first-seen order, so rows
    that carry extra or missing keys are written with blank cells instead of
    raising. With identical keys in every row the output matches a header
    taken from the first row.

    Args:
        data: List of dicts, one per CSV row. May be empty, in which case an
            empty file is created.
        filename: Path of the CSV file to (over)write.
    """
    fieldnames = list(dict.fromkeys(key for row in data for key in row))

    # Explicit UTF-8 so non-ASCII names/bios do not depend on the platform
    # default encoding.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        if not fieldnames:
            return
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(data)


def main():
    """
    Runs the full scrape and writes users.csv, repositories.csv and README.md.

    Users are looked up with the module-level ``location`` and
    ``min_followers`` settings; for every user the profile details and
    repositories are fetched, then everything is written into the project
    folder on the mounted drive.
    """
    output_dir = "/content/drive/My Drive/ProjectTDS1"
    # Create the target folder up front so a missing directory fails before
    # any API calls are spent.
    os.makedirs(output_dir, exist_ok=True)

    users = get_users(location, min_followers)
    user_data = []
    repositories = []

    for user in users:
        user_data.append(get_user_details(user))
        repositories.extend(get_user_repositories(user))

    write_csv(user_data, f"{output_dir}/users.csv")
    write_csv(repositories, f"{output_dir}/repositories.csv")

    # Create README.md; the search parameters are interpolated from the
    # configuration so the text cannot drift from what was actually scraped.
    with open(f"{output_dir}/README.md", "w", encoding="utf-8") as f:
        f.write(
            f"""## GitHub User and Repository Data

This data was scraped using the GitHub API and includes users in {location} with over {min_followers} followers and their public repositories (up to {max_repos_per_user} most recent).

### Analysis

* **Data Scraping:** Users were found by searching for location:"{location}" and filtering by followers:>{min_followers}. Repositories were retrieved using the user's login information.
* **Interesting Finding:** (Add your own observation after analyzing the data)
* **Recommendation for Developers:** (Based on your analysis, suggest an actionable tip for developers)
"""
        )


# Run the full scrape only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()