In [None]:
import requests
import csv
import time

# Replace with your GitHub personal access token.
token = "Enter your token"
# Authorization header sent with every GitHub API request in this cell.
# The f-string must reference the lowercase `token` defined just above;
# referring to an undefined `TOKEN` raises NameError when the cell runs.
HEADERS = {'Authorization': f'token {token}'}

def get_users_by_city(city, min_followers):
    """
    Search GitHub for users in a location with more than a given follower count.

    Pages through the GitHub user-search endpoint, collecting the `items`
    of every response, until an empty page or a page shorter than
    `per_page` is returned.

    Args:
        city: Location to search for (used in the `location:` qualifier).
        min_followers: Follower threshold used in the `followers:>N`
            qualifier, so only users with strictly more followers match.

    Returns:
        A list of the user items (dicts) returned by the search. If a
        request returns a non-200 status, the error is printed and the
        users collected so far are returned.
    """
    url = 'https://api.github.com/search/users'

    # A location containing whitespace has to be wrapped in double quotes,
    # otherwise only its first word is bound to the `location:` qualifier.
    location = str(city)
    if any(ch.isspace() for ch in location) and not location.startswith('"'):
        location = f'"{location}"'
    query = f'location:{location} followers:>{min_followers}'

    users = []
    page = 1
    per_page = 30

    while True:
        # Let requests URL-encode the query and the pagination parameters.
        params = {'q': query, 'page': page, 'per_page': per_page}
        response = requests.get(url, headers=HEADERS, params=params)

        if response.status_code != 200:
            print(f'Error: {response.status_code}, {response.text}')
            break

        items = response.json().get('items', [])
        if not items:
            break  # No more results

        users.extend(items)

        # A short page means this was the last page of results.
        if len(items) < per_page:
            break

        page += 1
        time.sleep(1)  # Pause between pages to avoid hitting rate limits

    return users

def save_users_to_csv(users, filename):
    """
    Fetch the full profile of each user and write the profiles to a CSV file.

    For every entry in `users`, the profile is requested from the entry's
    `url`, and the fields listed in `fieldnames` are written as one row.
    Users whose profile request does not return 200 are reported on stdout
    and skipped.

    Args:
        users: Iterable of dicts, each with at least the keys `url`
            (profile endpoint) and `login` (used in the failure message).
        filename: Path of the CSV file to create or overwrite.
    """
    fieldnames = ['login', 'name', 'company', 'location', 'email', 'bio', 'followers', 'public_repos', 'hireable', 'created_at', 'following']

    # Explicit UTF-8: names, companies and bios routinely contain non-ASCII
    # characters, and the platform default encoding may not be able to
    # represent them (or may differ from what the CSV reader expects).
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for user in users:
            # Fetch additional details for each user
            details_response = requests.get(user['url'], headers=HEADERS)
            if details_response.status_code != 200:
                print(f"Failed to fetch details for {user['login']}: {details_response.status_code}")
                continue

            details = details_response.json()
            # Build the row from the header list so the two cannot drift apart.
            writer.writerow({field: details.get(field) for field in fieldnames})
            time.sleep(0.5)  # Pause between user details requests to avoid rate limiting

# Example usage: collect users matching the search and store their profiles.
city, min_followers = 'melbourne', 100
users = get_users_by_city(city, min_followers)
save_users_to_csv(users, filename='users.csv')
print("Data saved to users.csv")

In [None]:
import pandas as pd
# Load the user profiles from the same relative path that
# save_users_to_csv(users, 'users.csv') writes to, so this cell works in
# any working directory rather than only where that directory is /content.
df = pd.read_csv("users.csv")
# Distinct logins, in order of first appearance.
lt = df['login'].unique().tolist()
len(lt)

In [None]:
from datetime import datetime
import requests
import pandas as pd
import time

# Personal access token for the GitHub API (use your own for a higher rate limit).
token = "Enter your token"
# Authorization header attached to every request made in this cell.
headers = {'Authorization': 'token ' + token}

def get_all_repos(username):
    """
    Fetches all public repositories for a GitHub user, handling pagination.

    Args:
        username: GitHub login whose repositories are listed.

    Returns:
        A list of repository dicts as returned by the API. If a request
        returns a non-200 status, a message is printed and the
        repositories collected so far are returned.
    """
    per_page = 100
    all_repos = []
    page = 1

    while True:
        url = f'https://api.github.com/users/{username}/repos?page={page}&per_page={per_page}'
        response = requests.get(url, headers=headers)

        if response.status_code != 200:
            print(f"Failed to fetch data for {username}: {response.status_code}")
            break

        repos = response.json()
        if not repos:
            break  # Exit the loop if no more repos are found

        all_repos.extend(repos)

        # A short page is the last page; stopping here saves one request
        # (and one sleep) per user against the rate-limited API.
        if len(repos) < per_page:
            break

        page += 1
        time.sleep(0.5)  # Throttle requests to avoid hitting the rate limit

    return all_repos

def save_limited_repos_to_csv(usernames, filename='repositories.csv', max_repos_per_user=500):
    """
    Fetches repositories for multiple users and saves them to one CSV file.

    At most `max_repos_per_user` repositories are kept per user. Users with
    more than that are reduced to their most recently pushed repositories
    (sorted by `pushed_at`, newest first); other users keep the order in
    which get_all_repos returned their repositories.

    Args:
        usernames: Iterable of GitHub logins to process.
        filename: Path of the CSV file to create or overwrite.
        max_repos_per_user: Maximum number of rows written per user
            (default 500).
    """
    columns = [
        'username', 'name', 'full_name', 'private', 'html_url',
        'description', 'fork', 'created_at', 'updated_at', 'pushed_at',
        'stargazers_count', 'watchers_count', 'language', 'has_issues',
        'has_projects', 'has_wiki', 'license', 'forks_count',
    ]
    all_repos_data = []

    for username in usernames:
        repos = get_all_repos(username)

        # Apply sorting ONLY if the user exceeds the per-user limit.
        if len(repos) > max_repos_per_user:
            # `pushed_at` values are fixed-width 'YYYY-MM-DDTHH:MM:SSZ'
            # strings, so comparing them as text orders them chronologically.
            # A null or missing value falls back to '' and therefore sorts
            # last instead of raising while being parsed as a date.
            repos = sorted(
                repos,
                key=lambda repo: repo.get('pushed_at') or '',
                reverse=True
            )[:max_repos_per_user]

        # Process repository data
        for repo in repos:
            row = {column: repo.get(column) for column in columns}
            row['username'] = username
            # `license` is either null or a nested object; keep only its key.
            row['license'] = (repo.get('license') or {}).get('key')
            all_repos_data.append(row)
        print(f"Processed repositories for {username} (Total: {len(repos)})")

    # Passing the columns explicitly keeps the header row even when no
    # repositories were collected, so the file stays loadable as a CSV.
    df = pd.DataFrame(all_repos_data, columns=columns)
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

# Example usage: gather the repositories of every login held in `lt`.
usernames = lt
save_limited_repos_to_csv(usernames, filename='repositories.csv')