# In [None]:
import requests
import pandas as pd
import time

# Set up your GitHub API token.
# NOTE(review): input() echoes the token as it is typed (and, when run in a
# notebook, it may end up in the saved cell output) -- consider
# getpass.getpass() if that is a concern.
# Whitespace picked up while copy/pasting is stripped so it cannot end up
# inside the Authorization header value.
api_token = input("Enter API Token: ").strip()
headers = {'Authorization': f'token {api_token}'}

# Function to fetch users
def fetch_users():
    """Search GitHub for users located in Dublin with more than 50 followers.

    Pages through the user-search endpoint 100 results at a time, pausing one
    second between pages, and authenticates with the module-level ``headers``.
    Collection is best effort: if a page comes back with an HTTP error status
    the reason is printed and whatever was gathered so far is returned.

    Returns:
        list: The raw ``items`` entries of every page fetched, in API order.

    Raises:
        requests.exceptions.RequestException: If a request times out or the
            connection fails.
    """
    # GitHub's search API serves at most 1,000 results for a single query.
    max_results = 1000
    users_data = []
    page = 1
    while True:
        users_url = f"https://api.github.com/search/users?q=location:Dublin+followers:>50&page={page}&per_page=100"
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(users_url, headers=headers, timeout=30)
        if not response.ok:
            # Say why paging stopped instead of silently returning a
            # truncated list (e.g. after hitting a rate limit).
            print(f"User search stopped at page {page}: HTTP {response.status_code}")
            break
        data = response.json()
        if 'items' not in data or not data['items']:
            break
        users_data.extend(data['items'])
        # Stop as soon as everything retrievable has been collected rather
        # than issuing one more request just to receive an empty/error page.
        if len(users_data) >= min(data.get('total_count', max_results), max_results):
            break
        page += 1
        time.sleep(1)  # Avoid hitting rate limits
    return users_data

# Get detailed user info
def get_user_details(users_data):
    """Fetch each user's full profile and flatten it into a CSV-ready row.

    Args:
        users_data: Iterable of dicts that each carry a ``url`` key pointing
            at the user's API resource (such as the items returned by
            ``fetch_users``).

    Returns:
        list[dict]: One dict per user with the keys ``login``, ``name``,
        ``company``, ``location``, ``email``, ``hireable``, ``bio``,
        ``public_repos``, ``followers``, ``following`` and ``created_at``.
        Text fields that are missing or null in the response become ``''``;
        ``hireable`` is the string ``'true'`` or ``'false'``; ``company`` is
        trimmed, stripped of leading ``@`` characters and upper-cased.

    Raises:
        requests.exceptions.HTTPError: If a profile request returns an error
            status (for example when the rate limit is exhausted).
        requests.exceptions.RequestException: If a request times out or the
            connection fails.
    """
    users = []
    for user in users_data:
        # A timeout keeps a stalled connection from hanging the script forever.
        user_response = requests.get(user['url'], headers=headers, timeout=30)
        # Fail with the real HTTP error instead of a confusing KeyError on
        # 'login' when the body is an error payload.
        user_response.raise_for_status()
        user_info = user_response.json()

        # The API sends explicit nulls for unset fields, so a .get() default
        # alone never applies; `or ''` covers both missing and null values.
        company = (user_info.get('company') or '').strip().lstrip('@').upper()

        users.append({
            'login': user_info['login'],
            'name': user_info.get('name') or '',
            'company': company,
            'location': user_info.get('location') or '',
            'email': user_info.get('email') or '',
            'hireable': 'true' if user_info.get('hireable') else 'false',
            'bio': user_info.get('bio') or '',
            'public_repos': user_info.get('public_repos', 0),
            'followers': user_info.get('followers', 0),
            'following': user_info.get('following', 0),
            'created_at': user_info.get('created_at') or '',
        })
        time.sleep(0.5)  # Slow down requests slightly to avoid rate limits
    return users

# Function to fetch repositories
def fetch_repos(users):
    """Collect up to 500 most recently pushed repositories for each user.

    Args:
        users: Iterable of dicts that each carry a ``login`` key (such as the
            rows returned by ``get_user_details``).

    Returns:
        list[dict]: One dict per repository with the keys ``login``,
        ``full_name``, ``created_at``, ``stargazers_count``,
        ``watchers_count``, ``language``, ``has_projects``, ``has_wiki`` and
        ``license_name``. Text fields that are missing or null become ``''``;
        the two ``has_*`` flags are the strings ``'true'`` or ``'false'``.

    Raises:
        requests.exceptions.HTTPError: If a repository request returns an
            error status.
        requests.exceptions.RequestException: If a request times out or the
            connection fails.
    """
    per_page = 100   # page size requested in the URL below
    max_repos = 500  # per-user cap on collected repositories
    repos = []
    for user in users:
        page = 1
        user_repos = []
        # Checking the cap before requesting avoids fetching a page whose
        # contents would all be discarded.
        while len(user_repos) < max_repos:
            repos_url = f"https://api.github.com/users/{user['login']}/repos?sort=pushed&direction=desc&page={page}&per_page=100"
            # A timeout keeps a stalled connection from hanging the script.
            repos_response = requests.get(repos_url, headers=headers, timeout=30)
            # An error payload is a dict, not a list of repos; raise here
            # rather than iterating over its keys further down.
            repos_response.raise_for_status()
            repos_data = repos_response.json()

            if not repos_data:
                break

            # Take only as many repos as still fit under the per-user cap.
            for repo in repos_data[:max_repos - len(user_repos)]:
                user_repos.append({
                    'login': user['login'],
                    'full_name': repo.get('full_name') or '',
                    'created_at': repo.get('created_at') or '',
                    'stargazers_count': repo.get('stargazers_count', 0),
                    'watchers_count': repo.get('watchers_count', 0),
                    'language': repo.get('language') or '',
                    'has_projects': 'true' if repo.get('has_projects') else 'false',
                    'has_wiki': 'true' if repo.get('has_wiki') else 'false',
                    'license_name': repo['license']['name'] if repo.get('license') else ''
                })
            time.sleep(1)  # Avoid rate limit issues
            # A short page is the last one, so there is no need to request a
            # further (empty) page just to find that out.
            if len(repos_data) < per_page:
                break
            page += 1
        repos.extend(user_repos)
    return repos

# Main execution
users_data = fetch_users()
users = get_user_details(users_data)
repos = fetch_repos(users)

# Save to CSV
# Columns are named explicitly (same names and order as the row dicts built
# by get_user_details / fetch_repos) so that a header row is still written
# when no users or repositories were found.
user_columns = [
    'login', 'name', 'company', 'location', 'email', 'hireable', 'bio',
    'public_repos', 'followers', 'following', 'created_at',
]
repo_columns = [
    'login', 'full_name', 'created_at', 'stargazers_count', 'watchers_count',
    'language', 'has_projects', 'has_wiki', 'license_name',
]
users_df = pd.DataFrame(users, columns=user_columns)
users_df.to_csv('users.csv', index=False)
repos_df = pd.DataFrame(repos, columns=repo_columns)
repos_df.to_csv('repositories.csv', index=False)

print("Data scraping and file creation completed.")
