In [13]:
import csv
import os
import time
from urllib.parse import quote

import requests
from requests.adapters import HTTPAdapter
from requests.exceptions import Timeout, RequestException
from requests.packages.urllib3.util.retry import Retry

# GitHub API token and headers
# SECURITY: the token is read from the GITHUB_TOKEN environment variable so no
# credential is stored in the notebook. Any token that was previously committed
# in this file must be treated as leaked and revoked.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "").strip()
HEADERS = {
    "Accept": "application/vnd.github.v3+json"
}
# Only send an Authorization header when a token is actually configured;
# without one the requests go out unauthenticated.
if GITHUB_TOKEN:
    HEADERS["Authorization"] = f"token {GITHUB_TOKEN}"

# One shared session carries the default headers for every GitHub request and
# retries failed HTTPS calls through the adapter mounted below.
session = requests.Session()
session.headers.update(HEADERS)

retry_strategy = Retry(
    backoff_factor=1,  # Base factor for the exponential backoff between attempts
    status_forcelist=[429, 500, 502, 503, 504],  # Status codes that trigger a retry
    total=3,  # Give up after 3 retries
)
adapter = HTTPAdapter(max_retries=retry_strategy)
session.mount("https://", adapter)

# Function to fetch data from GitHub API with retries and timeouts
def fetch_data_with_retries(url):
    """GET ``url`` through the shared ``session`` and return the decoded JSON.

    The request uses a 10-second timeout. A timeout or any other
    ``RequestException`` (including an HTTP error status raised by
    ``raise_for_status``) is reported with ``print`` and swallowed.

    Args:
        url: Absolute URL to request.

    Returns:
        The parsed JSON body, or ``None`` when the request failed.
    """
    payload = None
    try:
        reply = session.get(url, timeout=10)
        reply.raise_for_status()
        payload = reply.json()
    except Timeout:
        print(f"Request timed out for URL: {url}")
    except RequestException as err:
        print(f"Error fetching data from {url}: {err}")
    return payload

# Fetch users from a specific city with a minimum follower count
def fetch_users(city, min_followers=50):
    """Search GitHub for users located in ``city`` with more than ``min_followers`` followers.

    Results are paged 100 at a time. Paging stops on the first short page, on
    a failed request, or after 10 pages, because the Search API only exposes
    the first 1000 results of a query.

    Args:
        city: Location to search for. Multi-word locations are wrapped in
            double quotes so the whole phrase binds to the ``location:``
            qualifier, and the value is percent-encoded before it is placed
            in the URL.
        min_followers: Exclusive lower bound on the follower count.

    Returns:
        A list of user dicts taken from the ``items`` of each response page
        (possibly empty).
    """
    per_page = 100
    max_pages = 10  # 10 pages x 100 results = the 1000-result search window

    location = str(city)
    if any(ch.isspace() for ch in location):
        location = f'"{location}"'
    location = quote(location, safe="")

    users = []
    for page in range(1, max_pages + 1):
        url = f"https://api.github.com/search/users?q=location:{location}+followers:>{min_followers}&page={page}&per_page={per_page}"
        data = fetch_data_with_retries(url)
        if not data or 'items' not in data:
            break  # Request failed or the response has no result list
        users.extend(data['items'])
        if len(data['items']) < per_page:
            break  # Short page means this was the last one
        if page < max_pages:
            time.sleep(1)  # Avoid hitting rate limit
    return users

def fetch_repositories(user_login):
    """Collect up to 500 of a user's repositories, most recently pushed first.

    Pages of 100 repositories are requested until a page comes back empty or
    failed, a page holds fewer than 100 entries, or 500 repositories have been
    gathered.

    Args:
        user_login: GitHub login whose repositories are listed.

    Returns:
        A list with at most 500 repository dicts.
    """
    collected = []
    page = 1
    more_pages = True
    while more_pages:
        # Ask for repositories ordered by latest push, newest first
        url = f"https://api.github.com/users/{user_login}/repos?sort=pushed&direction=desc&page={page}&per_page=100"
        batch = fetch_data_with_retries(url)
        if not batch:
            break  # Nothing returned (or the request failed)

        collected.extend(batch)
        # Keep going only after a full page and while under the 500-repo cap
        more_pages = len(batch) >= 100 and len(collected) < 500
        if more_pages:
            page += 1
            time.sleep(1)  # Pause before requesting the next page

    return collected[:500]

# Fetch user details for a specific user
def fetch_userdetails(user_login):
    """Return the profile dict for ``user_login``.

    Args:
        user_login: GitHub login to look up.

    Returns:
        The decoded profile response, or an empty dict when the lookup
        returned nothing (for example because the request failed).
    """
    details = fetch_data_with_retries(f"https://api.github.com/users/{user_login}")
    return details or {}

# Write data to CSV
def _bool_field(value):
    """Render a boolean-like API value for CSV: "true"/"false", or "" for null."""
    if value is None:
        return ""
    return str(value).lower()


def _user_row(user):
    """Build one users.csv row from a user-details dict."""
    # Normalise company: trim, drop leading "@", trim again (e.g. "@ acme"), upper-case
    company = (user.get("company", "") or "").strip().lstrip("@").strip().upper()
    return [
        user.get("login", ""),
        user.get("name", ""),
        company,
        user.get("location", ""),
        user.get("email", ""),
        _bool_field(user.get("hireable", False)),
        user.get("bio", ""),
        user.get("public_repos", ""),
        user.get("followers", ""),
        user.get("following", ""),
        user.get("created_at", ""),
    ]


def _repo_row(repo):
    """Build one repositories.csv row from a repository dict."""
    # "owner" and "license" may be null; treat null like a missing object
    owner = repo.get("owner") or {}
    license_info = repo.get("license") or {}
    return [
        owner.get("login", ""),
        repo.get("full_name", ""),
        repo.get("created_at", ""),
        repo.get("stargazers_count", 0),
        repo.get("watchers_count", 0),
        repo.get("language", ""),
        _bool_field(repo.get("has_projects", False)),
        _bool_field(repo.get("has_wiki", False)),
        license_info.get("name", ""),
    ]


def write_to_csv(users, users_details, repositories,
                 users_path="users.csv", repos_path="repositories.csv"):
    """Write user details and repositories to two UTF-8 CSV files.

    Args:
        users: Search-result user list. Accepted for backward compatibility;
            it is not used when writing the files.
        users_details: Iterable of user-detail dicts. Empty dicts (the value
            ``fetch_userdetails`` returns for a failed lookup) are skipped
            instead of being written as a blank row.
        repositories: Iterable of repository dicts.
        users_path: Destination of the users file (default ``"users.csv"``).
        repos_path: Destination of the repositories file
            (default ``"repositories.csv"``).

    Null values are written as empty fields. This includes ``hireable``,
    which would otherwise be serialised as the literal text ``"none"``.
    """
    with open(users_path, mode="w", newline="", encoding="utf-8") as users_file:
        writer = csv.writer(users_file)
        writer.writerow(["login", "name", "company", "location", "email", "hireable", "bio", "public_repos", "followers", "following", "created_at"])
        writer.writerows(_user_row(user) for user in users_details if user)

    with open(repos_path, mode="w", newline="", encoding="utf-8") as repos_file:
        writer = csv.writer(repos_file)
        writer.writerow(["login", "full_name", "created_at", "stargazers_count", "watchers_count", "language", "has_projects", "has_wiki", "license_name"])
        writer.writerows(_repo_row(repo) for repo in repositories)


# Main function to execute the workflow
def main(city="Dublin", min_followers=50):
    """Scrape users of one city plus their repositories and write both to CSV.

    Args:
        city: Location passed to ``fetch_users`` (default ``"Dublin"``).
        min_followers: Follower threshold passed to ``fetch_users``
            (default ``50``).

    Returns:
        A ``(users, users_details, repositories)`` tuple holding the same
        lists that were handed to ``write_to_csv``, so a notebook cell can
        inspect the data after the run instead of relying on function-local
        names.
    """
    users = fetch_users(city, min_followers=min_followers)
    repositories = []
    users_details = []

    # For each user, fetch their repositories and user details
    for user in users:
        login = user["login"]
        repositories.extend(fetch_repositories(login))
        users_details.append(fetch_userdetails(login))

    # Write users and repositories data to CSV files
    write_to_csv(users, users_details, repositories)
    return users, users_details, repositories

In [14]:
# Run the whole workflow only when this code executes as the top-level program
# (module name "__main__"), not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

In [3]:
# NOTE(review): `users` is only ever bound as a local variable inside main(), so it
# does not exist at notebook scope and this cell raises the NameError shown below.
# To inspect the results, read users.csv or have main() hand its data back.
users

NameError: name 'users' is not defined