In [1]:
import csv
import os
import time

import requests

In [2]:
# GitHub API setup
BASE_URL = "https://api.github.com"
# Read the personal access token from the environment so the secret never
# lives in the notebook (export GITHUB_TOKEN before starting the kernel).
TOKEN = os.environ.get("GITHUB_TOKEN")
# Without a token, send no Authorization header at all instead of the
# invalid value "Bearer None".
HEADERS = {"Authorization": f"Bearer {TOKEN}"} if TOKEN else {}

In [3]:
# Function to search users with pagination
def search_users(location, min_followers, max_pages=10, timeout=10):
    """Search GitHub users by location and follower count, page by page.

    Args:
        location: Value for the ``location:`` search qualifier.
        min_followers: Only users with strictly more followers are matched
            (the query uses ``followers:>``).
        max_pages: Maximum number of result pages (100 users each) to request.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list with the ``items`` entries of every page fetched. If a request
        fails, the users collected so far are returned.
    """
    per_page = 100
    url = f"{BASE_URL}/search/users"

    # A multi-word location has to be quoted, otherwise only its first word
    # is bound to the ``location:`` qualifier.
    location_term = str(location)
    if any(ch.isspace() for ch in location_term) and not location_term.startswith('"'):
        location_term = f'"{location_term}"'

    users = []
    for page in range(1, max_pages + 1):
        params = {
            "q": f"location:{location_term} followers:>{min_followers}",
            "per_page": per_page,
            "page": page
        }

        try:
            response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # Network failure or timeout: keep what was already collected.
            print(f"Error fetching data: {exc}")
            break
        if response.status_code != 200:
            print(f"Error fetching data: {response.status_code}")
            break

        items = response.json().get("items", [])
        users.extend(items)
        # Report before the last-page check so the final page is logged too.
        print(f"Fetched page {page}")

        # A short page means there is nothing left to request.
        if len(items) < per_page:
            break

        time.sleep(2)  # Avoid hitting rate limits

    return users

In [4]:
# Write user data to CSV
def write_users_to_csv1(users):
    """Write each user's login, wrapped in single quotes, to users1.csv.

    Args:
        users: Iterable of dicts; only the ``login`` key is read.

    Entries without a login are skipped instead of raising ``TypeError``
    during string concatenation.
    """
    # NOTE(review): the "html" column is declared but never populated, so it
    # is always written empty -- presumably meant for a profile URL; confirm.
    fieldnames = ["login", "html"]
    with open("users1.csv", "w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        for user in users:
            login = user.get("login")
            if not login:
                continue
            writer.writerow({"login": f"'{login}'"})

In [116]:
# Main function to fetch users and write to CSV
def main(location="seattle", min_followers=200):
    """Search for users and dump their logins to users1.csv.

    Args:
        location: Location passed to ``search_users``.
        min_followers: Follower threshold passed to ``search_users``.

    Note: a later cell defines another ``main``; once that cell runs in the
    same kernel, the name ``main`` refers to the later definition.
    """
    users = search_users(location, min_followers)
    write_users_to_csv1(users)
    print(f"Fetched {len(users)} users.")

if __name__ == "__main__":
    main()


Fetched page 1
Fetched page 2
Fetched page 3
Fetched page 4
Fetched page 5
Fetched 519 users.


In [5]:
def fetch_user_details(username, timeout=10):
    """Fetch detailed information for a given username.

    Args:
        username: Login to request from ``/users/{username}``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The decoded JSON dict with its ``company`` value passed through
        ``clean_company_name``, or ``None`` when the request fails (non-200
        status, network error or timeout).
    """
    url = f"{BASE_URL}/users/{username}"
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
    except requests.RequestException:
        # Treat a network failure like any other failed lookup so that one
        # bad request does not abort a long crawl.
        print(f"Failed to fetch user details for {username}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch user details for {username}")
        return None

    data = response.json()
    # Clean company field
    data['company'] = clean_company_name(data.get('company', ''))
    return data

In [6]:
def fetch_user_repositories(username, max_pages=5, timeout=10):
    """Fetch up to ``max_pages * 100`` repositories for a given username.

    Args:
        username: Login whose ``/users/{username}/repos`` listing is read.
        max_pages: Maximum number of 100-item pages to request (default 5,
            i.e. up to 500 repositories).
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A list of the repository entries returned by the API. If a request
        fails, the repositories collected so far are returned.
    """
    per_page = 100
    url = f"{BASE_URL}/users/{username}/repos"
    repos = []
    for page in range(1, max_pages + 1):
        params = {"per_page": per_page, "page": page}
        try:
            response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
        except requests.RequestException:
            # Network failure or timeout: keep the pages already fetched.
            print(f"Failed to fetch repos for {username}")
            break
        if response.status_code != 200:
            print(f"Failed to fetch repos for {username}")
            break

        data = response.json()
        repos.extend(data)

        # Break if less than a full page was returned (end of repos)
        if len(data) < per_page:
            break
        time.sleep(1)  # Rate limiting
    return repos

In [7]:
def clean_company_name(company):
    """Clean the company name: trim, remove leading '@', and convert to uppercase.

    Args:
        company: Raw company string; may be ``None`` or empty.

    Returns:
        The normalised name, or ``""`` when ``company`` is falsy.
    """
    if not company:
        return ""
    # Strip again after dropping '@' so that "@ Acme" does not keep the
    # space that sat between the '@' and the name.
    return company.strip().lstrip('@').strip().upper()

In [8]:
def write_users_to_csv(users_data):
    """Dump one row per user dict into users.csv (UTF-8, with header).

    Each column is read from the user dict with ``.get``; the mapping below
    gives the fallback used when the key is absent.
    """
    column_defaults = {
        "login": None,
        "name": "",
        "company": "",
        "location": "",
        "email": "",
        "hireable": False,
        "bio": "",
        "public_repos": 0,
        "followers": 0,
        "following": 0,
        "created_at": "",
    }
    with open("users.csv", "w", newline="", encoding="utf-8") as out_file:
        out = csv.DictWriter(out_file, fieldnames=list(column_defaults))
        out.writeheader()
        for record in users_data:
            row = {}
            for column, fallback in column_defaults.items():
                row[column] = record.get(column, fallback)
            out.writerow(row)

In [9]:
def write_repositories_to_csv(repositories_data):
    """Dump one row per repository dict into repositories.csv (UTF-8, with header).

    ``login`` comes from ``repo["owner"]["login"]`` (a missing key raises
    ``KeyError``); ``license_name`` is the license's ``name`` or ``""`` when
    the repository has no license entry. Other columns use ``.get`` with a
    fallback.
    """
    # Columns read straight from the repo dict, with their fallbacks.
    plain_columns = (
        ("full_name", ""),
        ("created_at", ""),
        ("stargazers_count", 0),
        ("watchers_count", 0),
        ("language", ""),
        ("has_projects", False),
        ("has_wiki", False),
    )
    header = ["login"] + [column for column, _ in plain_columns] + ["license_name"]

    with open("repositories.csv", "w", newline="", encoding="utf-8") as out_file:
        out = csv.DictWriter(out_file, fieldnames=header)
        out.writeheader()
        for repo in repositories_data:
            row = {"login": repo["owner"]["login"]}
            for column, fallback in plain_columns:
                row[column] = repo.get(column, fallback)
            license_info = repo.get("license")
            if license_info:
                row["license_name"] = license_info["name"]
            else:
                row["license_name"] = ""
            out.writerow(row)

In [None]:
# Hard-coded snapshot of usernames assumed to have been fetched already
# (519 logins, kept in their original order). Used by main() when no
# explicit list is supplied.
DEFAULT_USERNAMES = [
    'vczh',
    'bradfitz',
    'munificent',
    'tenderlove',
    'ahmetb',
    'koush',
    'jtleek',
    'awslabs',
    'TheLarkInn',
    'zpao',
    'anvaka',
    'mission-peace',
    'karan',
    'eugeneyan',
    'garybernhardt',
    'ottomated',
    'argyleink',
    'HarmJ0y',
    'nolanlawson',
    'brendandburns',
    'schollz',
    'cmuratori',
    'aidenybai',
    'colinhacks',
    'juliemr',
    'hyperb1iss',
    'DanielRosenwasser',
    'nex3',
    'RyanCavanaugh',
    'kevmoo',
    'KillianLucas',
    'awsdocs',
    'allenai',
    'praeclarum',
    'xingyizhou',
    'ashleymcnamara',
    'ericniebler',
    'cwilso',
    'lukehoban',
    'googlecolab',
    'palkan',
    'legomushroom',
    'slundberg',
    'richlander',
    'keith',
    'ImgBotApp',
    'topfunky',
    'deltakosh',
    'teamdandelion',
    'auth0',
    'alloy',
    'veler',
    'piiswrong',
    'natemcmaster',
    'jbeda',
    'yasoob',
    'ongardie',
    'TimDettmers',
    'juanpflores',
    'borismus',
    'armankhondker',
    'basnijholt',
    'keesun',
    'ryanoasis',
    'rebornix',
    'jeffmo',
    'jonrohan',
    'elfmaster',
    'bashmohandes',
    'antiagainst',
    'broady',
    'sandersn',
    'lostintangent',
    'fengli',
    'derv82',
    'jelbourn',
    'jheer',
    'TimothyGu',
    'redblobgames',
    'seankross',
    'altercation',
    'whiteship',
    'mkheck',
    'lpereira',
    'filmgirl',
    'kendrahavens',
    'nmwsharp',
    'msurguy',
    'benvanik',
    'Pessimistress',
    'clareliguori',
    'QuantConnect',
    'tpn',
    'zz85',
    'jorgevgut',
    'ingydotnet',
    'djhohnstein',
    'travisg',
    'wiibrew',
    'cpuguy83',
    'jacob-ebey',
    'chriskiehl',
    'zenspider',
    'timothycrosley',
    'sergeyk',
    'jeremycole',
    'lzl124631x',
    'bearpaw',
    'PWhiddy',
    'rynowak',
    'tableau',
    'iliana',
    'parthnagarkar875',
    'blueberrymusic',
    'iagox86',
    'snakajima',
    'CamiWilliams',
    'cowtowncoder',
    'StudioWildcard1',
    'shmsw25',
    'anneomcl',
    'aarongustafson',
    'JustinBeckwith',
    'sayedihashimi',
    'josephsavona',
    'leechristensen',
    'mhagiwara',
    'ljvmiranda921',
    'NickCH-K',
    'benmarwick',
    'davidshariff',
    'yuchenlin',
    'RhinoSecurityLabs',
    'AkariAsai',
    'valueof',
    'jeffwilcox',
    'joeduffy',
    'olivierlacan',
    'garann',
    'abcdabcd987',
    'yzh119',
    'sjvasquez',
    'stevestreza',
    'haberman',
    'spro',
    'jdavis',
    'jbevain',
    'gbowne1',
    'frantic',
    'transcranial',
    'jakobzhao',
    'ArthurHub',
    'jacobdevlin-google',
    'hugozhu',
    'MariaSolOs',
    'drbrain',
    'mbrubeck',
    'VincentGranville',
    'leodemoura',
    'samuelkarp',
    'crutkas',
    'andyleejordan',
    'heckj',
    'bryphe',
    'PeterL1n',
    'tunz',
    'amdegroot',
    'domaindrivendev',
    'vlandham',
    'trvrb',
    'pcgeek86',
    'jayshah19949596',
    'juberti',
    'beaucollins',
    'd4l3k',
    'sayar',
    'jhamman',
    'dvdzkwsk',
    'qingsongedu',
    'goshacmd',
    'roblourens',
    'instafluff',
    'Konloch',
    'FormidableLabs',
    'oconnor663',
    'kaniini',
    'saulpw',
    'jeaye',
    'BboyAkers',
    'ararslan',
    'dmarx',
    'reset',
    'jmdobry',
    'CaitieM20',
    'azonenberg',
    'Ameobea',
    'analogrelay',
    'lenadroid',
    'daerduoCarey',
    'zombieCraig',
    'beccasaurus',
    'mgmeyers',
    'TheSarang',
    'kanitw',
    'BSVino',
    'scottgu',
    'li-xin-yi',
    'mandliya',
    'miguelsolorio',
    'adrianhall',
    'jmcphers',
    'danielgtaylor',
    'trivikr',
    'kg',
    'kieferrm',
    'izhangzhihao',
    'javierluraschi',
    'nickbytes',
    'KevinMarquette',
    'chen0040',
    'CodeMaxx',
    'jspruance',
    'iandunn',
    'evocateur',
    'StartAutomating',
    'RandalLinden',
    'ZihengJiang',
    'bootstrap4cc',
    'SecondThread',
    'nlpxucan',
    'xinw1012',
    'Plagman',
    'acheamponge',
    'weswigham',
    'codemillmatt',
    'vikhyat',
    'lakshmanok',
    'sudheesh001',
    'WaltRitscher',
    'PetrochukM',
    'prabhakar267',
    'ryanmcgrath',
    'viirya',
    'anarchivist',
    'nfmcclure',
    'arokem',
    'jslee02',
    'CodingCat',
    'cronokirby',
    'jayleicn',
    'tannewt',
    'evandbrown',
    'normj',
    'alvinwan',
    'joemarini',
    'melanierichards',
    'staticfloat',
    'sacmehta',
    'brandonbloom',
    'cnishina',
    'ipinfo',
    'guozhangwang',
    'jiasenlu',
    'pjbull',
    'rachmari',
    'cryptexcode',
    'jroesch',
    'LGUG2Z',
    'jef',
    'davidmccabe',
    'zzmp',
    'myronmarston',
    'tvillarete',
    'scottyhq',
    'Jeffwan',
    'jonringer',
    'fangchangma',
    'jpmedley',
    'yizhongw',
    'xrochoa',
    'jeffpar',
    'bsipocz',
    'kevinlin311tw',
    'sytelus',
    'homebysix',
    'maniacbug',
    'dcjones',
    'abby-fuller',
    'nlsandler',
    'pbeshai',
    'svermeulen',
    'yongxuUSTC',
    'bcongdon',
    'Pilchie',
    'marktoda',
    'jgranick',
    'AllenInstitute',
    'morganherlocker',
    'GeeLaw',
    'autonomousapps',
    'ettaboyle',
    'eigensteve',
    'nikhilk',
    'evancohen',
    'jonatan-ivanov',
    'mattxwang',
    'embeddinglayer',
    'hexiang-hu',
    'Philip-Scott',
    'margaretmz',
    'IanMitchell',
    'carlasouza',
    'TiesdeKok',
    'IlIllII',
    'Spiderpig86',
    'meganrogge',
    'jongio',
    'lessw2020',
    'saadq',
    'ryan-roemer',
    'clarketm',
    'hhoppe',
    'loic-sharma',
    'varunchitre15',
    'eliheuer',
    'JohnPhamous',
    'willkurt',
    'uwdata',
    'FasterXML',
    'ivanoats',
    'tomjoht',
    'darko-mesaros',
    'joshtynjala',
    'garrettmoon',
    'ditman',
    'oracle-quickstart',
    'wting',
    'rohithasrk',
    'awood45',
    'ussjoin',
    'kzys',
    'nellshamrell',
    'armanbilge',
    'jnolis',
    'kevinslin',
    'eagereyes',
    'ericlippert',
    'emina',
    'metasoarous',
    'micahhausler',
    'JamieMagee',
    'mleibman',
    'lvivski',
    'NTaylorMullen',
    'chhayac',
    'cgranade',
    'rastenis',
    'spiffxp',
    'maggiepint',
    'iyzhang',
    'hausdorff',
    'scottnonnenberg-signal',
    'colinrtwhite',
    'divega',
    'shangzhenyang',
    'moznion',
    'joshspicer',
    'briandorsey',
    'Gbps',
    'dhalperi',
    'brookr',
    'alexlande',
    'stephjs',
    'jeffra',
    'dshean',
    'kirkshoop',
    'jcrocholl',
    'aws-robotics',
    'aaronbrethorst',
    'jiayuasu',
    'robmikh',
    'chrismaddalena',
    'JustinXinLiu',
    'ckkelvinchan',
    'vinx13',
    'Barnacules',
    'natebosch',
    'crazy4pi314',
    'temilaj',
    'adw96',
    'Nickersoft',
    'cscherrer',
    'MichaelDrogalis',
    'mtyka',
    'JasonQSY',
    'logancyang',
    'rjleveque',
    'alexngn',
    'coderholic',
    'mapsam',
    'tgaddair',
    'jaxxstorm',
    'aria42',
    'edemkumodzi',
    'bitwiseman',
    'Jack-Anstey',
    'laylalaisy',
    'compdemocracy',
    'caelan',
    'jason718',
    'A-And',
    'leondz',
    'dennyglee',
    'brianm',
    'fredhohman',
    'seanmiddleditch',
    'tiffanyfay',
    'eric',
    'vgel',
    'calebrob6',
    'NotWoods',
    'agocke',
    'ProQuestionAsker',
    'f5devcentral',
    'anakryiko',
    'xuhdev',
    'ashblue',
    'F5Networks',
    'jpeddicord',
    'mothran',
    'azenla',
    'shirhatti',
    'andreiz',
    'sbaer',
    'stympy',
    'JasonKessler',
    'AmieDD',
    'rlabrecque',
    'ehashman',
    'jefflembeck',
    'erjohnso',
    'krisselden',
    'arsentieva',
    'dshafik',
    'grantr',
    'elalish',
    'Plazmaz',
    'davepeck',
    'chipturner',
    'codefellows',
    'hollenberry',
    'colmmacc',
    'ItalyPaleAle',
    'melaniewalsh',
    'xiw',
    'fabiorocha',
    'xieliaing',
    'bharatsingh430',
    'kimdhamilton',
    'DinoChiesa',
    'DeNeutoy',
    'pelikhan',
    'abgoswam',
    'rob0rt',
    'bhollis',
    'jmesserly',
    'joshfree',
    'fancymax',
    'scottnonnenberg',
    'romanbb',
    'geoffrich',
    'alex-mohr',
    'drub0y',
    'cidrblock',
    'williaster',
    'plamoni',
    'tnachen',
    'thagomizer',
    'leggett',
    'andrewiggins',
    'c5inco',
    'mousetraps',
    'runrunbear',
    'benhillis',
    'nquinlan',
    '74hc595',
    'liquidmetal',
    'JonathanRaiman',
    'japacible',
    'stellaraccident',
    'dstnbrkr',
    'skinny85',
    'adamthom',
    'ionvision',
    'LeeHolmes',
    'iambmelt',
    'khenidak',
    'justinweiss',
    'fulghum',
    'dzharii',
    'xiongchenyan',
    'samruddhikhandale',
    'AndrewSouthpaw',
    'brandonjbjelland',
    'huntergdavis',
    'dougz',
    'blackfalcon',
    'rschwabco',
    'znation',
    'cgillum',
    'EntilZha',
    'sofiadparamo',
    'PlayFab',
    'jperl',
    'arxanas',
    'mbucchia',
    'ylegall',
    'maxkatz6',
    'haotian-wang',
]


def main(users=None):
    """Fetch profile and repository data for each username and write both CSVs.

    Args:
        users: Iterable of GitHub logins to process. When omitted, the
            hard-coded ``DEFAULT_USERNAMES`` snapshot is used.

    For every login, ``fetch_user_details`` is called; when it returns data,
    the user's repositories are fetched as well. Results go to users.csv and
    repositories.csv via ``write_users_to_csv`` and
    ``write_repositories_to_csv``.
    """
    if users is None:
        users = DEFAULT_USERNAMES

    # Collect all user and repository data
    users_data = []
    repositories_data = []

    for user in users:
        user_data = fetch_user_details(user)
        if user_data:
            users_data.append(user_data)
            repos = fetch_user_repositories(user)
            repositories_data.extend(repos)

        time.sleep(1)  # Rate limiting

    # Write to CSV files
    write_users_to_csv(users_data)
    write_repositories_to_csv(repositories_data)
    print("Data written to users.csv and repositories.csv.")

if __name__ == "__main__":
    main()