# GET USERS DATA

In [59]:
import requests

In [60]:
token = ''
headers = {'Authorization': f'token {token}'}

In [117]:
base_url = 'https://api.github.com'
end_point = '/search/users'
query_parameter = '?q=location:Chicago+followers:>100'
page = 1
per_page = 100

### get users using github api and paging

In [118]:
chicago_users = []

while True:
    paging = f'&page={page}&per_page={per_page}'
    url = base_url+end_point+query_parameter+paging
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        data = response.json()
        chicago_users.extend(data.get('items', []))
        if 'Link' not in response.headers:
            break
        else:
            links = response.headers['Link']
            if 'rel="next"' not in links:
                break
        page += 1
    else:
        print("Error:", response.status_code, response.json())
        break

In [132]:
chicago_users

[{'login': 'cassidoo',
  'id': 1454517,
  'node_id': 'MDQ6VXNlcjE0NTQ1MTc=',
  'avatar_url': 'https://avatars.githubusercontent.com/u/1454517?v=4',
  'gravatar_id': '',
  'url': 'https://api.github.com/users/cassidoo',
  'html_url': 'https://github.com/cassidoo',
  'followers_url': 'https://api.github.com/users/cassidoo/followers',
  'following_url': 'https://api.github.com/users/cassidoo/following{/other_user}',
  'gists_url': 'https://api.github.com/users/cassidoo/gists{/gist_id}',
  'starred_url': 'https://api.github.com/users/cassidoo/starred{/owner}{/repo}',
  'subscriptions_url': 'https://api.github.com/users/cassidoo/subscriptions',
  'organizations_url': 'https://api.github.com/users/cassidoo/orgs',
  'repos_url': 'https://api.github.com/users/cassidoo/repos',
  'events_url': 'https://api.github.com/users/cassidoo/events{/privacy}',
  'received_events_url': 'https://api.github.com/users/cassidoo/received_events',
  'type': 'User',
  'user_view_type': 'public',
  'site_admin': T

### get users details in given format

In [133]:
users = []

for user in chicago_users:
    response = requests.get(user["url"], headers=headers)
    if response.status_code == 200:
        user_info = response.json()
        users.append({
            "login": user_info["login"],
            "name": user_info.get("name") or "",
            "company": user_info.get("company") or "",
            "location": user_info.get("location")  or "",
            "email": user_info.get("email")  or "",
            "hireable": user_info.get("hireable") or "",
            "bio": user_info.get("bio") or "",
            "public_repos": user_info["public_repos"]  or "",
            "followers": user_info["followers"]  or "",
            "following": user_info["following"]  or "",
            "created_at": user_info["created_at"]  or "",
        })
    else:
        print("Error:", response.status_code, response.json())
        break

In [134]:
users

[{'login': 'cassidoo',
  'name': 'Cassidy Williams',
  'company': 'GitHub',
  'location': 'Chicago, IL',
  'email': '',
  'hireable': '',
  'bio': 'Making memes and dreams... and software',
  'public_repos': 165,
  'followers': 13383,
  'following': 102,
  'created_at': '2012-02-20T16:36:23Z'},
 {'login': 'felangel',
  'name': 'Felix Angelov',
  'company': '@shorebirdtech',
  'location': 'Chicago',
  'email': 'felangelov@gmail.com',
  'hireable': '',
  'bio': 'software engineer by day, software engineer by night.',
  'public_repos': 125,
  'followers': 8680,
  'following': 67,
  'created_at': '2014-09-22T02:35:58Z'},
 {'login': 'dabeaz',
  'name': 'David Beazley',
  'company': 'Dabeaz, LLC',
  'location': 'Chicago',
  'email': 'dave@dabeaz.com',
  'hireable': '',
  'bio': "Author of the Python Essential Reference (Addison-Wesley), Python Cookbook (O'Reilly), and former computer science professor. Come take a class! ",
  'public_repos': 34,
  'followers': 5181,
  'following': '',
  'cre

### clean company name

In [138]:
a = users

for user in a:
    company = user.get('company', '').strip().upper()
    if company.startswith('@'):
        company = company[1:]
    user['company'] = company

In [139]:
a

[{'login': 'cassidoo',
  'name': 'Cassidy Williams',
  'company': 'GITHUB',
  'location': 'Chicago, IL',
  'email': '',
  'hireable': '',
  'bio': 'Making memes and dreams... and software',
  'public_repos': 165,
  'followers': 13383,
  'following': 102,
  'created_at': '2012-02-20T16:36:23Z'},
 {'login': 'felangel',
  'name': 'Felix Angelov',
  'company': 'SHOREBIRDTECH',
  'location': 'Chicago',
  'email': 'felangelov@gmail.com',
  'hireable': '',
  'bio': 'software engineer by day, software engineer by night.',
  'public_repos': 125,
  'followers': 8680,
  'following': 67,
  'created_at': '2014-09-22T02:35:58Z'},
 {'login': 'dabeaz',
  'name': 'David Beazley',
  'company': 'DABEAZ, LLC',
  'location': 'Chicago',
  'email': 'dave@dabeaz.com',
  'hireable': '',
  'bio': "Author of the Python Essential Reference (Addison-Wesley), Python Cookbook (O'Reilly), and former computer science professor. Come take a class! ",
  'public_repos': 34,
  'followers': 5181,
  'following': '',
  'crea

### export users data to CSV

In [140]:
import pandas as pd
users_df = pd.DataFrame(a)
users_df.to_csv("users.csv", index=False)

# GET REPOSITORIES DATA

In [170]:
page = 1
per_page = 100

In [178]:
repo_data = []

for user in chicago_users:
    repo_url = user['repos_url']
    repo_count = 0
    page = 1
    per_page = 100
    
    while True:
        paging = f'?page={page}&per_page={per_page}'
        url = repo_url + paging
        response = requests.get(url, headers=headers)
        
        if response.status_code == 200:
            repos = response.json()
            
            for repo in repos:
                repo_data.append({
                    "login": user['login'],
                    "full_name": repo.get("full_name") or "",
                    "created_at": repo.get("created_at") or "",
                    "stargazers_count": repo.get("stargazers_count") or 0,
                    "watchers_count": repo.get("watchers_count") or 0,
                    "language": repo.get("language") or "",
                    "has_projects": repo.get("has_projects") or False,
                    "has_wiki": repo.get("has_wiki") or False,
                    "license_name": repo["license"]["key"] if repo.get("license") else ""
                })
                
                repo_count += 1
                if repo_count >= 500:
                    break

            if 'Link' not in response.headers:
                break
            else:
                links = response.headers['Link']
                if 'rel="next"' not in links:
                    break
            page += 1
            
        else:
            print("Error:", response.status_code, response.json())
            break


In [179]:
repo_data

[{'login': 'cassidoo',
  'full_name': 'cassidoo/accordion',
  'created_at': '2020-01-23T13:19:32Z',
  'stargazers_count': 17,
  'watchers_count': 17,
  'language': 'JavaScript',
  'has_projects': True,
  'has_wiki': True,
  'license_name': ''},
 {'login': 'cassidoo',
  'full_name': 'cassidoo/agagd',
  'created_at': '2023-11-20T05:21:51Z',
  'stargazers_count': 1,
  'watchers_count': 1,
  'language': '',
  'has_projects': True,
  'has_wiki': False,
  'license_name': 'mit'},
 {'login': 'cassidoo',
  'full_name': 'cassidoo/ama',
  'created_at': '2020-06-09T02:22:18Z',
  'stargazers_count': 62,
  'watchers_count': 62,
  'language': '',
  'has_projects': False,
  'has_wiki': False,
  'license_name': ''},
 {'login': 'cassidoo',
  'full_name': 'cassidoo/ASL2048',
  'created_at': '2014-03-20T14:29:23Z',
  'stargazers_count': 0,
  'watchers_count': 0,
  'language': 'CSS',
  'has_projects': True,
  'has_wiki': True,
  'license_name': 'mit'},
 {'login': 'cassidoo',
  'full_name': 'cassidoo/astro'

### export to CSV

In [183]:
repos_df = pd.DataFrame(repo_data)
repos_df.to_csv("repositories.csv", index=False)