In [1]:
!pip install requests pandas



In [2]:
import requests
import pandas as pd

GITHUB_API_URL = "https://api.github.com"
GITHUB_TOKEN = "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"

In [3]:
def get_users_in_seattle():
    query = "location:Seattle followers:>200"
    headers = {"Authorization": f"token {GITHUB_TOKEN}"}
    params = {"q": query, "per_page": 100}
    response = requests.get(f"{GITHUB_API_URL}/search/users", headers=headers, params=params)
    return response.json()["items"]

def get_user_details(username):
    headers = {"Authorization": f"token {GITHUB_TOKEN}"}
    response = requests.get(f"{GITHUB_API_URL}/users/{username}", headers=headers)
    return response.json()

def get_user_repos(username):
    headers = {"Authorization": f"token {GITHUB_TOKEN}"}
    response = requests.get(f"{GITHUB_API_URL}/users/{username}/repos", headers=headers)
    return response.json()



In [5]:
def clean_company_name(company):
    if company:
        company = company.strip().lstrip('@').upper()
    return company

def main():
    users = get_users_in_seattle()
    user_data = []
    repo_data = []

    for user in users:
        username = user["login"]
        user_details = get_user_details(username)
        user_data.append({
            "login": user_details["login"],
            "name": user_details.get("name", ""),
            "company": clean_company_name(user_details.get("company", "")),
            "location": user_details.get("location", ""),
            "email": user_details.get("email", ""),
            "hireable": user_details.get("hireable", ""),
            "bio": user_details.get("bio", ""),
            "public_repos": user_details.get("public_repos", 0),
            "followers": user_details.get("followers", 0),
            "following": user_details.get("following", 0),
            "created_at": user_details.get("created_at", "")
        })

        repos = get_user_repos(username)
        for repo in repos:
            repo_data.append({
                "login": username,
                "full_name": repo["full_name"],
                "created_at": repo["created_at"],
                "stargazers_count": repo["stargazers_count"],
                "watchers_count": repo["watchers_count"],
                "language": repo["language"],
                "has_projects": repo["has_projects"],
                "has_wiki": repo["has_wiki"],
                "license_name": repo["license"]["name"] if repo["license"] else ""
            })

    users_df = pd.DataFrame(user_data)
    repos_df = pd.DataFrame(repo_data)

    users_df.to_csv("users.csv", index=False)
    repos_df.to_csv("repositories.csv", index=False)

if __name__ == "__main__":
    main()

KeyError: 'items'

In [7]:
users_df = pd.read_csv("/content/users.csv")
repos_df = pd.read_csv("/content/repositories.csv")

In [8]:
users_df.head()

Unnamed: 0,login,name,company,location,email,hireable,bio,public_repos,followers,following,created_at,year,leader_strength,surname,bio_length,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
0,vczh,,,"Seattle, WA, USA",,False,Main contributor of @vczh-libraries .\r\n\r\n...,12.0,17512.0,11.0,2011-05-07T08:30:48Z,2011.0,1459.333333,,8.0,,,,
1,bradfitz,Brad Fitzpatrick,TAILSCALE,Seattle,brad@danga.com,False,"LiveJournal, memcached, OpenID, @golang team (...",141.0,12107.0,41.0,2008-03-09T05:08:14Z,2008.0,288.261905,Fitzpatrick,17.0,,,,
2,munificent,Bob Nystrom,"GOOGLE, ON @DART-LANG","Seattle, WA",,False,"Programming language developer, ex-game develo...",49.0,9924.0,144.0,2009-01-13T15:37:46Z,2009.0,68.441379,Nystrom,15.0,,,,
3,tenderlove,Aaron Patterson,SHOPIFY,Seattle,,False,üíòüíôüíúüíóüíö‚ù§üíìüíõüíöüíó,357.0,9447.0,27.0,2008-03-14T20:04:17Z,2008.0,337.392857,Patterson,1.0,,,,
4,ahmetb,Ahmet Alp Balkan,LINKEDIN,"Seattle, WA",github@ahmet.im,True,Working on compute orchestration with Kubernet...,221.0,8212.0,34.0,2009-11-28T14:59:59Z,2009.0,234.628571,Balkan,9.0,,,Row Labels,Sum of leader_strength


In [9]:
top_5_users = users_df.sort_values(by=['followers'], ascending=False).head(5)['login'].tolist()
print(', '.join(top_5_users))

vczh, bradfitz, munificent, tenderlove, ahmetb


In [10]:
users_df['created_at'] = pd.to_datetime(users_df['created_at'])
earliest_5_users = users_df.sort_values(by=['created_at']).head(5)['login'].tolist()
print(', '.join(earliest_5_users))

topfunky, nex3, alloy, bradfitz, tenderlove


In [11]:
filtered_repos_df = repos_df.dropna(subset=['license_name'])
popular_licenses = filtered_repos_df.groupby('license_name')['license_name'].count().sort_values(ascending=False).head(3).index.tolist()
print(', '.join(popular_licenses))

MIT License, Apache License 2.0, Other


In [12]:
majority_company = users_df['company'].mode()[0]
print(majority_company)


MICROSOFT


In [13]:
popular_language = repos_df['language'].value_counts().idxmax()
print(popular_language)

JavaScript


In [14]:
'''
recent_users = users[users['created_at'] > '2020-01-01']
recent_repos = repos[repos['login'].isin(recent_users['login'])]
second_popular_language = recent_repos['language'].value_counts().index[1]
print(second_popular_language)
'''
recent_users = users_df[users_df['created_at'] > '2020-01-01']
recent_repos = repos_df[repos_df['login'].isin(recent_users['login'])]

# Check if there are at least two languages
language_counts = recent_repos['language'].value_counts()
if len(language_counts) > 1:
    second_popular_language = language_counts.index[1]
    print(second_popular_language)

CSS


In [15]:
avg_stars = repos_df.groupby('language')['stargazers_count'].mean().idxmax()
print(avg_stars)


Vim script


In [16]:
users_df['leader_strength'] = users_df['followers'] / (1 + users_df['following'])
top_leaders = users_df.sort_values(by='leader_strength', ascending=False).head(5)
print(','.join(top_leaders['login']))

awslabs,mission-peace,karan,cmuratori,nex3


In [17]:
correlation = users_df['followers'].corr(users_df['public_repos'])
print(f'{correlation:.3f}')


0.147


In [18]:
from scipy.stats import linregress

slope, intercept, r_value, p_value, std_err = linregress(users_df['public_repos'], users_df['followers'])
print(f'{slope:.3f}')

nan


In [19]:
projects_wiki_corr = repos_df['has_projects'].corr(repos_df['has_wiki'])
print(f'{projects_wiki_corr:.3f}')

0.368


In [20]:
hireable_following = users_df[users_df['hireable'] == True]['following'].mean()
non_hireable_following = users_df[users_df['hireable'] == False]['following'].mean()
diff_following = hireable_following - non_hireable_following
print(f'{diff_following:.3f}')

46.587


In [21]:
users_df['bio_length'] = users_df['bio'].str.len()
bio_followers_slope, _, _, _, _ = linregress(users_df.dropna(subset=['bio_length'])['bio_length'], users_df.dropna(subset=['bio_length'])['followers'])
print(f'{bio_followers_slope:.3f}')

2.659


In [22]:
repos_df['created_at'] = pd.to_datetime(repos_df['created_at'])
repos_df['weekday'] = repos_df['created_at'].dt.weekday
weekend_repos = repos_df[repos_df['weekday'] >= 5]
top_weekend_creators = weekend_repos['login'].value_counts().head(5)
print(','.join(top_weekend_creators.index))

eugeneyan,anvaka,ryanoasis,aidenybai,derv82


In [23]:
hireable_with_email = users_df[users_df['hireable'] == True]['email'].notna().mean()
non_hireable_with_email = users_df[users_df['hireable'] == False]['email'].notna().mean()
email_diff = hireable_with_email - non_hireable_with_email
print(f'{email_diff:.3f}')


0.125


In [24]:
users_df['surname'] = users_df['name'].str.split().str[-1]
common_surname = users_df['surname'].mode()[0]
print(common_surname)

Ai2
