https://www.jcchouinard.com/linkedin-api/

In [3]:
import json
import random
import requests
import string

In [4]:
def read_creds(filename):
    '''
    Store API credentials in a safe place.
    If you use Git, make sure to add the file to .gitignore
    '''
    with open(filename) as f:
        credentials = json.load(f)
    return credentials
 
creds = read_creds('credentials.json')
client_id, client_secret = creds['client_id'], creds['client_secret']
redirect_uri = creds['redirect_uri']

In [5]:
def create_CSRF_token():
    '''
    This function generates a random string of letters.
    It is not required by the Linkedin API to use a CSRF token.
    However, it is recommended to protect against cross-site request forgery
    '''
    letters = string.ascii_lowercase
    token = ''.join(random.choice(letters) for i in range(20))
    return token

In [6]:
api_url = 'https://www.linkedin.com/oauth/v2'
 
def authorize(api_url,client_id,client_secret,redirect_uri):
    '''
    Make a HTTP request to the authorization URL.
    It will open the authentication URL.
    Once authorized, it'll redirect to the redirect URI given.
    The page will look like an error. but it is not.
    You'll need to copy the redirected URL.
    '''
    # Request authentication URL
    csrf_token = create_CSRF_token()
    params = {
        'response_type': 'code',
        'client_id': client_id,
        'redirect_uri': redirect_uri,
        'state': csrf_token,
        'scope': 'r_liteprofile,r_emailaddress,w_member_social'
        }
 
    response = requests.get(f'{api_url}/authorization',params=params)
 
    print(f'''
    The Browser will open to ask you to authorize the credentials.\n
    Since we have not set up a server, you will get the error:\n
    This site can’t be reached. localhost refused to connect.\n
    This is normal.\n
    You need to copy the URL where you are being redirected to.\n
    ''')
 
    open_url(response.url)
 
    # Get the authorization verifier code from the callback url
    redirect_response = input('Paste the full redirect URL here:')
    auth_code = parse_redirect_uri(redirect_response)
    return auth_code

In [7]:
def open_url(url):
    '''
    Function to Open URL.
    Used to open the authorization link
    '''
    import webbrowser
    print(url)
    webbrowser.open(url)

In [8]:
def parse_redirect_uri(redirect_response):
    '''
    Parse redirect response into components.
    Extract the authorized token from the redirect uri.
    '''
    from urllib.parse import urlparse, parse_qs
 
    url = urlparse(redirect_response)
    url = parse_qs(url.query)
    return url['code'][0]

In [10]:
def save_token(filename,data):
    '''
    Write token to credentials file.
    '''
    data = json.dumps(data, indent = 4) 
    with open(filename, 'w') as f: 
        f.write(data)

In [11]:
def headers(access_token):
    '''
    Make the headers to attach to the API call.
    '''
    headers = {
    'Authorization': f'Bearer {access_token}',
    'cache-control': 'no-cache',
    'X-Restli-Protocol-Version': '2.0.0'
    }
    return headers

In [12]:
def auth(credentials):
    '''
    Run the Authentication.
    If the access token exists, it will use it to skip browser auth.
    If not, it will open the browser for you to authenticate.
    You will have to manually paste the redirect URI in the prompt.
    '''
    creds = read_creds(credentials)
    print(creds)
    client_id, client_secret = creds['client_id'], creds['client_secret']
    redirect_uri = creds['redirect_uri']
    api_url = 'https://www.linkedin.com/oauth/v2'
         
    if 'access_token' not in creds.keys(): 
        args = client_id,client_secret,redirect_uri
        auth_code = authorize(api_url,*args)
        access_token = refresh_token(auth_code,*args)
        creds.update({'access_token':access_token})
        save_token(credentials,creds)
    else: 
        access_token = creds['access_token']
    return access_token

In [23]:
credentials = 'credentials.json'
access_token = auth(credentials)


{'client_id': '86d1ws60zw4nbq', 'client_secret': 'cQGF92NVmpAAunlu', 'redirect_uri': 'http://localhost:8080', 'access token': 'AQXxXpr25-WZYKmljiTjiDp133N3YbuE6OE_br0-DZWuQ1d4l_P9Vr3t_I3shGYRcIVBLfIfxuvlYZft56DjbU8hrrFDfNv7FL4TLoxsHQqoBkbsIMWM0DWcJMYli-mtChBagjjE8uBG29sXenSsH6IwhrSr4_R2_1_E5sUZdiuGdILCRtnsymSy0rN2L9qhqcCwBbMCUNm-5b779xseBfs6ewd_MraqwNp0Nx77__ss8-k9bC4bCkJKvMRFmAUHDId8gdkaovomwzEFPTkVUbPuP6SbEiVUCawlwOtO_JiK7WSkcJgRchOKfF68GQdXiWScOYr9T3NkCaT-rtqZbPJG_I0yzw', 'access_token': 'AQUhZB7rEDOdJrRW-B65iSFMgtEz4t7yXUla-VpyP0WYjzetulD7JGMVQNu1imHCkKaQN9EB2v2PeAWnvCAngzkIe9VBoXJo0VLl3Wci28MZfJI-PWSXmvr2blgrWRCLrYx5rAyoyyffZipVCvwVnUIMRcvLYAjD8ToyoYgddioTwgYyIRSrUXAtk1QFRvGff3FN7y0GpIQaWIjien7W9P8vKT46pRPsaJvWhZfab6fKDNUxFuJSynOk8CiHqsgpySgoUmHQEEedhRi3d6nLaU8KmNFkLH_AtAzcLY1nTfs_el-tp2F3VF2S65hgyez6IMCXUxURcooLEyIIQ55qXXTSx0LnVQ'}


AQQuKEqjZ3QecHGwvwtw7a8uXdPRP-9VdHxbUgZgyvNZlGZWck-NEZ6kOc7uqDgNXIwS8JC93F4VXk-oyTx-RrXu2FQAyx-dsMiepXenv-Xp2tO2x8VQUArU6JGLVFaG_g2b6I9sDUIq8piUvkQSTDLAasTFCrKVi13L4s37af0XhBoXGfOyuXNfrQY1G8-BlCpclJNV1SM_k-pVsRw

In [52]:
import requests
 
from ln_oauth import auth, headers
 
def user_info(headers):
    '''
    Get user information from Linkedin
    '''
    response = requests.get('https://api.linkedin.com/v2/people/williamhgates/', headers = headers)
    user_info = response.json()
    return user_info
 
if __name__ == '__main__':
    credentials = 'credentials.json'
    access_token = auth(credentials) # Authenticate the API
    headers = headers(access_token) # Make the headers to attach to the API call.
    user_info = user_info(headers) # Get user info
    print(user_info)

{'client_id': '86d1ws60zw4nbq', 'client_secret': 'cQGF92NVmpAAunlu', 'redirect_uri': 'http://localhost:8080', 'access token': 'AQXxXpr25-WZYKmljiTjiDp133N3YbuE6OE_br0-DZWuQ1d4l_P9Vr3t_I3shGYRcIVBLfIfxuvlYZft56DjbU8hrrFDfNv7FL4TLoxsHQqoBkbsIMWM0DWcJMYli-mtChBagjjE8uBG29sXenSsH6IwhrSr4_R2_1_E5sUZdiuGdILCRtnsymSy0rN2L9qhqcCwBbMCUNm-5b779xseBfs6ewd_MraqwNp0Nx77__ss8-k9bC4bCkJKvMRFmAUHDId8gdkaovomwzEFPTkVUbPuP6SbEiVUCawlwOtO_JiK7WSkcJgRchOKfF68GQdXiWScOYr9T3NkCaT-rtqZbPJG_I0yzw', 'access_token': 'AQUhZB7rEDOdJrRW-B65iSFMgtEz4t7yXUla-VpyP0WYjzetulD7JGMVQNu1imHCkKaQN9EB2v2PeAWnvCAngzkIe9VBoXJo0VLl3Wci28MZfJI-PWSXmvr2blgrWRCLrYx5rAyoyyffZipVCvwVnUIMRcvLYAjD8ToyoYgddioTwgYyIRSrUXAtk1QFRvGff3FN7y0GpIQaWIjien7W9P8vKT46pRPsaJvWhZfab6fKDNUxFuJSynOk8CiHqsgpySgoUmHQEEedhRi3d6nLaU8KmNFkLH_AtAzcLY1nTfs_el-tp2F3VF2S65hgyez6IMCXUxURcooLEyIIQ55qXXTSx0LnVQ'}
{'serviceErrorCode': 100, 'message': 'Not enough permissions to access: GET /people/williamhgates/', 'status': 403}


https://developer.linkedin.com/docs/fields/basic-profile

I managed to get my own profile, but not others. In order to scrape for other people's profile, we need to use profile API, which is restricted to those developers approved by LinkedIn and subject to applicable data restrictions in their agreements.

https://towardsdatascience.com/mining-data-on-linkedin-9b70681b1467

In [47]:
import os
import csv

CSV_FILE = os.path.join('./', 'Connections.csv')

csvReader = csv.DictReader(open(CSV_FILE), delimiter=',', quotechar='"')
contacts = [row for row in csvReader]

In [49]:
workingAtGoogle = 0

for contact in contacts:
    for t in contact['Company'].split('/'):
        if (t == 'Google'):
            workingAtGoogle = workingAtGoogle+1
print('There are %d people who are working at Google.' % (workingAtGoogle))

There are 5 people who are working at Google.


Alternative: Make a profile on Linkedin for UChicago SSD, use selenium to get all linkedin url of the alumni, send them invites.

"Your LinkedIn data belongs to you, and you can download an archive any time or view the rich media you have uploaded."