In [1]:
# imports
import sys
import os
from time import sleep
from dotenv import load_dotenv

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# from selenium.webdriver.common.action_chains import ActionChains
# from selenium.webdriver.common.touch_actions import TouchActions
# from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# from selenium.common.exceptions import TimeoutException
# from selenium.common.exceptions import WebDriverException
from selenium.common.exceptions import NoSuchElementException
# from selenium.common.exceptions import StaleElementReferenceException
# from selenium.common.exceptions import ElementNotInteractableException
# from selenium.common.exceptions import ElementClickInterceptedException
# from selenium.common.exceptions import ElementNotVisibleException
# from selenium.common.exceptions import ElementNotSelectableException
# from selenium.common.exceptions import InvalidElementStateException
# from selenium.common.exceptions import InvalidArgumentException
# from selenium.common.exceptions import NoSuchWindowException
# from selenium.common.exceptions import NoSuchFrameException
# from selenium.common.exceptions import NoAlertPresentException
# from selenium.common.exceptions import NoSuchAttributeException
# from selenium.common.exceptions import MoveTargetOutOfBoundsException
# from selenium.common.exceptions import InvalidCookieDomainException
# from selenium.common.exceptions import UnableToSetCookieException

In [2]:
# os paths
# Two candidate export folders are derived here: `fff_exports` under the user's
# Desktop and `fff_exports` under the parent of the current working directory.
USER_PATH = os.path.expanduser('~')
DESKTOP_FDNAME = 'Desktop'
DESKTOP_PATH = os.path.join(USER_PATH, DESKTOP_FDNAME)
PARENT_PATH = os.path.dirname(os.getcwd())  # parent of the current working directory
EXPORTS_FDNAME = 'fff_exports'
EXPORTS_DESKTOP_PATH = os.path.join(DESKTOP_PATH, EXPORTS_FDNAME)
EXPORTS_PARENT_PATH = os.path.join(PARENT_PATH, EXPORTS_FDNAME)

# get envs
# load_dotenv() is called before reading the credentials from the environment;
# os.getenv returns None for a variable that is not set.
load_dotenv()
LI_USERNAME = os.getenv('LI_USERNAME')
LI_PASSWORD = os.getenv('LI_PASSWORD')
# LinkedIn page URLs
LI_DOMAIN = 'https://www.linkedin.com'
LI_URL_FEED = 'https://www.linkedin.com/feed/'
LI_URL_LOGIN = 'https://www.linkedin.com/login/'
LI_URL_FOLLOWERS = 'https://www.linkedin.com/mynetwork/network-manager/people-follow/followers/'
LI_URL_CONNECTIONS = 'https://www.linkedin.com/mynetwork/invite-connect/connections/'
# CSS selectors passed to find_people() for the person link / person name
LI_CLASS_FOLLOWERS_ENTITY = 'span.entity-result__title-text a.app-aware-link'
LI_CLASS_CONNECTIONS_ENTITY = 'a.mn-connection-card__link'
LI_CLASS_CONNECTIONS_NAME = 'span.mn-connection-card__name'
# Class name (looked up with By.CLASS_NAME) of the button that loads more results
LI_CLASS_LOAD_BTNCLASS = 'scaffold-finite-scroll__load-button'

In [3]:
# options
# Chrome launch configuration shared by the single driver instance.
CHROME_OPT = Options()
CHROME_OPT.add_experimental_option('detach', True)
CHROME_OPT.add_experimental_option('excludeSwitches', ['enable-logging'])
CHROME_OPT.add_argument('--disable-logging')
CHROME_OPT.add_argument('--log-level=3')
CHROME_OPT.add_argument('--start-maximized')
# CHROME_OPT.add_argument('--headless')
# CHROME_OPT.add_argument('--no-sandbox')
# CHROME_OPT.add_argument('--disable-setuid-sandbox')
CHROME_OPT.add_argument('--disable-gpu')
CHROME_OPT.add_argument('--disable-dev-shm-usage')
CHROME_OPT.add_argument('--disable-webgl')
# '--disable-web-security' was removed on purpose: it switches off the browser's
# same-origin policy, and this session types real account credentials into the
# page. Nothing in this notebook needs cross-origin access.
CHROME_OPT.add_argument('--disable-infobars')
CHROME_OPT.add_argument('--disable-extensions')
CHROME_OPT.add_argument('--disable-default-apps')
CHROME_OPT.add_argument('--disable-notifications')
CHROME_OPT.add_argument('--disable-popup-blocking')
CHROME_OPT.add_argument('--disable-translate')
# CHROME_OPT.add_argument('lang=ko_KR')
# CHROME_OPT.add_argument('--ignore-certificate-errors')
# CHROME_OPT.add_argument('--ignore-ssl-errors')
CHROME_OPT.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

# constants (all in seconds)
SLP_HARD = 4   # pause after each scroll on the connections page
SLP_SOFT = 2   # pause after each scroll on any other page
MAX_WAIT = 20  # implicit wait and default explicit-wait timeout
OTP_WAIT = 60  # how long login() waits for the feed URL after submitting

In [4]:
# li_fff_checker.py

# if (__name__ != '__main__'):
#     sys.exit()

# Single shared Chrome session; every function in this cell reads this global.
driver = webdriver.Chrome(options=CHROME_OPT)
# Implicit wait of MAX_WAIT seconds for element lookups on this driver.
# NOTE(review): the functions in this cell also use explicit WebDriverWait calls;
# Selenium's documentation advises against mixing implicit and explicit waits
# because the combined wait time becomes unpredictable — confirm this is intended.
driver.implicitly_wait(MAX_WAIT)

def quit_and_exit():
    """Quit the shared browser session, then stop execution via SystemExit."""
    driver.quit()
    raise SystemExit

def print_info():
    """Print the title and URL of the page the driver is currently on."""
    page_title = driver.title
    page_url = driver.current_url
    print(f'Title: {page_title}\n└ URL: {page_url}')

def login():
    """Log in to LinkedIn with the credentials read from the environment.

    Opens the login page, submits LI_USERNAME / LI_PASSWORD and then waits up
    to OTP_WAIT seconds for the browser URL to contain LI_URL_FEED. If the
    credentials are missing, or the feed URL is not reached (any exception
    from the wait), the browser is closed and the run ends via
    quit_and_exit().
    """
    # os.getenv() yields None for unset variables; stop with a clear message
    # instead of handing None to send_keys().
    if not LI_USERNAME or not LI_PASSWORD:
        print('LI_USERNAME and LI_PASSWORD must be set in the environment or a .env file')
        quit_and_exit()
        return

    driver.get(LI_URL_LOGIN)
    print_info()

    username = driver.find_element(By.ID, 'username')
    username.send_keys(LI_USERNAME)
    password = driver.find_element(By.ID, 'password')
    password.send_keys(LI_PASSWORD)
    password.send_keys(Keys.RETURN)  # submit the form

    try:
        # OTP_WAIT is longer than MAX_WAIT to leave time for a manual
        # verification step before the feed loads.
        WebDriverWait(driver, OTP_WAIT).until(EC.url_contains(LI_URL_FEED))
    except Exception as e:
        print(e)
        quit_and_exit()
    else:
        print_info()

def load_to_bottom(url, max_idle_rounds=3):
    """Open ``url`` and scroll until the page height stops growing.

    Each round sends END to the page body, pauses (SLP_HARD seconds on the
    connections page, SLP_SOFT otherwise), tries to click the "load more"
    button, and compares ``document.body.scrollHeight`` with the previous
    round.

    Args:
        url: Page to open and scroll.
        max_idle_rounds: Number of consecutive rounds with an unchanged height
            after which scrolling is considered finished (default 3).
    """
    driver.get(url)
    print_info()

    pause = SLP_HARD if url == LI_URL_CONNECTIONS else SLP_SOFT
    last_height = driver.execute_script('return document.body.scrollHeight')
    idle_rounds = 0

    while True:
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.END)
        sleep(pause)

        try:
            # NOTE(review): the driver also has an implicit wait configured, so a
            # round without a load button may block for MAX_WAIT seconds or more —
            # confirm this is acceptable.
            load_btn = WebDriverWait(driver, MAX_WAIT).until(
                EC.element_to_be_clickable((By.CLASS_NAME, LI_CLASS_LOAD_BTNCLASS)))
            load_btn.click()
        except Exception as e:
            # Best effort: a missing or unclickable button must not stop scrolling.
            print(f"No load button found or other issue: {e}")

        curr_height = driver.execute_script('return document.body.scrollHeight')
        print(f'Scrolling... Current Height: {curr_height}')

        if curr_height == last_height:
            idle_rounds += 1
            if idle_rounds >= max_idle_rounds:
                # Report completion only when scrolling has actually finished.
                print(f'Scrolling finished... Final Height: {curr_height}, Finish Count: {idle_rounds}')
                break
            print(f'Height unchanged... Current Height: {curr_height}, Finish Count: {idle_rounds}')
        else:
            idle_rounds = 0

        last_height = curr_height

def print_lastperson(people):
    """Print the last entry of ``people`` prefixed with the list's length.

    Each entry is a dict with 'name' and 'link' keys. Raises IndexError when
    ``people`` is empty.
    """
    total = len(people)
    newest = people[-1]
    name, link = newest["name"], newest["link"]
    print(f'{total}. {name} (href="{link}")')

def find_people(css_selector, name_selector=None):
    """Collect name/link pairs for every person element on the current page.

    Args:
        css_selector: CSS selector matching one element per person; its
            ``href`` attribute is used as the link.
        name_selector: Optional CSS selector, evaluated inside each matched
            element, whose text is the person's name (used for connections).
            When omitted, the matched element's own text is the name
            (used for followers).

    Returns:
        A list of ``{'name': ..., 'link': ...}`` dicts; empty when no element
        matched within MAX_WAIT seconds.
    """
    # Imported locally because the module-level import of TimeoutException
    # is commented out.
    from selenium.common.exceptions import TimeoutException

    people = []
    try:
        entities = WebDriverWait(driver, MAX_WAIT).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, css_selector)))
    except (TimeoutException, NoSuchElementException):
        # WebDriverWait.until() signals "nothing matched in time" with
        # TimeoutException; treat that as "no people found".
        return people

    for entity in entities:
        if name_selector:  # Connection
            name = entity.find_element(By.CSS_SELECTOR, name_selector).text
        else:  # Follower
            name = entity.text

        link = entity.get_attribute('href')
        people.append({'name': name, 'link': link})
        print_lastperson(people)

    return people

def find_strangers(followers, connections):
    """Return the connections whose name is not among the followers' names.

    Both arguments are lists of dicts with 'name' and 'link' keys. The result
    keeps the order of ``connections``; each match is printed as it is added.
    """
    follower_names = {person['name'] for person in followers}

    strangers = []
    for person in connections:
        if person['name'] not in follower_names:
            strangers.append(person)
            print_lastperson(strangers)

    return strangers

def find_filepath(filename, ext):
    """Return the first existing path of ``<filename>.<ext>`` in the export folders.

    EXPORTS_PARENT_PATH is checked before EXPORTS_DESKTOP_PATH.

    Raises:
        FileNotFoundError: the file exists in neither folder.
    """
    target = f'{filename}.{ext}'
    for folder in (EXPORTS_PARENT_PATH, EXPORTS_DESKTOP_PATH):
        candidate = os.path.join(folder, target)
        if os.path.exists(candidate):
            return candidate

    raise FileNotFoundError

def export_to_md(people, filename):
    """Write ``people`` to ``<filename>.md`` as a list of markdown links.

    An existing export file (located via find_filepath) is overwritten in
    place; otherwise the file is created under EXPORTS_PARENT_PATH, creating
    that folder if needed.

    Args:
        people: List of dicts with 'name' and 'link' keys.
        filename: Base name of the markdown file, without extension.
    """
    try:
        fp = find_filepath(filename, 'md')
    except FileNotFoundError:
        # No previous export: fall back to the parent-level exports folder.
        os.makedirs(EXPORTS_PARENT_PATH, exist_ok=True)
        fp = os.path.join(EXPORTS_PARENT_PATH, f'{filename}.md')

    # Writing happens after the try block (not in a `finally`) so that a failure
    # while resolving the path surfaces as itself instead of being masked by an
    # unbound `fp`.
    with open(fp, 'w', encoding='utf-8') as f:
        f.write(f'# {filename.capitalize()}  \n')
        f.write(f'> **{len(people)}** people found.  \n\n')
        f.write('*[Name](Link)*  \n')
        f.write('---  \n')
        for p in people:
            f.write(f'[{p["name"]}]({p["link"]})  \n')

    print(f'{len(people)} people exported to {fp}')

def import_from_md(filename):
    """Read back a people list written by export_to_md.

    Only lines of the form ``[name](link)`` are parsed; every other line
    (title, count, header, separator, blanks) is ignored. If the file cannot
    be located, a message is printed and the run ends via quit_and_exit().

    Args:
        filename: Base name of the markdown file, without extension.

    Returns:
        A list of ``{'name': ..., 'link': ...}`` dicts.
    """
    try:
        fp = find_filepath(filename, 'md')
    except FileNotFoundError:
        print(f'File {filename}.md not found in {EXPORTS_FDNAME} folder')
        quit_and_exit()
        return None
    except Exception as e:
        print(e)
        quit_and_exit()
        return None

    people = []
    with open(fp, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not (line.startswith('[') and line.endswith(')')):
                continue
            # Split on the LAST '](' so a name that itself contains '](' is
            # kept whole and the link is everything after the separator.
            name, sep, link = line[1:-1].rpartition('](')
            if not sep:
                continue  # Not a markdown link line
            people.append({'name': name, 'link': link})

    print(f'{len(people)} people imported from {fp}')
    return people


In [None]:
# main.py

# from utils.li_fff_checker import *

# Sign in first; login() closes the browser and exits if the feed is not reached.
login()

# Scroll the followers page to the end, collect everyone listed, save to followers.md.
load_to_bottom(LI_URL_FOLLOWERS)
followers = find_people(LI_CLASS_FOLLOWERS_ENTITY)
export_to_md(followers, 'followers')

# Same for connections; here the name is read from a nested element.
load_to_bottom(LI_URL_CONNECTIONS)
connections = find_people(LI_CLASS_CONNECTIONS_ENTITY, LI_CLASS_CONNECTIONS_NAME)
export_to_md(connections, 'connections')

# Re-read both lists from the markdown files that were just written.
imported_followers = import_from_md('followers')
imported_connections = import_from_md('connections')

# Connections whose name does not appear among the followers' names.
strangers = find_strangers(imported_followers, imported_connections)
export_to_md(strangers, 'strangers')

# Close the browser and end the run.
quit_and_exit()
