### Importing Selenium Webdriver & Testing

In [1]:
from selenium import webdriver  # allow launching browser
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By # allow search with parameters
from selenium.webdriver.support.ui import WebDriverWait # allow waiting for page to load
from selenium.webdriver.support import expected_conditions as EC    # determine whether the web page has loaded
from selenium.common.exceptions import TimeoutException # handling timeout situation

Code for opening a new browser window (useful when parallelizing the scraping):

In [2]:
# Browser options shared by every driver created in this notebook.
driver_option = webdriver.ChromeOptions()
# Chrome switches are written "--name" with no spaces; the previous
# " - incognito" string was not recognized as the incognito switch.
driver_option.add_argument("--incognito")
# NOTE: machine-specific location of the chromedriver executable; adjust to
# wherever chromedriver is installed locally.
chromedriver_path = Service(r'D:/Downloads/chromedriver_win32/chromedriver.exe')

def create_webdriver():
    """Start a new Chrome WebDriver and return it.

    The driver is built from the module-level ``chromedriver_path`` service
    and ``driver_option`` options, so every call yields a driver configured
    the same way.
    """
    new_driver = webdriver.Chrome(options=driver_option, service=chromedriver_path)
    return new_driver

### Opening the GitHub page & extracting the needed HTML elements

In [3]:
# Page listing the projects to collect.
collection_url = 'https://github.com/collections/machine-learning'

# Start a driver and navigate it to the collection page.
browser = create_webdriver()
browser.get(collection_url)

Extracting all projects using their XPaths:

In [6]:
# Selects the <h1> elements directly under each div carrying exactly this
# class string; these are the elements treated as projects below.
project_heading_xpath = '//div[@class="d-flex flex-justify-between flex-items-start mb-1"]/h1'
projects = browser.find_elements(By.XPATH, project_heading_xpath)

Extracting info for each project:

In [7]:
# Map each project's displayed heading text to the href of its link.
project_list = {}
for proj in projects:
    # find_elements returns an empty list when the heading has no direct <a>
    # child; skip such headings instead of failing with IndexError on [0].
    links = proj.find_elements(By.XPATH, "a")
    if not links:
        continue
    project_list[proj.text] = links[0].get_attribute('href')

Closing the browser connection:

In [None]:
# End the browser session now that the project data has been extracted.
browser.quit()

### Saving the Data

In [None]:
import pandas as pd

# Build the table directly from the scraped mapping, one row per project.
# Naming the columns up front (instead of from_dict + renaming afterwards)
# yields the same frame for non-empty input and also works when nothing was
# scraped, where the rename step would raise a length-mismatch ValueError.
project_df = pd.DataFrame({
    'project_url': list(project_list.values()),
    'project_name': list(project_list.keys()),
})
# Write the result to disk; the default integer index is written as the
# first (unnamed) column, as before.
project_df.to_csv('project_list.csv')