# Scraping Instagram

#### Author: Alejandra Saldivar

In [None]:
# Import the necessary libraries

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

### Accessing the main page


In [2]:
### Store the location of the ChromeDriver executable
PATH = 'path_to_driver'

### Initialize the Chrome driver and open the Instagram landing page.
### Selenium 4 expects the driver location to be wrapped in a Service object;
### passing the path positionally is deprecated and no longer accepted from 4.10.
driver = webdriver.Chrome(service=Service(PATH))
driver.get("https://www.instagram.com/")

In [5]:
from selenium.webdriver.support import expected_conditions as EC

The conditions available in the different language bindings vary, but this is a non-exhaustive list of a few:

- alert is present
- element exists
- element is visible
- title contains
- title is
- element staleness
- visible text

The full list of available expected conditions for Python can be found [here](https://www.selenium.dev/selenium/docs/api/py/webdriver_support/selenium.webdriver.support.expected_conditions.html?highlight=expected). Here, you are interested in code that will allow your browser to wait until the cookies form becomes clickable.

<div class="alert alert-info"><b>Exercise 2 </b> Write the code to make your driver wait for up to 15 seconds, or until the <i>Allow essential and optional cookies</i> button is clickable. Store the corresponding web element in a new variable called <b>cookies</b> and click it.</div>

In [6]:
### Handling cookies: wait up to 15 seconds for the consent button to become
### clickable, keep a reference to it in `cookies`, then accept.
cookies_locator = (By.XPATH, '//button[contains(text(), "Allow essential and optional cookies")]')
cookies = WebDriverWait(driver, 15).until(
    EC.element_to_be_clickable(cookies_locator)
)
cookies.click()

#### Log in

To complete the log in process, we will need two steps. The first is to wait until the corresponding web elements are clickable once again. The second is to send our username and password credentials to be granted access to our account. 

In [7]:
### Wait (up to 10 seconds each) until the username and password inputs are clickable.
### Each call to `until` starts its own timeout, so one wait object can serve both fields.
login_wait = WebDriverWait(driver, 10)
username = login_wait.until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']"))
)
password = login_wait.until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='password']"))
)

In [9]:
### Empty each login field and type the corresponding credential into it.
### The values below are placeholders: substitute your own, and never commit real ones.
for field, value in ((username, "DUMMY_USERNAME"), (password, "DUMMY_PASSWORD")):
    field.clear()
    field.send_keys(value)


In [10]:
### Wait up to 10 seconds for the Log in (submit) button to become clickable, then click it
submit_locator = (By.CSS_SELECTOR, "button[type='submit']")
login = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(submit_locator)
)
login.click()

In [11]:
### Dismiss the first post-login dialog: wait up to 15 seconds for its "Not now"
### button to become clickable, then click it.
### NOTE(review): presumably the "save login info" prompt, judging by the variable name - confirm.
not_now_locator = (By.XPATH, '//button[contains(text(), "Not now")]')
save = WebDriverWait(driver, 15).until(
    EC.element_to_be_clickable(not_now_locator)
)
save.click()

In [12]:
### Dismiss the second dialog: wait up to 15 seconds for its "Not Now" button
### (note the capital N, unlike the previous dialog) to become clickable, then click it.
notifications_locator = (By.XPATH, '//button[contains(text(), "Not Now")]')
notifications = WebDriverWait(driver, 15).until(
    EC.element_to_be_clickable(notifications_locator)
)
notifications.click()

#### Search

Note that to open the search bar, you first need to click the magnifying glass icon in the left menu of the website. Let's start by identifying this element. To do so, we will use a relative XPath search, which lets us locate specific elements inside a given node.

In [None]:
### Wait until the Search icon in the left menu is clickable and click it.
### An explicit wait replaces the bare lookup so the cell does not fail while the
### menu is still rendering. The icon's aria-label follows the interface language,
### so both the English label (used by every other locator in this notebook) and
### the Spanish one are accepted.
search_icon = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((
        By.XPATH,
        '//div[@class="x9f619 xxk0z11 xvy4d1p x11xpdln xii2z7h x19c4wfv"]'
        '/*[name()="svg"][@aria-label="Search" or @aria-label="Buscar"]',
    ))
)
search_icon.click()

In [14]:
### Wait up to 10 seconds for the Search input to become clickable,
### clear whatever it contains and type the search keyword.
searchbox_locator = (By.XPATH, "//input[@placeholder='Search']")
searchbox = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(searchbox_locator)
)
searchbox.clear()
searchbox.send_keys("#brutalism")

In [16]:
### Click the first result
### `elem` is the container that holds the search results.
elem = driver.find_element(By.XPATH, '//div[@class="x6s0dn4 x1wzhzgj x78zum5 xdt5ytf x5yr21d x1n2onr6 xh8yej3 xhtitgo"]')
### Use find_element(By.XPATH, ...) like the rest of the notebook: the
### find_element_by_* helpers were removed in Selenium 4.3. The leading "." makes
### the XPath relative to `elem`; a bare "//" would search the whole document.
elem.find_element(By.XPATH, './/div[@class="_abm4"]').click()


In [17]:
# Scroll down once by the current height of the page. The total length of the
# page is not known in advance, so re-run this cell to scroll further.
scroll_script = "window.scrollBy(0,document.body.scrollHeight);"
driver.execute_script(scroll_script)

In [30]:
### Retrieve the links to the original posts, the urls for the images and the
### corresponding image descriptions for all the search results displayed in the webpage.
post_urls = []
image_urls = []
descriptions = []

for result in driver.find_elements(By.XPATH, "//div[@class='_aabd _aa8k _aanf']"):
    # Locate both child elements before appending anything: the <img> is looked up
    # once instead of twice, and the three lists stay aligned if a lookup fails.
    link = result.find_element(By.TAG_NAME, "a")
    image = result.find_element(By.TAG_NAME, "img")
    post_urls.append(link.get_attribute("href"))
    image_urls.append(image.get_attribute("src"))
    descriptions.append(image.get_attribute("alt"))

In [34]:
### Hover the mouse pointer over the first search result
element = driver.find_element(By.XPATH, "//div[@class='_aabd _aa8k _aanf']")
action = webdriver.ActionChains(driver)
# move_to_element only queues the movement; perform() actually executes it.
action.move_to_element(element).perform()

In [37]:
### Retrieve the number of likes and the number of comments for all the search
### results currently displayed in the webpage. A value that cannot be read is
### stored as None, so both lists keep one entry per result.
likes = []
comments = []

for result in driver.find_elements(By.XPATH, "//div[@class='_aabd _aa8k _aanf']"):
    # The counters are read only after hovering over the result.
    ActionChains(driver).move_to_element(result).perform()
    try:
        # ".//" keeps the search inside this result; a bare "//" is evaluated from
        # the document root and would return the same first <ul> on every iteration.
        stats = [
            item.text
            for item in result.find_element(By.XPATH, ".//ul[@class='_abpo']")
                              .find_elements(By.TAG_NAME, "li")[:2]
        ]
    except WebDriverException:
        # Best effort: a missing or stale overlay yields None values rather than an
        # error. Unlike a bare `except`, this still lets KeyboardInterrupt through.
        stats = []
    # First <li> is taken as the likes counter, second as the comments counter.
    likes.append(stats[0] if len(stats) > 0 else None)
    comments.append(stats[1] if len(stats) > 1 else None)

#### Create new post

The first step will require that you click on the plus icon in the left menu. 

In [40]:
### Wait until the New post icon in the left menu is clickable and click it.
### An explicit wait (as used for the other buttons in this notebook) replaces the
### bare lookup, which fails if the menu has not finished rendering.
new_post_icon = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((
        By.XPATH,
        '//div[@class="x9f619 xxk0z11 xvy4d1p x11xpdln xii2z7h x19c4wfv"]'
        '/*[name()="svg"][@aria-label="New post"]',
    ))
)
new_post_icon.click()

When doing so, a pop-up message appears allowing us to select an image from our computer. 

Inspect the emerging message. You will see that in addition to the visible content, there's also an `input` tag that accepts different input types. In our case, we are going to provide an input of type `file`.

In [113]:
### Locate the <input type="file"> tag of the upload dialog and send it the
### location of the image file on your computer (replace the placeholder path).
file_input_locator = (By.CSS_SELECTOR, "input[type='file']")
select_file = driver.find_element(*file_input_locator)
select_file.send_keys("path_to_file")

### Note: you may need to grant Terminal permission to control the computer in System Preferences, and to accept the terms when prompted.

In [114]:
### Wait up to 15 seconds for the Next button to become clickable, then click it
next_button = WebDriverWait(driver, 15).until(
    EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Next")]'))
)
next_button.click()

In [115]:
### The following step shows another Next button: wait up to 15 seconds for it
### to become clickable, then click it
next_button = WebDriverWait(driver, 15).until(
    EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Next")]'))
)
next_button.click()

In [116]:
### Locate the caption text area by its aria-label and type the caption for the post
caption_xpath = "//textarea[@aria-label='Write a caption...']"
caption = driver.find_element(By.XPATH, caption_xpath)
caption.send_keys("#summer")

In [87]:
### Wait up to 15 seconds for the Share button to become clickable, then click it
share_button = WebDriverWait(driver, 15).until(
    EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Share")]'))
)
share_button.click()
