# Sync vs Async

consecutive vs concurrent

In [1]:
import time
import asyncio

In [3]:
%%time
iteration_times = [1, 3, 2, 4]

def sleeper(seconds, i=-1):
    
    if i != -1:
        print(f"{i}\t{seconds}s")
    time.sleep(seconds)
    
def run():
    for i, second in enumerate(iteration_times):
        sleeper(second, i=i)
        
run()

0	1s
1	3s
2	2s
3	4s
Wall time: 10 s


In [4]:
iteration_time = [1, 3, 2, 1]

async def a_sleeper(seconds, i=-1):
    if i != (-1):
        print(f"{i}\t{seconds}")
    await asyncio.sleep(seconds)
    
async def a_run():
    for i, second in enumerate(iteration_time):
        asyncio.create_task(a_sleeper(second, i=i))
        
asyncio.run(a_run())

RuntimeError: asyncio.run() cannot be called from a running event loop

# Blocking and Timeout

The code below is an example of blocking code asyncio.sleep() is an async function, therefore  
it is needed to be awaited. However, we are also making our sleeper function await on line 6, this  
does not allow program to move beyond that point, therefore when we try to print hello world jupyter  
notebook will not do that until after 12 seconds.

In [38]:
async def sleeper(seconds, i=-1):
    if i != -1:
        print(f"{i}\t{seconds}")
    await asyncio.sleep(seconds)
    
await sleeper(12, i=1)

1	12


In [39]:
# this print statement will execute 12 seconds after above code is executed

print("hello world")

hello world


In [40]:
# getting jupyter notebooks eventloop.
loop = asyncio.get_event_loop()

loop.create_task(sleeper(12, 1))
# loop.run_until_complete(sleeper(12))

<Task pending coro=<async-def-wrapper.<locals>.sleeper() running at <ipython-input-38-01b7cce9c16b>:4>>

In [41]:
print('hello world')

hello world


# asyncio.wait() function

In [33]:
# wait returns a set of done and pending futures, pending with respect to timeout
# here, one sleeper takes 1 second and the other takes 12 and the time out is 2 sec
# therefor the second task will get timedout and will be added to pending set.

done, pending = await asyncio.wait([sleeper(1), sleeper(12)], timeout=2)

In [34]:
done

{<Task finished coro=<async-def-wrapper.<locals>.sleeper() done, defined at <ipython-input-12-01b7cce9c16b>:4> result=None>}

In [35]:
pending

{<Task pending coro=<async-def-wrapper.<locals>.sleeper() running at <ipython-input-12-01b7cce9c16b>:7> wait_for=<Future pending cb=[<TaskWakeupMethWrapper object at 0x00000000089DACD8>()]>>}

In [36]:
# running the pending future
# the pending future will start from where it stoped

await asyncio.wait(pending, timeout=12)

({<Task finished coro=<async-def-wrapper.<locals>.sleeper() done, defined at <ipython-input-12-01b7cce9c16b>:4> result=None>},
 set())

In [37]:
# .wait_for() will raise a TimeoutError if the future takes more time than the mentioned timeout.

await asyncio.wait_for(sleeper(20), timeout=10)

TimeoutError: 

#### # .wait_for() can be useful for terminating tasks that have an uncertain time requirement, and are taking too much time to finish.

In [40]:
# putting .wait_for() in a try: except block 

try:
    await asyncio.wait_for(sleeper(20), timeout=3)
except asyncio.TimeoutError:
    print("Task failed successfully")

Task failed successfully


## Scraping with Selenium - Synchronous

In [2]:
import re

import requests
from requests_html import HTML
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import pandas as pd

In [4]:
def scraper(url: str):
    """Scrapes the HTML of the passed URL using selenium webdriver."""
    options = Options()
    options.add_argument("--headless")              # running chrome without opening the browser
    driver = webdriver.Chrome(options=options)      # creating a webdriver instance with passed arguments
    driver.get(url)
    return driver.page_source                       # getting the HTML

def extract_id_slug(url_path: str):
    """separates product id and product slug, from product link."""

    regex = r"^[^\s]+/(?P<id>\d+)-(?P<slug>[\w_-]+)$"
    group = re.match(regex, url_path)
    if not group:
        return (None, None)
    return (group['id'], group['slug'])

In [6]:
url = "https://www.spoonflower.com/en/shop?on=fabric"

In [9]:
content = scraper(url)
print(content[:100*10])     # printing first thousand letters of the string

<html class="js" lang="en" data-wf-page="5d63f10f2f49481077e21b0c" data-wf-site="5c083293d3df665a93b5c3a0"><head><script type="text/javascript" src="https://bam-cell.nr-data.net/1/331b4fd22c?a=15371075&amp;v=1208.49599aa&amp;to=IFxcFkBbCFgEE01DBwJQRk1BXAtE&amp;rst=6949&amp;ck=1&amp;ref=https://www.spoonflower.com/en/shop&amp;ap=41&amp;be=1397&amp;fe=6845&amp;dc=3114&amp;af=err,xhr,stn,ins,spa&amp;perf=%7B%22timing%22:%7B%22of%22:1620889236760,%22n%22:0,%22f%22:0,%22dn%22:196,%22dne%22:198,%22c%22:198,%22s%22:201,%22ce%22:259,%22rq%22:259,%22rp%22:1379,%22rpe%22:1381,%22dl%22:1384,%22di%22:3114,%22ds%22:3114,%22de%22:3117,%22dc%22:6844,%22l%22:6844,%22le%22:6877%7D,%22navigation%22:%7B%7D%7D&amp;fp=4409&amp;fcp=4409&amp;jsonp=NREUM.setToken"></script><script async="" src="https://cdn2.exitintel.com/api/v1/pub/359?callback=exitintel.campaign.campaignsFileLoaded"></script>
  <meta charset="utf-8">
  <title>Shop Over 1 Million Fabric Designs | Spoonflower</title>
  <meta name="viewport" co

In [10]:
# Converting raw HTML to requests_html's HTML object for parsing.

html_r = HTML(html=content)

In [11]:
# getting all the links in the retrieved HTML.

print(html_r.links)

{'/en/products/8985918-sunflowers-cream-7x7-by-indybloomdesign', '/terms', '/en/products/7944022-golden-girls-illustration-peach-by-yesterdaycollection', '/en/products/703426-elephant-march-by-endemic', '/en/contact', '/en/products/7679631-scattered-earth-tones-watercolor-rainbows-by-anniemontgomerydesign', '/en/products/7522587-save-honey-bees-large-new-by-fernlesliestudio', '/en/products/1112778-rosie-riveter-by-spacefem', '/en/products/6590171-mermaid-scales-by-elladorine', '/en/fabric/8056679-ruth-bader-ginsburg-rbg-bust-black-by-katerhees', '/profiles/rebecca_reck_art', '/profiles/hippopottermiss', '/profiles/kimsa', '/en/fabric/4981816-black-white-dogs-by-littleislandcompany', '/en/products/2760166-chicken-boots-knit-fabric-regular-size-by-chickenboots', '/en/fabric/6782514-eame-s-wildflower-meadow-by-hipkiddesigns', '/en/fabric/7368347-dear-clementine-oranges-teal-by-crystal_walen', '/en/fabric/8405290-fable-floral-teal-jumbo-by-nouveau_bohemian', '/en/products/9155687-galaxy-de

In [12]:
# grabbing all the links that redirect to fabric detail

fabric_links = [x for x in html_r.links if '/en/fabric/' in x]
print(fabric_links)

['/en/fabric/8056679-ruth-bader-ginsburg-rbg-bust-black-by-katerhees', '/en/fabric/4981816-black-white-dogs-by-littleislandcompany', '/en/fabric/6782514-eame-s-wildflower-meadow-by-hipkiddesigns', '/en/fabric/7368347-dear-clementine-oranges-teal-by-crystal_walen', '/en/fabric/8405290-fable-floral-teal-jumbo-by-nouveau_bohemian', '/en/fabric/7522587-save-honey-bees-large-new-by-fernlesliestudio', '/en/fabric/10704201-ew-covid-grey-by-kindermama', '/en/fabric/8286001-hanging-out-by-sarah_knight', '/en/fabric/8197261-night-sky-stars-midnight-blue-by-at_the_cottage', '/en/fabric/5378956-under-water-by-lavish_season', '/en/fabric/7236018-native-eucalyptus-leaves-edition-1-fabric-wallpaper-by-erin__kendal', '/en/fabric/5822747-black-white-haunted-occult-by-xoxotique', '/en/fabric/5279418-navy-floral-by-crystal_walen', '/en/fabric/6545640-southdown-tartan-6-tan-black-white-by-weavingmajor', '/en/fabric/1112778-rosie-riveter-by-spacefem', '/en/fabric/8345787-plain-white-solid-white-plain-unpri

In [13]:
# separating the id and slug of the fabric from their links

datas = []

for path in fabric_links:
    id_, slug_ = extract_id_slug(path)
    # print(id_, slug_)
    data = {
        'id': id_,
        'slug': slug_,
        'path': path,
        'scraped': 0
    }

    datas.append(data)

In [14]:
print(datas)

[{'id': '8056679', 'slug': 'ruth-bader-ginsburg-rbg-bust-black-by-katerhees', 'path': '/en/fabric/8056679-ruth-bader-ginsburg-rbg-bust-black-by-katerhees', 'scraped': 0}, {'id': '4981816', 'slug': 'black-white-dogs-by-littleislandcompany', 'path': '/en/fabric/4981816-black-white-dogs-by-littleislandcompany', 'scraped': 0}, {'id': '6782514', 'slug': 'eame-s-wildflower-meadow-by-hipkiddesigns', 'path': '/en/fabric/6782514-eame-s-wildflower-meadow-by-hipkiddesigns', 'scraped': 0}, {'id': '7368347', 'slug': 'dear-clementine-oranges-teal-by-crystal_walen', 'path': '/en/fabric/7368347-dear-clementine-oranges-teal-by-crystal_walen', 'scraped': 0}, {'id': '8405290', 'slug': 'fable-floral-teal-jumbo-by-nouveau_bohemian', 'path': '/en/fabric/8405290-fable-floral-teal-jumbo-by-nouveau_bohemian', 'scraped': 0}, {'id': '7522587', 'slug': 'save-honey-bees-large-new-by-fernlesliestudio', 'path': '/en/fabric/7522587-save-honey-bees-large-new-by-fernlesliestudio', 'scraped': 0}, {'id': '10704201', 'slu

In [15]:
df = pd.DataFrame(datas)
df

Unnamed: 0,id,slug,path,scraped
0,8056679,ruth-bader-ginsburg-rbg-bust-black-by-katerhees,/en/fabric/8056679-ruth-bader-ginsburg-rbg-bus...,0
1,4981816,black-white-dogs-by-littleislandcompany,/en/fabric/4981816-black-white-dogs-by-littlei...,0
2,6782514,eame-s-wildflower-meadow-by-hipkiddesigns,/en/fabric/6782514-eame-s-wildflower-meadow-by...,0
3,7368347,dear-clementine-oranges-teal-by-crystal_walen,/en/fabric/7368347-dear-clementine-oranges-tea...,0
4,8405290,fable-floral-teal-jumbo-by-nouveau_bohemian,/en/fabric/8405290-fable-floral-teal-jumbo-by-...,0
...,...,...,...,...
79,3730688,william-morris-pimpernel-original-on-black-by-...,/en/fabric/3730688-william-morris-pimpernel-or...,0
80,6327300,call-mountains-evergreen-med-by-nouveau_bohemian,/en/fabric/6327300-call-mountains-evergreen-me...,0
81,7463028,seamless-watercolor-larger-leaves-pattern-1-by...,/en/fabric/7463028-seamless-watercolor-larger-...,0
82,6976212,christmas-gnomes-by-heartsandsharts,/en/fabric/6976212-christmas-gnomes-by-heartsa...,0


In [33]:
df.to_csv('idandslug.csv', index=False)

# Async Scraping with Chromedriver and arsenic

In [17]:
!pip install arsenic

Collecting arsenic
  Downloading arsenic-20.9-py3-none-any.whl (17 kB)
Collecting aiohttp>=2
  Downloading aiohttp-3.7.4.post0-cp37-cp37m-win_amd64.whl (630 kB)
Collecting structlog<21.0.0,>=20.1.0
  Downloading structlog-20.2.0-py2.py3-none-any.whl (49 kB)
Collecting async-timeout<4.0,>=3.0
  Downloading async_timeout-3.0.1-py3-none-any.whl (8.2 kB)
Collecting multidict<7.0,>=4.5
  Downloading multidict-5.1.0-cp37-cp37m-win_amd64.whl (48 kB)
Collecting yarl<2.0,>=1.0
  Downloading yarl-1.6.3-cp37-cp37m-win_amd64.whl (124 kB)
Installing collected packages: multidict, yarl, async-timeout, structlog, aiohttp, arsenic
Successfully installed aiohttp-3.7.4.post0 arsenic-20.9 async-timeout-3.0.1 multidict-5.1.0 structlog-20.2.0 yarl-1.6.3
You should consider upgrading via the 'c:\program files\python37\python.exe -m pip install --upgrade pip' command.


#### %%writefile file_name, writes the code in the cell, to an external file, provided by the file name,  
#### We did that here to avoid clashing codes event loop with jupyters eventloop.

In [47]:
%%writefile async_scrape_6.py
# Recording time.
import os
import asyncio
import itertools
import re
import time
import pathlib

import pandas as pd
from arsenic import (get_session, keys, browsers, services)
from requests_html import HTML

def store_links_as_df_pickle(datas:list, path:str='links.pkl')-> pd.DataFrame:

    new_df = pd.DataFrame(datas, index='id')

    if pathlib.Path(path).exists():                     # if a previous dataframe pickle exists.
        og_df = pd.read_pickle(path)                    # get previous df
        df = pd.concat([og_df, new_df], sort=False)     # concatinate old & new df
        df.drop_duplicates(subset=['id'], inplace=True) # droping rows with same product id, avoid duplication.
        df.to_pickle(path)
        return df
    else:    
        new_df.to_pickle(path)
        return new_df

async def extract_id_slug(url_path: str)-> tuple:
    """separates product id and product slug, from product link."""

    regex = r"^[^\s]+/(?P<id>\d+)-(?P<slug>[\w_-]+)$"
    group = re.match(regex, url_path)
    if not group:
        return (None, None)
    return (group['id'], group['slug'])

async def get_fabric_links(html_body: str)-> list:

    html_r = HTML(html=html_body)
    fabric_links = list(x for x in html_r.links if '/en/fabric' in x)
    datas = list()

    for link in fabric_links:
        id_, slug_ = await extract_id_slug(link)
        data = {
            'id': id_,
            'slug': slug_,
            'path': link,
            'scraped': 0
        }
        datas.append(data)

    return datas

async def scraper(url:str, i=-1, timeout:int=60, start=None)-> list:
    """Scrapes the HTML of the passed URL using arsenic webdriver.
    Returns a list of dictionaries, with product id, slug, link."""

    service = services.Chromedriver()
    browser = browsers.Chrome()
    browser.capabilities = {
        'goog:chromeOptions': {'args': ['--headless', '--disable-gpu']}
    }

    async with get_session(service, browser) as session:
        # if the page doesn't respond, return an empty URLs list.
        try:
            await asyncio.wait_for(session.get(url), timeout=60)
        except asyncio.TimeoutError:
            return []

        body = await session.get_page_source()
        links = await get_fabric_links(body)

        if start is not None:
            end = time.time() - start
            print(f"{i} took {end} seconds")

        return links

async def run(urls:list, timeout:int=60, start=None)->list:
    """
    attr:
        urls(list): list of URLs of webpages to scrape.
        timeout(int): timeout for async."""

    site_links = list()
    # df = store_links_as_df_pickle(links)

    for i, url in enumerate(urls):
        site_links.append(
            asyncio.create_task(scraper(url, i=i, start=start))
        )
    
    list_of_links = await asyncio.gather(*site_links)
    return list_of_links


if __name__ == "__main__":
    urls = ["https://www.spoonflower.com/en/shop?on=fabric&page_offset=1",
            "https://www.spoonflower.com/en/shop?on=fabric&page_offset=2",
            "https://www.spoonflower.com/en/shop?on=fabric&page_offset=3",]

    start = time.time()
    results = asyncio.run(run(urls, timeout=30, start=start))
    end = time.time() - start
    print(f"Total time is {end}")
    print(f"length of site_links list: {len(results)}")



Overwriting async_scrape_6.py


In [48]:
import pandas as pd
df_pickle = pd.read_pickle('links.pkl')

# df_pickle.drop_duplicates(subset=['id'], inplace=True)

df_pickle['path']

0     /en/fabric/7502637-fable-floral-black-jumbo-by...
1     /en/fabric/9608189-art-nouveau-poppy-red-wallp...
2     /en/fabric/9155687-galaxy-deep-space-seamless-...
3     /en/fabric/6812243-cute-kawaii-sushi-small-siz...
4     /en/fabric/8091225-dark-floral-black-roses-on-...
                            ...                        
79    /en/fabric/6075822-soft-meadow-floral-by-sweet...
80    /en/fabric/2760166-chicken-boots-knit-fabric-r...
81             /en/fabric/7685381-dragon-fire-by-adenaj
82       /en/fabric/7812388-dnd-pattern-by-neonborealis
83    /en/fabric/509390-spoonflower-color-map-by-spo...
Name: path, Length: 84, dtype: object

In [43]:
k = list(f"https://www.spoonflower.com/en/shop?on=fabric&page_offset={x}" for x in range(1, 120))
print(k)

['https://www.spoonflower.com/en/shop?on=fabric&page_offset=1', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=2', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=3', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=4', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=5', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=6', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=7', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=8', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=9', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=10', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=11', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=12', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=13', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=14', 'https://www.spoonflower.com/en/shop?on=fabric&page_offset=15', 'https://www.spoonflower.com/en/shop?on=fabric&p

# Extracting Product data (Synchronously).

In [136]:
import re
import time

import requests
from requests_html import HTML
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import pandas as pd
from urllib.parse import urlparse

def scraper(url: str):
    """Scrapes the HTML of the passed URL using selenium webdriver."""
    options = Options()
    options.add_argument("--headless")              # running chrome without opening the browser
    driver = webdriver.Chrome(options=options)      # creating a webdriver instance with passed arguments
    driver.get(url)
    time.sleep(4)
    return driver.page_source                       # getting the HTML

def extract_id_slug(url_path: str):
    """separates product id and product slug, from product link."""

    parsed_url  = urlparse(url_path)
    path        = parsed_url.path
    regex       = r"^[^\s]+/(?P<id>\d+)-(?P<slug>[\w_-]+)$"
    group       = re.match(regex, path)
    if not group:
        return (None, None)
    return (group['id'], group['slug'], path)

#### Getting the HTML of the product detail page

In [139]:
url = "https://www.spoonflower.com/en/shop?on=fabric"
url2 = "https://www.spoonflower.com/en/fabric/6075822-soft-meadow-floral-by-sweeterthanhoney"

html_str = scraper(url2)
content = HTML(html=html_str)
content

<HTML url='https://example.org/'>

#### Extracting the product detail from product deatil page HTML

In [130]:
def get_product_data(url, content):

    id_, slug_, path = extract_id_slug(url)
    title_elm        = content.find(".design-title")[0]
    size_txt = content.find('#fabric-size')[0].text

    data = {
        'id': id_,
        'slug': slug_,
        'path': path,
    }
    title = None
    if title_elm is None:
        return data

    #_________grabbing title_____________#
    title = title_elm.text
    #_________grabing size and dimension___________#
    sizes = re.findall(r"\d+(?=\scm)", size_elm)
    length, breadth = int(sizes[0]), int(sizes[1])
    unit = re.findall(r"(?<=\d\d\s)\w+", size_elm)[0]
    #_____________Grabbing price data_____________#
    price = content.find(".visuallyhidden span")
    amount = price[0].attrs['content']
    currency = price[1].attrs['content']
    #____________inserting data____________#
    data['title']    = title
    data['length']   = length
    data['breadth']  = breadth
    data['unit']     = unit
    data['price']    = float(amount)
    data['currency'] = currency
    
    return data

In [131]:
get_product_data(url, content)

{'id': '703426',
 'slug': 'elephant-march-by-endemic',
 'path': '/en/fabric/703426-elephant-march-by-endemic',
 'title': 'elephant march',
 'length': 53,
 'breadth': 50,
 'unit': 'cm',
 'price': 10.58,
 'currency': 'EUR'}

In [141]:
content.find('.design-title')[0].text

'Soft Meadow Floral'

# Product Data Extraction (Async)

In [134]:
# Scraping multiple URLs, i.e. multiple pages.
%%writefile data_extraction.py

import os
import asyncio
import itertools
import re
import time
import pathlib
from urllib.parse import urlparse

import pandas as pd
from arsenic import (get_session, keys, browsers, services)
from requests_html import HTML

def store_links_as_df_pickle(datas:list, path:str='links.pkl')-> pd.DataFrame:
    """Creates a Pandas DataFrame from the passed list, stores it in a pickle.
    attr:
        datas(list): List of dictionaries
        path(str)  : path of a pickle file if exists"""

    new_df = pd.DataFrame(datas, index='id')

    if pathlib.Path(path).exists():                     # if a previous dataframe pickle exists.
        og_df = pd.read_pickle(path)                    # get previous df
        df = pd.concat([og_df, new_df], sort=False)     # concatinate old & new df
        df.drop_duplicates(subset=['id'], inplace=True) # droping rows with same product id, avoid duplication.
        df.to_pickle(path)
        return df
    else:    
        new_df.to_pickle(path)
        return new_df

async def extract_id_slug(url_path:str)-> tuple:
    """separates product id and product slug, from product link."""

    parsed_url  = urlparse(url_path)
    path        = parsed_url.path
    regex = r"^[^\s]+/(?P<id>\d+)-(?P<slug>[\w_-]+)$"
    group = re.match(regex, url_path)
    if not group:
        return (None, None, path)
    return (group['id'], group['slug'], path)

async def get_parsable_html(raw_html:str)->HTML:
    """Takes a raw HTML string and returns parsable
     requests_html.HTML object"""
     
    return HTML(html=raw_html)

async def get_fabric_links(html_r:HTML)-> list:
    """Get relative links of fabric detail page from an requests_html.HTML object"""

    fabric_links = list(x for x in html_r.links if '/en/fabric' in x)
    datas = list()

    for link in fabric_links:
        id_, slug_, _ = await extract_id_slug(link)
        data = {
            'id': id_,
            'slug': slug_,
            'path': link,
            'scraped': 0
        }
        datas.append(data)

    return datas

def get_product_data(url:str, html_r:HTML)->dict:

    id_, slug_, path = await extract_id_slug(url)
    title_elm        = content.find(".design-title")[0]
    size_txt = content.find('#fabric-size')[0].text

    data = {
        'id': id_,
        'slug': slug_,
        'path': path,
    }
    title = None
    if title_elm is None:
        return data

    #_________grabbing title_____________#
    title = title_elm.text
    #_________grabing size and dimension___________#
    sizes = re.findall(r"\d+(?=\scm)", size_elm)
    length, breadth = int(sizes[0]), int(sizes[1])
    unit = re.findall(r"(?<=\d\d\s)\w+", size_elm)[0]
    #_____________Grabbing price data_____________#
    price = content.find(".visuallyhidden span")
    amount = price[0].attrs['content']
    currency = price[1].attrs['content']
    #____________inserting data____________#
    data['title']    = title
    data['length']   = length
    data['breadth']  = breadth
    data['unit']     = unit
    data['price']    = float(amount)
    data['currency'] = currency
    
    return data

async def scraper(url:str, i=-1, timeout:int=60, start=None)-> dict:
    """Scrapes the HTML of the passed URL using arsenic webdriver.
    Returns a list of dictionaries, with product id, slug, link."""

    service = services.Chromedriver()
    browser = browsers.Chrome()
    browser.capabilities = {
        'goog:chromeOptions': {'args': ['--headless', '--disable-gpu']}
    }

    async with get_session(service, browser) as session:
        # if the page doesn't respond, return an empty URLs list.
        try:
            await asyncio.wait_for(session.get(url), timeout=60)
        except asyncio.TimeoutError:
            return []

        await asyncio.sleep(10)
        body = await session.get_page_source()      # getting raw HTML
        html_r = await get_parsable_html(body)      # converting to parsable HTML
        links = await get_fabric_links(html_r)      # getting relative links
        product_data = await get_product_data(url, html_r)
        
        dataset = {
            "links": links,
            "product_data": product_data
        }
        #_____________printing time consumption_________________#
        if start is not None:
            end = time.time() - start
            print(f"{i} took {end} seconds")

        return dataset

async def run(urls:list, timeout:int=60, start=None)->list:
    """
    attr:
        urls(list): list of URLs of webpages to scrape.
        timeout(int): timeout for async."""

    site_links = list()
    # df = store_links_as_df_pickle(links)

    for i, url in enumerate(urls):
        site_links.append(
            asyncio.create_task(scraper(url, i=i, start=start))
        )
    
    list_of_links = await asyncio.gather(*site_links)
    return list_of_links


if __name__ == "__main__":
    start = time.time()
    urls = ["https://www.spoonflower.com/en/shop?on=fabric&page_offset=1",
            "https://www.spoonflower.com/en/shop?on=fabric&page_offset=2",]

    results = asyncio.run(run(urls, timeout=30, start=start))

    # df = asyncio.run(run(url))
    print(f"length of site_links list: {len(results)}")


SyntaxError: 'await' outside async function (<ipython-input-134-1a6197ff75f8>, line 75)