# SELECTOR 

In [1]:
from scrapy import Selector

In [2]:
# Sample document: one <p> nested inside a <div> and a second <p> directly under <body>.
html = '''
<html>
  <body>
    <div class ="hello datacamp">
      <p>Hello World!</p>
    </div>
    <p>Enjoy DataCamp!</p>
  </body>
</html>
'''

In [3]:
# Wrap the HTML string in a Selector so it can be queried.
sel = Selector(text=html)

In [4]:
# "//p" matches every <p> element at any depth, so both paragraphs are returned.
sel.xpath("//p")

[<Selector xpath='//p' data='<p>Hello World!</p>'>,
 <Selector xpath='//p' data='<p>Enjoy DataCamp!</p>'>]

# Extracting Data from a SelectorList with xpath

In [5]:
# .extract() on the SelectorList returns the matched elements as a list of strings.
sel.xpath("//p").extract()

['<p>Hello World!</p>', '<p>Enjoy DataCamp!</p>']

In [6]:
# Keep the SelectorList so individual matches can be picked out by index.
ps = sel.xpath('//p')
second_p = ps[1]  # zero-based indexing: index 1 is the second <p>

In [7]:
# .extract() on a single Selector returns one string rather than a list.
second_p.extract()

'<p>Enjoy DataCamp!</p>'

In [8]:
# Index 0 is the first <p> matched by the earlier '//p' query.
first_p = ps[0]

In [9]:
# Show the first paragraph as a string.
first_p.extract()

'<p>Hello World!</p>'

# XPath Chaining

Selector and SelectorList objects allow for chaining when using the xpath method. This means you can call xpath again on the result of a previous xpath call. For example, if sel is the name of our Selector, then

sel.xpath('/html/body/div[2]')

is the same as 

sel.xpath('/html').xpath('./body/div[2]')

or is the same as

sel.xpath('/html').xpath('./body').xpath('./div[2]')

The only catch is that you need to glue the XPath pieces together by putting a period at the start of each subsequent XPath string (notice the periods we added to the XPath strings in our examples). The leading period makes the expression relative to the nodes that were already selected.



In [28]:
from scrapy import Selector

In [30]:
html = '''

<html>
 <body>
  <p id="p-example">
    Hello World!
    Try <a href="http://www.datacamp.com"<DataCamp</a>today!
  </p>
 </body>
</html>
'''

In [31]:
sel = Selector( text = html )

In [33]:
sel.xpath('//p[@id="p-example"]/text()').extract()

['\n    Hello World!\n    Try ']

In [34]:
sel.xpath('//p[@id="p-example"]//text()').extract()

['\n    Hello World!\n    Try ', 'today!\n  ']

# Loading the HTML of a live web page into a Selector

In [11]:
import requests

url = 'https://www.datacamp.com/courses/all'

# requests waits indefinitely unless a timeout is given, so bound the wait.
# raise_for_status() fails loudly on an HTTP error instead of letting an
# error page be parsed as if it were the course listing.
response = requests.get(url, timeout=10)
response.raise_for_status()
html = response.content

In [12]:
# Build a Selector from the downloaded page body.
sel = Selector(text=html)

# Extracting data with CSS Locators

In [18]:
# Import the Selector class itself; the lowercase name is the scrapy.selector
# submodule and does not bring Selector into scope.
from scrapy import Selector

# Sample document: one <p> inside a <div> and one <p> directly under <body>.
html = '''
<html>
 <body>
   <div>
   <p> Hello World! </p>
   </div>
   <p> Enjoy DataCamp!</p>
 </body>

</html>

'''
sel = Selector(text=html)

In [19]:
# Syntax illustration only (the sample HTML above has no id or class attributes):
# "#uid" matches on id, ".class1" matches on class, ">" means direct child.
css_locator = 'div#uid > p.class1'

Select all elements whose class attribute includes class1:

In [20]:
# No tag name before the dot, so the locator is not restricted to one element type.
css_locator = '.class1'

In [21]:
# "div > p" matches <p> elements that are direct children of a <div>;
# the second <p> sits directly under <body>, so it is not matched.
sel.css("div > p")

[<Selector xpath='descendant-or-self::div/p' data='<p> Hello World! </p>'>]

In [22]:
# .extract() works on css() results just as it does on xpath() results.
sel.css("div > p").extract()

['<p> Hello World! </p>']

In [46]:
# Import the Selector class itself; the lowercase name is the scrapy.selector
# submodule and does not bring Selector into scope.
from scrapy import Selector

# Sample document: a <p> whose text is interrupted by a nested <a> element.
html = '''

<html>
 <body>
  <p id="p-example">
    Hello World!
    Try <a href="http://www.datacamp.com">DataCamp</a> today!
  </p>
 </body>
</html>
'''

sel = Selector(text=html)

In [47]:
# "::text" attached directly to the element selects only the <p>'s own text nodes;
# the link text inside <a> is left out.
sel.css('p#p-example::text').extract()

['\n    Hello World!\n    Try ', ' today!\n  ']

In [48]:
# With a space before "::text", text nodes of descendant elements are included too
# (note 'DataCamp' in the output).
sel.css('p#p-example ::text').extract()

['\n    Hello World!\n    Try ', 'DataCamp', ' today!\n  ']