# Initial Tests

`Author: James Smith`

`Date: 14/12/2019`

## Requesting

In [15]:
import requests

url = 'https://www.donedeal.ie/cars/'

# Upper bound (seconds) on each HTTP call; without a timeout, requests can
# block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

# First attempt uses the library's default User-Agent. A request-level failure
# (connection error, timeout, ...) or any status other than 200 triggers a
# single retry with a browser-like User-Agent.
try:
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    if r.status_code != 200:
        # Raise an explicit requests exception: a bare `raise` here has no
        # active exception to re-raise and only "works" by accident.
        raise requests.HTTPError(
            "Unexpected HTTP status {}".format(r.status_code), response=r
        )
except requests.RequestException:
    # Catch only requests' own errors so KeyboardInterrupt and genuine bugs
    # are not silently swallowed.

    # Some websites don't respond well to HTTP requests with 'python-requests' as a User-Agent string.
    # You can get a 200 OK response if you set the User-Agent header to 'Mozilla'.
    print("User Agent Sting 'python-requests' caused the request to fail")
    print("Setting User Agent Sting to 'Mozilla/5'", "\n")

    headers = {'User-Agent':'Mozilla/5'}
    # If this retry fails as well, the exception propagates to the reader.
    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)

# Which HTTP status code did we get back from the server?
print("HTTP status code:", "\n", r.status_code, "\n")
# What is the textual status (reason phrase)?
print("HTTP status:", "\n", r.reason, "\n")
# What were the HTTP response headers?
print("HTTP response headers:", "\n", r.headers, "\n")
# The request information is saved as a Python object in r.request:
print("HTTP request information:", "\n", r.request, "\n")
# What were the HTTP request headers?
print("HTTP request headers:", "\n", r.request.headers, "\n")
# The HTTP response content (truncated to keep the cell output short):
print("First 1000 characters of HTTP response content :", "\n", r.text[0:1000], "\n")

User Agent Sting 'python-requests' caused the request to fail
Setting User Agent Sting to 'Mozilla/5' 

HTTP status code: 
 200 

HTTP status: 
 OK 

HTTP response headers: 
 {'Content-Language': 'en-US', 'Cache-Control': 'no-cache, no-store', 'Pragma': 'no-cache', 'Server': 'donedeal', 'Expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'unreadMessages': '0', 'totalUnread': '0', 'Date': 'Mon, 16 Dec 2019 14:32:49 GMT', 'Transfer-Encoding': 'chunked', 'Connection': 'close', 'savedSearchIndicator': 'false', 'Vary': 'Origin,User-Agent,Accept-Encoding', 'Content-Encoding': 'gzip', 'Content-Type': 'text/html;charset=UTF-8', 'Set-Cookie': 'DDSearchStartTime=1576506769246; Domain=.donedeal.ie; Path=/; Secure; HttpOnly, JSESSIONID=378EDFD632F4E903100D8689A9075836; Path=/search/; Secure; HttpOnly, OPENSHIFT_production_search_SERVERID=970604834d14c7b80b1cae76d30f0cb6; path=/; HttpOnly', 'unreadHistoryChecks': '0', 'P3P': 'CP="DSP CUR OTPi IND OTRi ONL FIN"'} 

HTTP request information: 
 <PreparedReque

## Including Beautiful Soup

In [17]:
import requests
from bs4 import BeautifulSoup

url = 'https://www.donedeal.ie/cars/'

# Upper bound (seconds) on each HTTP call; without a timeout, requests can
# block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

# First attempt uses the library's default User-Agent. A request-level failure
# (connection error, timeout, ...) or any status other than 200 triggers a
# single retry with a browser-like User-Agent.
try:
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    if r.status_code != 200:
        # Raise an explicit requests exception: a bare `raise` here has no
        # active exception to re-raise and only "works" by accident.
        raise requests.HTTPError(
            "Unexpected HTTP status {}".format(r.status_code), response=r
        )
except requests.RequestException:
    # Catch only requests' own errors so KeyboardInterrupt and genuine bugs
    # are not silently swallowed.

    # Some websites don't respond well to HTTP requests with 'python-requests' as a User-Agent string.
    # You can get a 200 OK response if you set the User-Agent header to 'Mozilla'.
    print("User Agent Sting 'python-requests' caused the request to fail")
    print("Setting User Agent Sting to 'Mozilla/5'", "\n")

    headers = {'User-Agent':'Mozilla/5'}

    # If this retry fails as well, the exception propagates to the reader.
    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)

# The Beautiful Soup library itself depends on an HTML parser to perform most
# of the bulk parsing work. In Python, multiple parsers exist to do so:
#
#     html.parser:
#             a built-in Python parser that is decent (especially when
#             using recent versions of Python 3) and requires no extra installation.
#     lxml:
#             which is very fast but requires an extra installation.

# Parse the decoded response body with the built-in parser.
html_contents = r.text
html_soup = BeautifulSoup(html_contents, 'html.parser')

User Agent Sting 'python-requests' caused the request to fail
Setting User Agent Sting to 'Mozilla/5' 



Testing the methods `find()` and `find_all()`

In [22]:
# find() yields a single result; None is printed when nothing matches.
first_h1 = html_soup.find('h1')
print(first_h1)
print("NEXT")

# Look up an element by its id attribute, passed as an attrs dictionary.
logo_element = html_soup.find('', {'id': 'p-logo'})
print(logo_element)
print("NEXT")

# find_all() accepts a list of tag names and returns every matching element.
headings = html_soup.find_all(['h1', 'h2'])
for heading in headings:
    print(heading)

None
NEXT
None
NEXT
<h1 class="search-header-title" data-reactid="18">Cars</h1>
<h1 class="num-results ng-cloak" ng-if="search.results.pagingCounts.displayValue">
<strong>
<ng-pluralize count="search.results.pagingCounts.displayValue" when="{'0': 'no ads',
                                '1': '1 ad',
                                'other': '{{search.results.pagingCounts.displayValue | number}} ads'}">
</ng-pluralize>
</strong>
<span>
<span ng-if="search.form.adType ==='wanted' "><strong> wanted</strong></span>
<span ng-if="search.submittedWords"> for <strong>"<span ng-bind="search.submittedWords"></span>"</strong> </span>
<span ng-if="search.form.section &amp;&amp; search.form.section !== 'all'"> for <strong ng-bind="search.results.breadcrumbs[search.results.breadcrumbs.length - 1].displayName"></strong></span>
                in <strong ng-bind="search.locationText"></strong>
<span ng-if="search.form['car-finance']"> with
                    <span class="u-ws-nowrap">Finance Availabl