# Scraping project - version 1.01
    

In [1105]:
# Importing libraries

import requests                     # For requesting urls
from bs4 import BeautifulSoup       # Parsing and handling html files
import pandas as pd                 # Structured data frames
import numpy as np                  # Support to data frames and used on exceptions np.nan
import matplotlib.pyplot as plt     # Visualizations library
from collections import OrderedDict # Preserving column order on dictionaries for data frames
%matplotlib inline                  # Visualizing without plt.show()

In [1078]:
# Template of the DataFrame that will be created
# Built first as a checklist of the attributes to scrape

column_names = [
    'ProductID',
    'ProductName',
    'Company',
    'WhoUses_Description',
    'TargetCustomerSize',
    'StartPrice',
    'FreeTrial',
    'Features',
    'OverallRating',
    'EaseOfUse',
    'CustomerService',
    'Features&Functionalities',
    'ValueForMoney',
    'Support',
    'Training',
]

# Every column starts out as its own empty list
df = pd.DataFrame.from_dict({column: [] for column in column_names})

Index(['Company', 'CustomerService', 'EaseOfUse', 'Features',
       'Features&Functionalities', 'FreeTrial', 'OverallRating', 'ProductID',
       'ProductName', 'StartPrice', 'Support', 'TargetCustomerSize',
       'Training', 'ValueForMoney', 'WhoUses_Description'],
      dtype='object')

In [639]:
# Requesting the front page of the Capterra website and parsing it into `soup`

url_frontpage = 'https://www.capterra.com/real-estate-property-management-software/'

# A timeout keeps the cell from hanging forever on an unresponsive server
r = requests.get(url_frontpage, timeout=30)
try:
    # Fail here on an HTTP error instead of parsing an error page further down
    r.raise_for_status()
    html_doc = r.text
finally:
    # The connection is released whether or not the request succeeded
    r.close()
soup = BeautifulSoup(html_doc, "html.parser")

In [643]:
# all_items: one entry per product card found on the listing page
# It is the base of the loop in the main code
# The number of entries to be scraped is printed for reference

listing_filter = {'class':'card listing'}
all_items = soup.findAll("div", listing_filter)

product_count = len(all_items)
print("Total amount of products listed on url_frontpage requested is %d" % product_count)

Total amount of products listed on url_frontpage requested is 208


In [1054]:
# The code strategy:
# The attributes of each product are easiest to read from Capterra's comparison page, therefore:
#   Product IDs are taken two at a time from all_items;
#   For each pair of product IDs a comparison page url is requested (one request per loop pass);
#   The attributes of both products are read from that page;
#   Every result list receives two entries per pass, one per product;
#   The loop repeats over the pairs of IDs in all_items.

# Result lists: they grow in lockstep, two entries per comparison page

IDs = []
ProductName = []
Company = []
WhoUses_Description = []
TargetCustomerSize = []
OverallRating = []
EaseOfUse = []
CustomerService = []
Features_Functionality = []
ValueForMoney = []
StartPrice = []
FreeTrial = []
Deployement = []
Support = []
Features = []
Training = []
URL = []

# Seconds to wait for a comparison page before giving up on the request
REQUEST_TIMEOUT = 30


def _block_cells(stacks, position):
    """Return every 'cell-divider' <td> inside the 'stack' block at *position*."""
    return stacks[position].findAll('td', {'class':'cell-divider'})


def _platform_features(cell):
    """Return the checked features of a cell as a list, or its placeholder text.

    A cell whose text has exactly four words is stored as plain stripped text
    (the exploration cells show 'Not provided by vendor' as such a value);
    any other cell yields the stripped text of its 'li[class=ss-check]' items.
    """
    if len(cell.text.split()) == 4:
        return cell.text.strip()
    return [feature.text.strip() for feature in cell.select('li[class=ss-check]')]


def _rating(cells, position):
    """Return the first token of the ratings cell at *position*, np.nan if missing."""
    try:
        return cells[position].text.strip().split()[0]
    except IndexError:
        # Raised both when the cell does not exist and when it holds no text
        return np.nan


def _enabled_items(cell):
    """Return the text of each <li> whose markup does not contain 'feature-disable'."""
    return [entry.text for entry in cell.findAll('li') if 'feature-disable' not in str(entry)]


def _checked_items(cell):
    """Return the text of each <li> whose class attribute is 'ss-check ' (trailing space)."""
    # Raw string: the backslash escapes the space for the CSS selector, not for Python
    return [entry.text for entry in cell.select(r"li[class=ss-check\ ] ")]


# Main code

# Products are read in pairs: index is the position of the first product of each pair.
# Changing the start value of the range changes where the comparison starts on all_items.
# The range never reaches past the end of all_items, so an odd number of entries cannot
# raise an IndexError on all_items[index + 1].
# NOTE(review): the stop value len(all_items) - 2 keeps the long-standing behaviour of leaving
# the last pair of the listing unscraped. The URL list built further down must hold exactly
# one entry per row produced here, so widen this range only together with that cell.
for index in range(0, len(all_items) - 2, 2):
    item1 = all_items[index]['id']
    item2 = all_items[index + 1]['id']

    # Creation of url_compare to enter the comparison page
    url_compare = 'https://www.capterra.com/real-estate-property-management-software/compare/' + \
              item1.replace('product-', '') + '-' + item2.replace('product-', '')
    r = requests.get(url_compare, timeout=REQUEST_TIMEOUT)
    try:
        # Stop before any list is touched, so an HTTP error leaves all lists the same length
        r.raise_for_status()
        html_doc = r.text
    finally:
        r.close()
    soup_compare = BeautifulSoup(html_doc, "html.parser")

    # The page is searched once for its 'stack' blocks; every lookup below indexes this list
    stacks = soup_compare.findAll('div', {'class':'stack'})

    # Appending product IDs
    IDs.extend([item1, item2])

    # Block 1: product names (links 0 and 1) and vendor lines (paragraphs 0 and 1)
    header = stacks[1]
    name_links = header.findAll(
        'a', {'onclick':"ga('send', 'event', 'Product Compare', 'Product Name Click');"})
    ProductName.extend([name_links[0].text.strip(), name_links[1].text.strip()])

    vendor_lines = header.findAll('p', {'class':"color-gray no-margin-bottom milli"})
    Company.extend([vendor_lines[0].text.replace('by\n', '').strip(),
                    vendor_lines[1].text.replace('by\n', '').strip()])

    # Block 2: who uses this software (cells 1 and 2), target customer size (cells 4 and 5)
    best_for_cells = _block_cells(stacks, 2)
    WhoUses_Description.extend([best_for_cells[1].text, best_for_cells[2].text])
    TargetCustomerSize.extend([best_for_cells[4].text, best_for_cells[5].text])

    # Block 3: pricing section, start price (cells 1 and 2) and free trial (cells 4 and 5)
    pricing_cells = _block_cells(stacks, 3)
    StartPrice.extend([pricing_cells[1].text.strip(), pricing_cells[2].text.strip()])
    FreeTrial.extend([pricing_cells[4].text.strip(), pricing_cells[5].text.strip()])

    # Block 4: feature lists (cells 4 and 5), stored as text when no list is provided
    platform_cells = _block_cells(stacks, 4)
    a = platform_cells[4]
    b = platform_cells[5]
    Features.append(_platform_features(a))
    Features.append(_platform_features(b))

    # Block 5: ratings. A missing block is treated like a block without cells,
    # so every rating of the pair falls back to np.nan.
    rating_cells = _block_cells(stacks, 5) if len(stacks) > 5 else []

    # Overall rating: cells 1 - 2
    OverallRating.append(_rating(rating_cells, 1))
    OverallRating.append(_rating(rating_cells, 2))

    # Ease of use: cells 4 - 5
    EaseOfUse.append(_rating(rating_cells, 4))
    EaseOfUse.append(_rating(rating_cells, 5))

    # Customer service: cells 7 - 8
    CustomerService.append(_rating(rating_cells, 7))
    CustomerService.append(_rating(rating_cells, 8))

    # Features & functionality: cells 10 - 11
    Features_Functionality.append(_rating(rating_cells, 10))
    Features_Functionality.append(_rating(rating_cells, 11))

    # Value for money: cells 13 - 14
    ValueForMoney.append(_rating(rating_cells, 13))
    ValueForMoney.append(_rating(rating_cells, 14))

    # Block 6: support options (cells 1 and 2) and training options (cells 4 and 5)
    support_cells = _block_cells(stacks, 6)

    # Support: entries flagged 'feature-disable' in their markup are left out
    Support.append(_enabled_items(support_cells[1]))
    Support.append(_enabled_items(support_cells[2]))

    # Training: only the checked entries are kept
    Training.extend([_checked_items(support_cells[4]), _checked_items(support_cells[5])])

product-167973 product-165852
https://www.capterra.com/real-estate-property-management-software/compare/167973-165852


product-38478 product-133029
https://www.capterra.com/real-estate-property-management-software/compare/38478-133029


product-147392 product-33832
https://www.capterra.com/real-estate-property-management-software/compare/147392-33832


product-135182 product-128184
https://www.capterra.com/real-estate-property-management-software/compare/135182-128184


product-14544 product-164775
https://www.capterra.com/real-estate-property-management-software/compare/14544-164775


product-123688 product-20091
https://www.capterra.com/real-estate-property-management-software/compare/123688-20091


product-143505 product-147659
https://www.capterra.com/real-estate-property-management-software/compare/143505-147659


product-20372 product-154399
https://www.capterra.com/real-estate-property-management-software/compare/20372-154399


product-122933 product-151481
https://www.capterra.com/real-estate-property-management-software/compare/122933-151481


product-169422 product-165617
https://www.capterra.com/real-estate-property-management-software/compare/169422-165617


product-137909 product-129088
https://www.capterra.com/real-estate-property-management-software/compare/137909-129088


product-128379 product-146322
https://www.capterra.com/real-estate-property-management-software/compare/128379-146322


product-148438 product-141252
https://www.capterra.com/real-estate-property-management-software/compare/148438-141252


product-37330 product-135049
https://www.capterra.com/real-estate-property-management-software/compare/37330-135049


product-145035 product-14444
https://www.capterra.com/real-estate-property-management-software/compare/145035-14444


product-158989 product-74391
https://www.capterra.com/real-estate-property-management-software/compare/158989-74391


product-141454 product-105814
https://www.capterra.com/real-estate-property-management-software/compare/141454-105814


product-145260 product-154986
https://www.capterra.com/real-estate-property-management-software/compare/145260-154986


product-122572 product-158997
https://www.capterra.com/real-estate-property-management-software/compare/122572-158997


product-141471 product-151217
https://www.capterra.com/real-estate-property-management-software/compare/141471-151217


product-2743 product-63354
https://www.capterra.com/real-estate-property-management-software/compare/2743-63354


product-139748 product-37290
https://www.capterra.com/real-estate-property-management-software/compare/139748-37290


product-127303 product-122090
https://www.capterra.com/real-estate-property-management-software/compare/127303-122090


product-143325 product-141095
https://www.capterra.com/real-estate-property-management-software/compare/143325-141095


product-77333 product-129773
https://www.capterra.com/real-estate-property-management-software/compare/77333-129773


product-70753 product-127313
https://www.capterra.com/real-estate-property-management-software/compare/70753-127313


product-157665 product-120661
https://www.capterra.com/real-estate-property-management-software/compare/157665-120661


product-157176 product-145595
https://www.capterra.com/real-estate-property-management-software/compare/157176-145595


product-161175 product-127320
https://www.capterra.com/real-estate-property-management-software/compare/161175-127320


product-133963 product-140971
https://www.capterra.com/real-estate-property-management-software/compare/133963-140971


product-147729 product-92718
https://www.capterra.com/real-estate-property-management-software/compare/147729-92718


product-143307 product-164648
https://www.capterra.com/real-estate-property-management-software/compare/143307-164648


product-40752 product-128400
https://www.capterra.com/real-estate-property-management-software/compare/40752-128400


product-56379 product-132747
https://www.capterra.com/real-estate-property-management-software/compare/56379-132747


product-126062 product-152900
https://www.capterra.com/real-estate-property-management-software/compare/126062-152900


product-147468 product-164376
https://www.capterra.com/real-estate-property-management-software/compare/147468-164376


product-140891 product-79829
https://www.capterra.com/real-estate-property-management-software/compare/140891-79829


product-152837 product-163600
https://www.capterra.com/real-estate-property-management-software/compare/152837-163600


product-38468 product-104325
https://www.capterra.com/real-estate-property-management-software/compare/38468-104325


product-57648 product-131565
https://www.capterra.com/real-estate-property-management-software/compare/57648-131565


product-40734 product-92250
https://www.capterra.com/real-estate-property-management-software/compare/40734-92250


product-53496 product-75253
https://www.capterra.com/real-estate-property-management-software/compare/53496-75253


product-150707 product-155158
https://www.capterra.com/real-estate-property-management-software/compare/150707-155158


product-168870 product-34357
https://www.capterra.com/real-estate-property-management-software/compare/168870-34357


product-84069 product-164557
https://www.capterra.com/real-estate-property-management-software/compare/84069-164557


product-127316 product-83355
https://www.capterra.com/real-estate-property-management-software/compare/127316-83355


product-150573 product-127314
https://www.capterra.com/real-estate-property-management-software/compare/150573-127314


product-127305 product-141263
https://www.capterra.com/real-estate-property-management-software/compare/127305-141263


product-82248 product-159450
https://www.capterra.com/real-estate-property-management-software/compare/82248-159450


product-115473 product-163305
https://www.capterra.com/real-estate-property-management-software/compare/115473-163305


product-165872 product-167201
https://www.capterra.com/real-estate-property-management-software/compare/165872-167201


product-130132 product-143031
https://www.capterra.com/real-estate-property-management-software/compare/130132-143031


product-153699 product-171876
https://www.capterra.com/real-estate-property-management-software/compare/153699-171876


product-122449 product-118570
https://www.capterra.com/real-estate-property-management-software/compare/122449-118570


product-160380 product-40754
https://www.capterra.com/real-estate-property-management-software/compare/160380-40754


product-74868 product-112948
https://www.capterra.com/real-estate-property-management-software/compare/74868-112948


product-146266 product-37213
https://www.capterra.com/real-estate-property-management-software/compare/146266-37213


product-2742 product-130561
https://www.capterra.com/real-estate-property-management-software/compare/2742-130561


product-38477 product-139231
https://www.capterra.com/real-estate-property-management-software/compare/38477-139231


product-127318 product-17835
https://www.capterra.com/real-estate-property-management-software/compare/127318-17835


product-87310 product-153765
https://www.capterra.com/real-estate-property-management-software/compare/87310-153765


product-161244 product-28671
https://www.capterra.com/real-estate-property-management-software/compare/161244-28671


product-132097 product-56376
https://www.capterra.com/real-estate-property-management-software/compare/132097-56376


product-86391 product-171545
https://www.capterra.com/real-estate-property-management-software/compare/86391-171545


product-40076 product-163877
https://www.capterra.com/real-estate-property-management-software/compare/40076-163877


product-162498 product-127310
https://www.capterra.com/real-estate-property-management-software/compare/162498-127310


product-38308 product-156139
https://www.capterra.com/real-estate-property-management-software/compare/38308-156139


product-69269 product-142672
https://www.capterra.com/real-estate-property-management-software/compare/69269-142672


product-40738 product-38481
https://www.capterra.com/real-estate-property-management-software/compare/40738-38481


product-37271 product-2738
https://www.capterra.com/real-estate-property-management-software/compare/37271-2738


product-162061 product-80954
https://www.capterra.com/real-estate-property-management-software/compare/162061-80954


product-114509 product-70410
https://www.capterra.com/real-estate-property-management-software/compare/114509-70410


product-111010 product-20153
https://www.capterra.com/real-estate-property-management-software/compare/111010-20153


product-166353 product-2728
https://www.capterra.com/real-estate-property-management-software/compare/166353-2728


product-148534 product-127311
https://www.capterra.com/real-estate-property-management-software/compare/148534-127311


product-2730 product-165357
https://www.capterra.com/real-estate-property-management-software/compare/2730-165357


product-37289 product-128384
https://www.capterra.com/real-estate-property-management-software/compare/37289-128384


product-127307 product-68035
https://www.capterra.com/real-estate-property-management-software/compare/127307-68035


product-156513 product-155865
https://www.capterra.com/real-estate-property-management-software/compare/156513-155865


product-78873 product-144051
https://www.capterra.com/real-estate-property-management-software/compare/78873-144051


product-131561 product-135926
https://www.capterra.com/real-estate-property-management-software/compare/131561-135926


product-167135 product-2737
https://www.capterra.com/real-estate-property-management-software/compare/167135-2737


product-161246 product-127301
https://www.capterra.com/real-estate-property-management-software/compare/161246-127301


product-38895 product-127302
https://www.capterra.com/real-estate-property-management-software/compare/38895-127302


product-40313 product-32711
https://www.capterra.com/real-estate-property-management-software/compare/40313-32711


product-37355 product-127312
https://www.capterra.com/real-estate-property-management-software/compare/37355-127312


product-29109 product-23131
https://www.capterra.com/real-estate-property-management-software/compare/29109-23131


product-91490 product-37288
https://www.capterra.com/real-estate-property-management-software/compare/91490-37288


product-69331 product-155098
https://www.capterra.com/real-estate-property-management-software/compare/69331-155098


product-37351 product-77462
https://www.capterra.com/real-estate-property-management-software/compare/37351-77462


product-167513 product-146807
https://www.capterra.com/real-estate-property-management-software/compare/167513-146807


product-48109 product-162574
https://www.capterra.com/real-estate-property-management-software/compare/48109-162574


product-170202 product-133677
https://www.capterra.com/real-estate-property-management-software/compare/170202-133677


product-56328 product-164670
https://www.capterra.com/real-estate-property-management-software/compare/56328-164670


product-159606 product-84959
https://www.capterra.com/real-estate-property-management-software/compare/159606-84959


product-160864 product-151177
https://www.capterra.com/real-estate-property-management-software/compare/160864-151177


product-126018 product-15143
https://www.capterra.com/real-estate-property-management-software/compare/126018-15143


product-148964 product-75481
https://www.capterra.com/real-estate-property-management-software/compare/148964-75481


product-85096 product-142986
https://www.capterra.com/real-estate-property-management-software/compare/85096-142986


product-160125 product-136598
https://www.capterra.com/real-estate-property-management-software/compare/160125-136598


product-128385 product-143038
https://www.capterra.com/real-estate-property-management-software/compare/128385-143038


product-89088 product-159303
https://www.capterra.com/real-estate-property-management-software/compare/89088-159303


product-162831 product-164741
https://www.capterra.com/real-estate-property-management-software/compare/162831-164741


In [None]:
# Adding the product URL to df
# This came as an extra task after the main code was done

# The code strategy:
#   Product urls can be read from the front page already held in all_items,
#   so no new requests are needed

# item.a['href'] is the pattern that yields the desired url
# IDs holds one entry per scraped product, in the same order as all_items, so slicing
# all_items to len(IDs) keeps URL aligned with the other columns without a hard-coded count
URL = ['https://www.capterra.com/' + item.a['href'] for item in all_items[:len(IDs)]]

In [1131]:
# Column -> values mapping built from the scraped lists
# OrderedDict receives a list of (column, values) pairs: wrapping a plain dict literal
# only keeps the column order on interpreters whose dicts are already ordered
df = OrderedDict([
    ('ProductID', IDs),
    ('ProductName', ProductName),
    ('Product URL', URL),
    ('Company', Company),
    ('WhoUses_Description', WhoUses_Description),
    ('TargetCustomerSize', TargetCustomerSize),
    ('StartPrice', StartPrice),
    ('FreeTrial', FreeTrial),
    ('Features', Features),
    ('OverallRating', OverallRating),
    ('EaseOfUse', EaseOfUse),
    ('CustomerService', CustomerService),
    ('Features&Functionality', Features_Functionality),
    ('ValueForMoney', ValueForMoney),
    ('Support', Support),
    ('Training', Training),
])

df = pd.DataFrame.from_dict(df)

# Both output files are named after the <title> of the front page
# NOTE(review): the title is used as-is; characters that are not valid in file names are not filtered
output_name = str(soup.title.text)

# Saving in csv
df.to_csv(output_name + '.csv', header=df.columns, sep=',', encoding='utf-8')

# Saving in excel
# The context manager writes and closes the workbook on exit
# (ExcelWriter.save() no longer exists in pandas 2.0)
with pd.ExcelWriter(output_name + '.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Sheet1')

In [1044]:
# Appending Ratings section Features & Functionality att 10 - 11 on last findAll
#soup_compare.findAll('div', {'class':'stack'})[5].findAll('td', {'class':'cell-divider'})[10].text.strip().split()[0]
#soup_compare.findAll('div', {'class':'stack'})[5].findAll('td', {'class':'cell-divider'})[11].text.strip().split()[0]
    
# Appending Ratings section ValueForMoney att 13 - 14 on last findAll

try:
    # Cell 13 of the ratings block; an IndexError means the rating is missing on this page
    probe_cells = soup_compare.findAll('div', {'class':'stack'})[5].findAll('td', {'class':'cell-divider'})
    probe_cells[13].text.strip().split()[0]
except IndexError:
    print('ok')
#soup_compare.findAll('div', {'class':'stack'})[5].findAll('td', {'class':'cell-divider'})[14].text.strip().split()[0]

ok


In [998]:
# id_list: the 'id' attribute of every product card in all_items

id_list = [product['id'] for product in all_items]

# First four IDs, shown as a quick check
id_list[0:4]

['product-167973', 'product-165852', 'product-38478', 'product-133029']

In [767]:
# Generating the url for the comparison between two products of id_list (entries 2 and 3)
# and parsing the returned page into soup_compare

url_compare = 'https://www.capterra.com/real-estate-property-management-software/compare/' + \
              id_list[2].replace('product-', '') + '-' + id_list[3].replace('product-', '')

# A timeout keeps the cell from hanging forever on an unresponsive server
r = requests.get(url_compare, timeout=30)
try:
    # Fail here on an HTTP error instead of exploring an error page in the cells below
    r.raise_for_status()
    html_doc = r.text
finally:
    r.close()
soup_compare = BeautifulSoup(html_doc, "html.parser")

In [763]:
# Function that creates the right list with 
# Name of product, Product's company, Rating, Number of Ratings
def string_transform(self):
    """Return the whitespace-separated tokens of ``self.text`` after clean-up.

    The substrings '\\n', 'Remove', '/' and 'by' are deleted, in that order,
    wherever they occur (also inside longer words) before the text is split.
    """
    cleaned = self.text
    for unwanted in ('\n', 'Remove', '/', 'by'):
        cleaned = cleaned.replace(unwanted, '')
    return cleaned.split()

In [821]:
#soup_compare.findAll('th', {'class':'cell-divider'})[1].text

#print(string_transform(soup_compare.findAll('th', {'class':'cell-divider'})[1]))
#print(string_transform(soup_compare.findAll('th', {'class':'cell-divider'})[2]))

In [839]:
#soup_compare.findAll('th', {'class':'cell-divider'})[2].text.replace('\n', '').replace('Remove', '')\
    #.replace('/', '').replace('by', '').strip()

In [842]:
# Block layout found here:
# the outer findAll steps from block to block (i.e. Best For - Pricing...)

# Product name links of the second block; links 0 and 1 are the two products in soup_compare
header_block = soup_compare.findAll('div', {'class':'stack'})[1]
name_links = header_block.findAll(
    'a', {'onclick':"ga('send', 'event', 'Product Compare', 'Product Name Click');"})
name_links[0].text.strip()

'LiveTour'

In [819]:
# Block layout found here:
# the outer findAll steps from block to block (i.e. Best For - Pricing...)

# Vendor paragraphs of the second block; paragraphs 0 and 1 are the two products in soup_compare
vendor_block = soup_compare.findAll('div', {'class':'stack'})[1]
vendor_lines = vendor_block.findAll('p', {'class':"color-gray no-margin-bottom milli"})
vendor_lines[1].text.replace('by\n', '').strip()

'TOPS Software'

In [846]:
# Block layout found here:
# the outer findAll steps from block to block (i.e. Best For - Pricing...)
# the inner findAll steps from cell to cell (i.e. inside 'Best For': Who Uses This Software? ...)

# Who uses this software: cells 1 and 2 are the two products in soup_compare
best_for_block = soup_compare.findAll('div', {'class':'stack'})[2]
best_for_cells = best_for_block.findAll('td', {'class':'cell-divider'})
best_for_cells[2].text

'ShowingHero is a online software for Property Managers by Property Managers. We love to work with technology driven and forward thinking organizations who are looking to simplify the leasing process'

In [390]:
# Target customer size (users): cells 4 and 5 are the two products in soup_compare

size_cells = soup_compare.findAll('div', {'class':'stack'})[2].findAll('td', {'class':'cell-divider'})
size_cells[4].text

'1 - 1000+'

In [858]:
# Pricing section: cells run from 0 to 8, the last one is read here

pricing_block = soup_compare.findAll('div', {'class':'stack'})[3]
pricing_cells = pricing_block.findAll('td', {'class':'cell-divider'})
pricing_cells[8].text.strip()

'No'

In [889]:
# Platform section

#Works but not properly
#soup_compare.findAll('div', {'class':'stack'})[4].findAll('td', {'class':'cell-divider'})[4].findAll('li', {'class':'ss-check'})

# Further test
# Works the same as the one below but worse coding
#soup_compare.findAll('div', {'class':'stack'})[4].findAll('td', {'class':'cell-divider'})[4].findAll(lambda tag: tag.name ==
                                                                                                #     'li' and tag.get('class')
                                                                                                 #    == ['ss-check'])[0]
                                                                                                
# Working version: stripped text of cell 5 in the fifth block
platform_cells = soup_compare.findAll('div', {'class':'stack'})[4].findAll('td', {'class':'cell-divider'})
platform_cells[5].text.strip()

#[0].text.strip()

'Not provided by vendor'

In [883]:
# Open question: how to handle the missing-description case?
# The select() below only applies when the cell holds a feature list;
# with "not provided by vendor" there are no list items to pick from

feature_cell = soup_compare.findAll('div', {'class':'stack'})[4].findAll('td', {'class':'cell-divider'})[4]
checked_features = feature_cell.select('li[class=ss-check]')
checked_features[1].text.strip()

'Residential Properties'

In [901]:
# Ratings section, ease of use: cells 4 - 5 of the inner findAll; the first token is kept

ease_cells = soup_compare.findAll('div', {'class':'stack'})[5].findAll('td', {'class':'cell-divider'})
ease_tokens = ease_cells[4].text.strip().split()
ease_tokens[0]

'4'

In [479]:
# Ratings section, customer service: cells 7 - 8 of the inner findAll; the first token is kept

service_cells = soup_compare.findAll('div', {'class':'stack'})[5].findAll('td', {'class':'cell-divider'})
service_tokens = service_cells[7].text.strip().split()
service_tokens[0]

'4'

In [480]:
# Ratings section, features & functionality: cells 10 - 11 of the inner findAll; the first token is kept

functionality_cells = soup_compare.findAll('div', {'class':'stack'})[5].findAll('td', {'class':'cell-divider'})
functionality_tokens = functionality_cells[10].text.strip().split()
functionality_tokens[0]

'4'

In [481]:
# Ratings section, value for money: cells 13 - 14 of the inner findAll; the first token is kept

value_cells = soup_compare.findAll('div', {'class':'stack'})[5].findAll('td', {'class':'cell-divider'})
value_tokens = value_cells[13].text.strip().split()
value_tokens[0]

'4.5'

In [551]:
# List items of cell 1 in the seventh block

# Listing every <li> also returns the entries marked as disabled, which is the problem to solve next
support_cell = soup_compare.findAll('div', {'class':'stack'})[6].findAll('td', {'class':'cell-divider'})[1]
support_cell.findAll('li')

[<li class="ss-user feature-disabled">24/7 (Live Rep)</li>,
 <li class="ss-clock ">Business Hours</li>,
 <li class="ss-laptop ">Online</li>]

In [920]:
# Solution for the disabled entries: turn each list item into a string and
# print its text only when 'feature-disable' does not occur in that string
# Cells 1 - 2 hold the two products

a = soup_compare.findAll('div', {'class':'stack'})[6].findAll('td', {'class':'cell-divider'})[1].findAll('li')
for entry in a:
    if 'feature-disable' not in str(entry):
        print(entry.text)
            

Business Hours
Online


In [979]:
# Training section: cells 4 - 5
# The class attribute is "ss-check " with a space after "check", so the selector escapes
# that space with a backslash. The selector is a raw string so the backslash reaches
# select() unchanged instead of being read as an (invalid) Python escape sequence.

training_cell = soup_compare.findAll('div', {'class':'stack'})[6].findAll('td', {'class':'cell-divider'})[4]
len(training_cell.select(r"li[class=ss-check\ ] "))

2