# Testing

Data source: [Thomasnet supplier search](https://www.thomasnet.com/)

In [1]:
import ast
import json
import math
import time
from datetime import datetime
from multiprocessing import Pool
from traceback import print_exc

import numpy as np 
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

In [2]:
# HTTP request headers carrying a desktop-Chrome user-agent string.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.114 Safari/537.36"
    )
}

# Query-string parameters for the supplier search (first results page).
# Parameters that were tried and then disabled in the original cell:
#   "WTZO": "Find Suppliers", "searchx": "true", "which": "prod"
payload = dict(
    cov="NA",
    heading=21650809,
    searchsource="suppliers",
    searchterm="hydraulic cylinders",
    what="hydraulic cylinders",
    pg=1,
)

# Endpoint that every search request in this notebook is sent to.
BASE_URL = "https://www.thomasnet.com/nsearch.html"

In [3]:
# Fetch the first results page to learn (a) how many suppliers match in total
# and (b) how many cards one page holds; the page count follows from the two.
# NOTE(review): the `headers` dict defined earlier is not sent with this
# request — confirm whether that is intentional.
page = requests.get(BASE_URL, params=payload, timeout=30)  # never wait forever
page.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
soup = BeautifulSoup(page.text, "lxml")

# The total is the text of the last <b> inside the results sub-header.
subheader = soup.find('p', class_="supplier-search-results__subheader")
count_tags = subheader.findAll('b') if subheader is not None else []
if not count_tags:
    raise RuntimeError("Supplier count not found on the search results page")
total_suppliers = count_tags[-1].text
print(f"{total_suppliers} suppliers found")

n_suppliers = len(soup.findAll('div', class_="supplier-search-results__card"))
print(f"{n_suppliers} found on this page")

# Drop thousands separators (e.g. "1,234") before converting, and avoid a
# ZeroDivisionError when the page shows no supplier cards at all.
total_count = float(total_suppliers.replace(',', ''))
number_of_pages = math.ceil(total_count / n_suppliers) if n_suppliers else 0
print(f"Total Pages: {number_of_pages}")

772 suppliers found
25 found on this page
Total Pages: 31


In [4]:
def generate_payloads(keyword: str, page_num: int, heading: int = 21650809):
    """Build one query-parameter dict per search-results page.

    Args:
        keyword: Search phrase, used for both the ``searchterm`` and ``what``
            parameters.
        page_num: Number of result pages to cover; pages are numbered from 1
            to ``page_num`` inclusive. Zero or a negative value yields an
            empty list.
        heading: Value sent as the ``heading`` parameter. Defaults to the id
            this notebook uses for its "hydraulic cylinders" search, so
            existing two-argument calls behave exactly as before.

    Returns:
        A list of independent dicts, one per page, in ascending page order.
    """
    return [
        {
            "cov": "NA",
            "heading": heading,
            "searchsource": "suppliers",
            "searchterm": keyword,
            "what": keyword,
            "pg": page_no,
        }
        for page_no in range(1, page_num + 1)
    ]

# One query-parameter dict for each results page of the search.
x = generate_payloads(keyword='hydraulic cylinders', page_num=number_of_pages)

In [10]:
def _find(parent, *args, **kwargs):
    """Call ``parent.find(...)``, returning ``None`` when ``parent`` itself is missing."""
    return None if parent is None else parent.find(*args, **kwargs)


def _text(node):
    """Return the stripped text of ``node``, or ``""`` when the node is missing."""
    return "" if node is None else node.text.strip()


def _href(node):
    """Return the ``href`` attribute of ``node``, or ``""`` when the node is missing."""
    return "" if node is None else node.get('href')


def _parse_tracking(raw):
    """Decode a card's ``data-impression-tracking`` attribute without ``eval``.

    The value comes from a remote page, so it must never be executed as code
    (the original cell passed it to ``eval``). It is parsed as JSON first and
    as a Python literal second.
    NOTE(review): the attribute's exact format is not visible here — confirm
    that one of the two parsers accepts it.
    """
    try:
        return json.loads(raw)
    except ValueError:
        return ast.literal_eval(raw)


# Walk every results page and turn each supplier card into one flat record.
collected_data = []
keyword = 'hydraulic cylinders'
heading = 21650809  # assigned but not read anywhere in this cell

for page_params in tqdm(x):
    try:
        page = requests.get(BASE_URL, params=page_params, timeout=30)
    except requests.RequestException as err:
        # One unreachable page should not throw away the pages already scraped.
        print(f"Request failed for {page_params}: {err}")
        continue
    soup = BeautifulSoup(page.text,"lxml")

    suppliers = soup.findAll('div',class_="supplier-search-results__card")
    for sup in suppliers:
        # Every field starts empty so all records share the same columns.
        card_data = {
            "company_id":"",
            "company_name":"",
            "company_type":"",
            "annual_revenue":"",
            "year_founded":"",
            "num_employees":"",
            "location":"",
            "company_url":"",
            "brands":"",
            "description":"",
            "url":"",
            "telephone":"",
            "searchterm":keyword
        }
        try:
            # Fields read with direct attribute access are treated as
            # mandatory: if one is missing, the lookup raises, the error is
            # reported below and the record keeps whatever was filled so far.
            header = sup.find('header',class_='profile-card__header')
            card_data['company_id'] = _parse_tracking(sup.get('data-impression-tracking'))['company_id']
            title = header.find('h2',class_='profile-card__title')
            card_data['company_name'] = title.text.strip()
            card_data['url'] = 'https://www.thomasnet.com'+title.find('a').get('href')
            # Optional fields go through the None-tolerant helpers and simply
            # stay empty when the element is absent.
            card_data['telephone'] = _href(sup.find('a',{'data-conversion_action':'Call'}))

            sup_data = sup.find('div',class_='profile-card__supplier-data')
            card_data['location'] = sup_data.find('span',class_='profile-card__location').text.replace('ico-map','').strip()
            card_data['company_type'] = sup_data.find('span',{'data-content':'Company Type'}).text.strip()
            for field, data_content in (
                ('annual_revenue', 'Annual Revenue'),
                ('num_employees', 'Number of Employees'),
                ('year_founded', 'Year Founded'),
            ):
                card_data[field] = _text(sup_data.find('span',{'data-content':data_content}))

            content = sup.find('div',class_='profile-card__content')
            # The description is the first <p> inside the content block.
            card_data['description'] = _text(_find(content,'p'))
            # NOTE(review): this class uses a single underscore, unlike the
            # other `profile-card__*` classes — confirm against the page markup.
            card_data['company_url'] = _href(_find(_find(content,'p',{'class':'profile-card_web-link-wrap'}),'a'))
            card_data['brands'] = _text(_find(content,'p',{'class':'profile-card__brands__body'}))
        except Exception:
            # print_exc() writes the traceback itself and returns None, so it
            # must not be passed to print() as an argument.
            print("Error encountered while extraction of data")
            print_exc()

        # Best effort: a partially filled record is still kept.
        collected_data.append(card_data)

df = pd.DataFrame(collected_data)
print(df.shape)
df.tail()

100%|██████████| 31/31 [02:18<00:00,  4.47s/it](772, 13)



Unnamed: 0,company_id,company_name,company_type,annual_revenue,year_founded,num_employees,location,company_url,brands,description,url,telephone,searchterm
767,10100771,Motion Hydraulics,"Manufacturer*, Service Company",,1974.0,,"Burlington, ON",,,,https://www.thomasnet.com/profile/10100771/mot...,tel://905-335-1171,hydraulic cylinders
768,30701202,Gaspe Machine Works Inc.,Manufacturer*,Under $1 Mil,1984.0,1-9,"Gaspe, QC",,,,https://www.thomasnet.com/profile/30701202/gas...,tel://418-368-6574,hydraulic cylinders
769,30164910,"T-Mac Cylinders, Inc",Manufacturer*,Under $1 Mil,1996.0,1-9,"Roscoe, IL",,,Manufacturer Of Hydraulic & Pneumatic Cylinder...,https://www.thomasnet.com/profile/30164910/tma...,tel://815-877-7090,hydraulic cylinders
770,30162771,Hydra Kinetics,Manufacturers' Rep*,Under $1 Mil,,1-9,"Heidenheimer, TX",,,"Mfr's. Rep. Of Cutom Made Hydraulic Cylinders,...",https://www.thomasnet.com/profile/30162771/hyd...,tel://254-983-1067,hydraulic cylinders
771,180836,"Production Engineering, Inc.","Custom Manufacturer*, Manufacturer, Service Co...",,1974.0,50-99,"Jackson, MI",,,Manufacturer of precision flexible machines & ...,https://www.thomasnet.com/profile/00180836/pro...,tel://517-788-6800,hydraulic cylinders


In [12]:
# Save the full listing table, then a two-column (company_id, url) extract of it.
df.to_csv('data/hydraulic_cylinders_suppliers_metadata.csv', index=False)
df[['company_id', 'url']].to_csv('data/suppliers_url.csv', index=False)

# Meta-Harvester

In [5]:
# Load the previously saved supplier listing. Forward slashes keep the path
# usable on every OS and avoid the invalid "\d" / "\h" escape sequences that
# the original backslash-separated literal contained.
meta = pd.read_csv('../data/hydraulic_cylinders/hc_suppliers_metadata.csv')
meta.tail()

Unnamed: 0,company_id,company_name,company_type,annual_revenue,year_founded,num_employees,location,company_url,brands,description,url,telephone,searchterm
767,10100771,Motion Hydraulics,"Manufacturer*, Service Company",,1974.0,,"Burlington, ON",,,,https://www.thomasnet.com/profile/10100771/mot...,tel://905-335-1171,hydraulic cylinders
768,30701202,Gaspe Machine Works Inc.,Manufacturer*,Under $1 Mil,1984.0,1-9,"Gaspe, QC",,,,https://www.thomasnet.com/profile/30701202/gas...,tel://418-368-6574,hydraulic cylinders
769,30164910,"T-Mac Cylinders, Inc",Manufacturer*,Under $1 Mil,1996.0,1-9,"Roscoe, IL",,,Manufacturer Of Hydraulic & Pneumatic Cylinder...,https://www.thomasnet.com/profile/30164910/tma...,tel://815-877-7090,hydraulic cylinders
770,30162771,Hydra Kinetics,Manufacturers' Rep*,Under $1 Mil,,1-9,"Heidenheimer, TX",,,"Mfr's. Rep. Of Cutom Made Hydraulic Cylinders,...",https://www.thomasnet.com/profile/30162771/hyd...,tel://254-983-1067,hydraulic cylinders
771,180836,"Production Engineering, Inc.","Custom Manufacturer*, Manufacturer, Service Co...",,1974.0,50-99,"Jackson, MI",,,Manufacturer of precision flexible machines & ...,https://www.thomasnet.com/profile/00180836/pro...,tel://517-788-6800,hydraulic cylinders


In [6]:
# Show each column's dtype and non-null count for the loaded listing.
meta.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 772 entries, 0 to 771
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   company_id      772 non-null    int64  
 1   company_name    772 non-null    object 
 2   company_type    772 non-null    object 
 3   annual_revenue  552 non-null    object 
 4   year_founded    645 non-null    float64
 5   num_employees   659 non-null    object 
 6   location        771 non-null    object 
 7   company_url     61 non-null     object 
 8   brands          138 non-null    object 
 9   description     683 non-null    object 
 10  url             772 non-null    object 
 11  telephone       768 non-null    object 
 12  searchterm      772 non-null    object 
dtypes: float64(1), int64(1), object(11)
memory usage: 78.5+ KB


In [8]:
# Keep only the supplier id and its profile URL, then preview the first ten rows.
ref_urls = meta[['company_id', 'url']]
ref_urls.head(10)

Unnamed: 0,company_id,url
0,1315677,https://www.thomasnet.com/profile/01315677/wor...
1,10055126,https://www.thomasnet.com/profile/10055126/ram...
2,1010077,https://www.thomasnet.com/profile/01010077/flu...
3,30082354,https://www.thomasnet.com/profile/30082354/ari...
4,1150392,https://www.thomasnet.com/branchloc.html?cid=1...
5,30851480,https://www.thomasnet.com/profile/30851480/inn...
6,129726,https://www.thomasnet.com/branchloc.html?cid=1...
7,1166920,https://www.thomasnet.com/profile/01166920/m-m...
8,1200093,https://www.thomasnet.com/profile/01200093/oil...
9,10070932,https://www.thomasnet.com/profile/10070932/had...


# Master Scraper

In [9]:
# Plain nested list of [company_id, url] pairs, one per supplier.
refs = ref_urls.to_numpy().tolist()

In [10]:
# Scrape a slice of supplier profile pages into one record per page.
# A record is kept only when the whole page parses; a page without the
# business-details section raises below and is reported and skipped.
collected_data = []

for ref in tqdm(refs[600:620]):
    # ref is a [company_id, url] pair.
    try:
        page = requests.get(ref[1], timeout=30).text
    except requests.RequestException as err:
        # One unreachable profile should not abort the rest of the batch.
        print(f'\nRequest failed for {ref[1]}: {err}')
        continue
    soup = BeautifulSoup(page,'lxml')

    # Every field starts empty so all records share the same columns.
    page_data = {
        'company_id':ref[0],
        'company_name':'',
        'company_type':'',
        'company_url':'',
        'location':'',
        'telephone':'',
        'product_description':'',
        'about_company':'',
        'product_types':'',
        'all_product_services':'',
        'link_prod_services':'',
        'brands':'',
        'link_all_brands':'',
        'primary_company_type':'',
        'additional_activities':'',
        'key_personnel':'',
        'social_media':'',
        'other_locations':'',
        "annual_revenue":"",
        "year_founded":"",
        "num_employees":"",
        'url':ref[1]
    }
    try:
        ################# General Information ####################
        # Each field here is best effort: a missing element leaves it empty.
        gen_info = soup.find('div',{'class':'copro_naft'})
        try:
            # Company name and website come from the same <h1><a> anchor.
            title_link = gen_info.find('div',{'class':'codetail'}).find('h1').find('a')
            page_data["company_name"] = title_link.text.strip()
            page_data["company_url"] = title_link.get('href')
        except Exception:
            pass
        try:
            # Prefer the third <span>; fall back to the first when there are
            # fewer than three. Kept inside the try so that a page with no
            # span at all leaves the field empty instead of being discarded.
            type_spans = gen_info.find('div',{'class':'codetail'}).find('p').findAll('span')
            type_span = type_spans[2] if len(type_spans) > 2 else type_spans[0]
            page_data["company_type"] = type_span.text.strip()
        except Exception:
            pass
        try:
            # Only the part of the address line before the first '|' is kept.
            page_data["location"] = gen_info.find('p',{'class':'addrline'}).text.split('|')[0].strip()
        except Exception:
            pass
        try:
            page_data["telephone"] = gen_info.find('a',{'data-conversion_action':'Call'}).get('href')
        except Exception:
            pass

        ################# Business Description ####################
        business_desc = soup.find('div',{'id':'copro_description'})
        try:
            page_data['product_description'] = business_desc.find('div',{'id':'copro_pdm'}).text.strip()
        except Exception:
            pass
        try:
            page_data['about_company'] = business_desc.find('div',{'id':'copro_about'}).text.strip()
        except Exception:
            pass

        ################# Products and Services/Brands ####################
        prod_serv = soup.find('div',{'id':'copro_prodserv'})
        try:
            # First group lists the product types, second group all products/services.
            prod_cats = prod_serv.find('div',{'id':'copro_prodserv_cats'})
            prod_groups = prod_cats.findAll('div',{'class':'prodserv_group'})
            page_data['product_types'] = [li.text.strip() for li in prod_groups[0].findAll('li')]
            all_prodserv = prod_groups[1]
            page_data['all_product_services'] = [li.text.strip() for li in all_prodserv.findAll('li')]
            page_data['link_prod_services'] = "https://www.thomasnet.com"+all_prodserv.find('a').get('href')
        except Exception:
            pass

        try:
            prod_brands = prod_serv.find('div',{'id':'copro_prodserv_brands'})
            brands = prod_brands.find('div',{'class':'prodserv_group'})
            page_data['brands'] = [li.text.strip() for li in brands.findAll('li')]
            page_data['link_all_brands'] = "https://www.thomasnet.com"+brands.find('a').get('href')
        except Exception:
            pass

        ################# Business Details ####################
        # Not wrapped individually: when this section is absent the lookups
        # raise and the page is skipped by the outer handler.
        bus_det = soup.find('div',{'id':'copro_bizdetails'})
        col1 = bus_det.find('div',{'class':'bdcol1'})
        for div in col1.findAll('div',{'class':'bizdetail'}):
            label_div = div.find('div',{'class':'label'})
            if label_div is None:
                # Same guard the second column uses: skip unlabeled entries.
                continue
            label = label_div.text.strip()
            if label=="Primary Company Type:":
                page_data['primary_company_type'] = div.find('li').text.strip()
            elif label=="Additional Activities:":
                page_data['additional_activities'] = div.find('li').text.strip()
            elif label=="Key Personnel:":
                page_data['key_personnel'] = [li.text.strip() for li in div.findAll('li')]
            elif label=="Locations:":
                page_data['other_locations'] = "https://www.thomasnet.com"+div.find('a').get('href')
            elif label=="Social:":
                page_data['social_media'] = [a.get('href') for a in div.findAll('a')]

        col2 = bus_det.find('div',{'class':'bdcol2'})
        # Disabled in the original and still not collected: the 'certblock'
        # divs under this column ("Diverse / Small Bus. Status" <h2>, and the
        # "Quality Certifications:" / "Registrations:" labels).
        col2_fields = {
            "Annual Sales:": 'annual_revenue',
            "No of Employees:": 'num_employees',
            "Year Founded:": 'year_founded',
        }
        for div in col2.findAll('div',{'class':'bizdetail'}):
            label_div = div.find('div',{'class':'label'})
            if label_div is None:
                continue
            field = col2_fields.get(label_div.text.strip())
            if field is not None:
                page_data[field] = div.find('li').text.strip()

        ################# Consolidation of Data ####################
        collected_data.append(page_data)
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it must
        # not be interpolated into the message.
        print(f'\nError encountered while scraping {ref[1]}')
        print_exc()

master_df = pd.DataFrame(collected_data)

100%|██████████| 20/20 [00:34<00:00,  1.73s/it]


In [12]:
# Write the scraped profile table to disk, then display it.
master_df.to_csv(path_or_buf='data/hydraulic_cylinder_master_data.csv', index=False)
master_df

Unnamed: 0,company_id,company_name,company_type,company_url,location,telephone,product_description,about_company,product_types,all_product_services,...,link_all_brands,primary_company_type,additional_activities,key_personnel,social_media,other_locations,annual_revenue,year_founded,num_employees,url
0,126704,"Perfection Hydraulics, Inc.",Manufacturer,https://perfhyd.com/,"Evansville, IN 47711",tel://800-624-4862,"Manufacturer, distributor & repair of mobile h...",Company Description by Thomasnet\nManufacturer...,"[Cylinders: Hydraulic, Cylinders: Mill Type, H...","[Cylinders, Excavators, Hydraulic Equipment, M...",...,,Manufacturer,"Distributor, Service Company","[Steve Klamer, Pres., Kristi Klamer, COO, Jenn...",,,$10 - 24.9 Mil,1978,50-99,https://www.thomasnet.com/profile/00126704/per...
1,30691217,Amtec Hydraclamp Inc.,Manufacturer,https://www.amtechydraclamp.com/,"Burlington, ON L7L4X9",tel://905-335-8233,,,"[Cylinders: Hydraulic, Cylinders, Cylinders: C...","[Clamps, Controls and Controllers, Couplers an...",...,,Manufacturer,,[Not Available],,,Under $1 Mil,1988,1-9,https://www.thomasnet.com/profile/30691217/amt...
2,220308,"Little Air Giant, Inc.",Manufacturer,http://www.littleairgiant.com,"San Pedro, CA 90731",tel://800-833-0008,,Company Description by Thomasnet\nManufacturer...,"[Cylinders: Hydraulic, Cylinders, Cylinders: A...","[Actuators, Cylinders, Grinders, Hydraulic Equ...",...,,Manufacturer,Custom Manufacturer,[Not Available],,,Under $1 Mil,1948,10-49,https://www.thomasnet.com/profile/00220308/lit...
3,10078784,Dynamic Fluid Products Inc.,Manufacturer,http://www.dynamicfluid.ca,"Tillsonburg, ON N4G4H5",tel://800-265-2656,"Specializing In The Fluid Power Industry, Deve...",Company Description by Thomasnet\nSpecializing...,"[Cylinders: Hydraulic, Cylinders, Cylinders: Air]","[Automation Systems, Boosters, Couplers and Co...",...,,Manufacturer,"Distributor, Custom Manufacturer, Manufacturer...",[Not Available],,,$1 - 4.9 Mil,Not Available,10-49,https://www.thomasnet.com/profile/10078784/dyn...
4,10099788,Cylindrix Mfg. Co. Inc.,Manufacturer,http://www.cylindrix.com,"Montreal, QC H1Z2Z3",tel://514-374-3860,"Heavy Duty, Mill Type",Company Description by Thomasnet\nCylinders,"[Cylinders: Hydraulic, Cylinders: Air, Cylinde...","[Cylinders, Manifolds, Turbines]",...,,Manufacturer,,[Not Available],,,Under $1 Mil,1974,1-9,https://www.thomasnet.com/profile/10099788/cyl...
5,10042396,Woodings Industrial Corporation,Manufacturer,http://www.woodingsindustrial.com,"Mars, PA 16046",tel://724-625-3131,,Company Description by Thomasnet\nDrills: Blas...,"[Cylinders: Hydraulic, Cylinders, Cylinders: A...","[Bits, Chisels, Cylinders, Drills, Points, Ste...",...,,Manufacturer,,[Not Available],,,$10 - 24.9 Mil,1960,50-99,https://www.thomasnet.com/profile/10042396/woo...
6,30689257,Empire Hydraulics & Hard Chrome,Service Company,https://www.empirehydraulics.ca/,"Edmonton, AB T5S0B8",tel://780-483-8001,,,[Cylinders: Hydraulic],"[Cylinders, Machining, Maintenance and Repair ...",...,,Service Company,"Manufacturer, Custom Manufacturer",[Not Available],,,$1 - 4.9 Mil,1979,10-49,https://www.thomasnet.com/profile/30689257/emp...
7,30683026,Pelletier Equipement Ltee/Ltd.,Manufacturer,https://pelletierequipement.nb.ca/fr/index.php,"Lac Baker, NB E7A1N2",tel://506-992-2356,,,[Cylinders: Hydraulic],[Cylinders],...,,Manufacturer,,[Not Available],,,$1 - 4.9 Mil,1986,10-49,https://www.thomasnet.com/profile/30683026/pel...
8,10019085,Hydraulic Specialty Inc.,Distributor,https://hydraulicspecialty.com/,"Fridley, MN 55432",tel://800-622-0788,Distributor of industrial hydraulic equipment ...,Company Description by Thomasnet\nDistributor ...,"[Cylinders: Hydraulic, Cylinders, Cylinders: A...","[Absorbers, Accumulators, Actuators, Adapters,...",...,,Distributor,"Manufacturer, Custom Manufacturer, Service Com...",[Not Available],,,$1 - 4.9 Mil,2013,10-49,https://www.thomasnet.com/profile/10019085/hyd...
9,10035048,"Shore Western Mfg., Inc.",Manufacturer,http://www.shorewestern.com,"Monrovia, CA 91016",tel://626-357-3251,,Company Description by Thomasnet\nElectrohydra...,[Cylinders: Hydraulic],"[Actuators, Chambers, Controls and Controllers...",...,,Manufacturer,,[Not Available],,,$5 - 9.9 Mil,1971,10-49,https://www.thomasnet.com/profile/10035048/sho...


In [11]:
# List the profile URLs that ended up in the scraped table.
master_df['url'].tolist()

['https://www.thomasnet.com/profile/00912997/smiths-hydraulic-inc.html?cid=912997&cov=NA&heading=21650809&searchpos=101&what=hydraulic+cylinders',
 'https://www.thomasnet.com/profile/20045567/titan-worldwide-inc.html?cid=20045567&cov=NA&heading=21650809&searchpos=102&what=hydraulic+cylinders',
 'https://www.thomasnet.com/profile/10054697/knott-brake-co.html?cid=10054697&cov=NA&heading=21650809&searchpos=103&what=hydraulic+cylinders',
 'https://www.thomasnet.com/profile/00360899/general-engineering-co.html?cid=360899&cov=NA&heading=21650809&searchpos=104&what=hydraulic+cylinders',
 'https://www.thomasnet.com/profile/01172128/mti-manufacturing-inc.html?cid=1172128&cov=NA&heading=21650809&searchpos=105&what=hydraulic+cylinders',
 'https://www.thomasnet.com/profile/00454701/price-engineering-co-inc.html?cid=454701&cov=NA&heading=21650809&searchpos=106&what=hydraulic+cylinders',
 'https://www.thomasnet.com/profile/30848002/ads-machine-shop-inc.html?cid=30848002&cov=NA&heading=21650809&searc

In [27]:
# Re-fetch a single profile page to inspect its business-details column.
resp = requests.get('https://www.thomasnet.com/profile/00912997/smiths-hydraulic-inc.html?cid=912997&cov=NA&heading=21650809&searchpos=101&what=hydraulic+cylinders', timeout=30).text
soup = BeautifulSoup(resp,"lxml")
# Look the section up in the page just fetched; the original read `bus_det`
# left over from an earlier cell's loop, i.e. from a different page.
bus_det = soup.find('div',{'id':'copro_bizdetails'})
col2 = bus_det.find('div',{'class':'bdcol2'})

In [None]:
collected_data = []

def fast_extract(ref):
    """Scrape one supplier profile page and report whether it parsed.

    Args:
        ref: Two-item sequence; item 0 is stored as ``company_id`` and item 1
            is the URL that gets fetched (and stored as ``url``).

    Returns:
        ``{"page_data": dict, "success": bool}``. ``page_data`` always has the
        full set of keys; fields that could not be read stay ``''``.
        ``success`` is ``True`` only when the page was fetched and the
        business-details section parsed without error. Network and parsing
        errors are reported on the console, never raised, so one bad page
        cannot abort a ``Pool.map`` over many pages.
    """
    # Every field starts empty so all records share the same columns.
    page_data = {
        'company_id':ref[0],
        'company_name':'',
        'company_type':'',
        'company_url':'',
        'location':'',
        'telephone':'',
        'product_description':'',
        'about_company':'',
        'product_types':'',
        'all_product_services':'',
        'link_prod_services':'',
        'brands':'',
        'link_all_brands':'',
        'primary_company_type':'',
        'additional_activities':'',
        'key_personnel':'',
        'social_media':'',
        'other_locations':'',
        "annual_revenue":"",
        "year_founded":"",
        "num_employees":"",
        'url':ref[1]
    }
    result = {"page_data": page_data, "success": False}
    try:
        # The request sits inside the try so a network failure is reported
        # like any other per-page error instead of propagating to the caller.
        page = requests.get(ref[1], timeout=30).text
        soup = BeautifulSoup(page,'lxml')

        ################# General Information ####################
        # Each field here is best effort: a missing element leaves it empty.
        gen_info = soup.find('div',{'class':'copro_naft'})
        try:
            # Company name and website come from the same <h1><a> anchor.
            title_link = gen_info.find('div',{'class':'codetail'}).find('h1').find('a')
            page_data["company_name"] = title_link.text.strip()
            page_data["company_url"] = title_link.get('href')
        except Exception:
            pass
        try:
            # Prefer the third <span>; fall back to the first when there are
            # fewer than three. Kept inside the try so that a page with no
            # span at all leaves the field empty instead of failing the page.
            type_spans = gen_info.find('div',{'class':'codetail'}).find('p').findAll('span')
            type_span = type_spans[2] if len(type_spans) > 2 else type_spans[0]
            page_data["company_type"] = type_span.text.strip()
        except Exception:
            pass
        try:
            # Only the part of the address line before the first '|' is kept.
            page_data["location"] = gen_info.find('p',{'class':'addrline'}).text.split('|')[0].strip()
        except Exception:
            pass
        try:
            page_data["telephone"] = gen_info.find('a',{'data-conversion_action':'Call'}).get('href')
        except Exception:
            pass

        ################# Business Description ####################
        business_desc = soup.find('div',{'id':'copro_description'})
        try:
            page_data['product_description'] = business_desc.find('div',{'id':'copro_pdm'}).text.strip()
        except Exception:
            pass
        try:
            page_data['about_company'] = business_desc.find('div',{'id':'copro_about'}).text.strip()
        except Exception:
            pass

        ################# Products and Services/Brands ####################
        prod_serv = soup.find('div',{'id':'copro_prodserv'})
        try:
            # First group lists the product types, second group all products/services.
            prod_cats = prod_serv.find('div',{'id':'copro_prodserv_cats'})
            prod_groups = prod_cats.findAll('div',{'class':'prodserv_group'})
            page_data['product_types'] = [li.text.strip() for li in prod_groups[0].findAll('li')]
            all_prodserv = prod_groups[1]
            page_data['all_product_services'] = [li.text.strip() for li in all_prodserv.findAll('li')]
            page_data['link_prod_services'] = "https://www.thomasnet.com"+all_prodserv.find('a').get('href')
        except Exception:
            pass

        try:
            prod_brands = prod_serv.find('div',{'id':'copro_prodserv_brands'})
            brands = prod_brands.find('div',{'class':'prodserv_group'})
            page_data['brands'] = [li.text.strip() for li in brands.findAll('li')]
            page_data['link_all_brands'] = "https://www.thomasnet.com"+brands.find('a').get('href')
        except Exception:
            pass

        ################# Business Details ####################
        # Not wrapped individually: when this section is absent the lookups
        # raise and the page is reported as unsuccessful.
        bus_det = soup.find('div',{'id':'copro_bizdetails'})
        col1 = bus_det.find('div',{'class':'bdcol1'})
        for div in col1.findAll('div',{'class':'bizdetail'}):
            label_div = div.find('div',{'class':'label'})
            if label_div is None:
                # Same guard the second column uses: skip unlabeled entries.
                continue
            label = label_div.text.strip()
            if label=="Primary Company Type:":
                page_data['primary_company_type'] = div.find('li').text.strip()
            elif label=="Additional Activities:":
                page_data['additional_activities'] = div.find('li').text.strip()
            elif label=="Key Personnel:":
                page_data['key_personnel'] = [li.text.strip() for li in div.findAll('li')]
            elif label=="Locations:":
                page_data['other_locations'] = "https://www.thomasnet.com"+div.find('a').get('href')
            elif label=="Social:":
                page_data['social_media'] = [a.get('href') for a in div.findAll('a')]

        col2 = bus_det.find('div',{'class':'bdcol2'})
        # Disabled in the original and still not collected: the 'certblock'
        # divs under this column ("Diverse / Small Bus. Status" <h2>, and the
        # "Quality Certifications:" / "Registrations:" labels).
        col2_fields = {
            "Annual Sales:": 'annual_revenue',
            "No of Employees:": 'num_employees',
            "Year Founded:": 'year_founded',
        }
        for div in col2.findAll('div',{'class':'bizdetail'}):
            label_div = div.find('div',{'class':'label'})
            if label_div is None:
                continue
            field = col2_fields.get(label_div.text.strip())
            if field is not None:
                page_data[field] = div.find('li').text.strip()

        ################# Consolidation of Data ####################
        result["success"] = True
        print(f"Successfully scraped page {ref[1]}")
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it must
        # not be interpolated into the message.
        print(f'\nError encountered while scraping {ref[1]}')
        print_exc()
    # Plain return rather than `finally: return`, which would also swallow
    # KeyboardInterrupt and SystemExit.
    return result

# Scrape the first 20 profiles in parallel worker processes. The context
# manager shuts the pool down once map() has returned.
# NOTE(review): worker processes must be able to import `fast_extract`; on
# platforms that spawn rather than fork, a function defined in a notebook cell
# may not be picklable — confirm this cell completes in your environment.
with Pool(processes=15) as pool:
    final_result = pool.map(fast_extract, ref_urls.values.tolist()[:20])

for result in final_result:
    if result["success"]:
        # append() adds the record as one row; extend() on a dict would add
        # its keys (plain strings) to the list instead.
        collected_data.append(result["page_data"])

df = pd.DataFrame(collected_data)