In [3]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
# Input table; its "agent_url" column holds the profile pages to scrape.
df = pd.read_csv("kw_email_use_selenium.csv")

# Distinct profile URLs, in order of first appearance.
url_list = pd.unique(df["agent_url"])
def get_driver():
    """Start a new headless Firefox session and return its WebDriver.

    NOTE(review): ``--no-sandbox`` and ``--disable-dev-shm-usage`` look like
    Chromium switches; confirm Firefox actually needs them.
    """
    firefox_options = webdriver.FirefoxOptions()
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        firefox_options.add_argument(flag)
    return webdriver.Firefox(options=firefox_options)


def collect_agent_contact_details(driver):
    """Read the agent's contact fields from the page loaded in *driver*.

    Every lookup is best-effort: a field whose element is missing or cannot
    be read is reported as an empty string instead of raising.

    Args:
        driver: Selenium WebDriver already navigated to an agent profile.

    Returns:
        dict with the keys ``agent_email``, ``mobile_number``,
        ``office_number`` and ``agent_website``.
    """
    def class_text(class_name):
        # ``except Exception`` rather than a bare ``except`` keeps the lookup
        # best-effort while still letting Ctrl-C / SystemExit stop the run.
        try:
            return driver.find_element(By.CLASS_NAME, class_name).text
        except Exception:
            return ""

    # find_element(By.CLASS_NAME, ...) is used instead of the
    # find_element_by_class_name helpers, which newer Selenium releases no
    # longer provide.
    try:
        panel = driver.find_element(By.CLASS_NAME, "AgentInformation")
        facts = panel.find_elements(By.CLASS_NAME, "AgentInformation__factBody")
    except Exception:
        facts = []

    def fact_text(aria_label):
        # Text of the first fact element carrying the given aria-label.
        try:
            for fact in facts:
                if fact.get_attribute("aria-label") == aria_label:
                    return fact.text
        except Exception:
            return ""
        return ""

    return {
        "agent_email": fact_text("Agent E-mail"),
        "mobile_number": class_text("AgentInformation__phoneMobileNumber"),
        "office_number": class_text("AgentInformation__phoneOfficeNumber"),
        "agent_website": fact_text("Agent Website"),
    }


def get_agent_details(driver):
    """Read the descriptive profile fields from the page loaded in *driver*.

    Every lookup is best-effort: a field whose element is missing or cannot
    be read is reported as an empty string instead of raising.

    Args:
        driver: Selenium WebDriver already navigated to an agent profile.

    Returns:
        dict with the keys ``agent_licenses``, ``agent_bio``,
        ``serviceAreas``, ``agent_team_name``, ``agent_team_info`` (the team
        text with its line breaks turned into ``", "``) and ``logo_url``
        (the ``src`` attribute of the team avatar image).
    """
    def class_text(class_name):
        # ``except Exception`` rather than a bare ``except`` keeps the lookup
        # best-effort while still letting Ctrl-C / SystemExit stop the run.
        # find_element(By.CLASS_NAME, ...) is used instead of the
        # find_element_by_class_name helper, which newer Selenium releases
        # no longer provide.
        try:
            return driver.find_element(By.CLASS_NAME, class_name).text
        except Exception:
            return ""

    agent_details = {
        "agent_licenses": class_text("AgentContent__licenses"),
        "agent_bio": class_text("AgentContent__bio"),
        "serviceAreas": class_text("AgentContent__serviceAreas"),
        "agent_team_name": class_text("AgentContent__teamName"),
        # Same result as splitting on "\n" and re-joining with ", ".
        "agent_team_info": class_text("AgentContent__teamText").replace("\n", ", "),
    }

    try:
        avatar = driver.find_element(By.CLASS_NAME, "AgentContent__teamAvatar")
        image = avatar.find_element(By.CLASS_NAME, "KWImage__image")
        agent_details["logo_url"] = image.get_attribute("src")
    except Exception:
        agent_details["logo_url"] = ""
    return agent_details


def social_media_details(driver):
    """Collect the agent's social-media links from the page loaded in *driver*.

    The lookup is best-effort: if the social-media block or its links cannot
    be read, every network is reported as an empty string.

    Args:
        driver: Selenium WebDriver already navigated to an agent profile.

    Returns:
        A ``(social_media_dict, social_media_links_str)`` tuple.
        ``social_media_dict`` maps ``facebook``, ``instagram``, ``twitter``
        and ``linkedin`` to the first collected link containing that word
        (or ``""``); ``social_media_links_str`` is every collected link
        joined with ``"|"``.
    """
    # find_element(By.CLASS_NAME, ...) is used instead of the
    # find_element_by_class_name helpers, which newer Selenium releases no
    # longer provide. ``except Exception`` (not a bare ``except``) still lets
    # Ctrl-C / SystemExit stop the run.
    try:
        block = driver.find_element(By.CLASS_NAME, "AgentInformation__socialMedia")
        anchors = block.find_elements(By.CLASS_NAME, "link")
        hrefs = [anchor.get_attribute("href") for anchor in anchors]
    except Exception:
        hrefs = []

    # Drop anchors without an href: a None entry would break the substring
    # tests below and make the final "|".join raise.
    social_media_links = [href for href in hrefs if href]

    def first_link_for(network):
        return next((link for link in social_media_links if network in link), "")

    social_media_dict = {
        network: first_link_for(network)
        for network in ("facebook", "instagram", "twitter", "linkedin")
    }
    social_media_links_str = "|".join(social_media_links)
    return social_media_dict, social_media_links_str


def collect_other_info(driver):
    """Read the titled profile sections from the page loaded in *driver*.

    A section is picked when its visible text contains the wanted heading;
    the value is the text of its ``AgentContent__sectionText`` child. Every
    lookup is best-effort and falls back to an empty string.

    Args:
        driver: Selenium WebDriver already navigated to an agent profile.

    Returns:
        dict with the keys ``Market_Cente``, ``Languages`` and
        ``Specialties_Designations`` (key spellings are kept as-is because
        callers index the result by them).
    """
    # The section lookup is guarded like every other lookup here, so a
    # failure yields empty fields instead of aborting the whole scrape.
    # find_elements(By.CLASS_NAME, ...) is used instead of the
    # find_elements_by_class_name helper, which newer Selenium releases no
    # longer provide.
    try:
        sections = driver.find_elements(By.CLASS_NAME, "AgentContent__section")
    except Exception:
        sections = []

    def section_text(heading):
        # ``except Exception`` rather than a bare ``except`` still lets
        # Ctrl-C / SystemExit stop the run.
        try:
            for section in sections:
                if heading in section.text:
                    body = section.find_element(By.CLASS_NAME, "AgentContent__sectionText")
                    return body.text
        except Exception:
            return ""
        return ""

    other_info = {
        "Market_Cente": section_text("Market Center"),
        "Languages": section_text("Languages"),
        "Specialties_Designations": section_text("Specialties and Designations"),
    }
    return other_info

def wait_to_page_load(driver, timeout=3):
    """Wait for the agent-name element to appear on the current page.

    Waits up to *timeout* seconds for an element with class
    ``AgentContent__name`` to be present and prints whether it showed up.
    The wait never raises, so the caller can carry on scraping (and record
    empty fields) for pages that did not load.

    Args:
        driver: Selenium WebDriver that has just been pointed at a profile.
        timeout: Maximum number of seconds to wait (default 3).

    Returns:
        The same *driver* object, whether or not the element appeared.
    """
    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.CLASS_NAME, "AgentContent__name"))
        )
        print("Page load happened")
    except Exception:
        # Deliberately broad: any failure while waiting, not only a timeout,
        # is reported with this message so the scrape keeps going.
        print("Timeout happened no page load")
    return driver

# Index into url_list where this run starts; also printed as a progress
# counter after each profile is saved.
url_count = 46128
# Pages fetched with the current browser; the browser is replaced once this
# reaches 100.
driver_count = 0


def _class_text(driver, class_name):
    """Return the text of the first element with *class_name*, or "" on failure."""
    # ``except Exception`` rather than a bare ``except`` keeps the lookup
    # best-effort while still letting Ctrl-C / SystemExit stop the run.
    # find_element(By.CLASS_NAME, ...) is used instead of the
    # find_element_by_class_name helper, which newer Selenium releases no
    # longer provide.
    try:
        return driver.find_element(By.CLASS_NAME, class_name).text
    except Exception:
        return ""


driver = get_driver()
try:
    for url in url_list[url_count:47000]:
        print(url)
        if driver_count >= 100:
            print("need to close driver")
            driver.quit()
            driver_count = 0
            driver = get_driver()
            print("new driver initialize****************")
        driver.get(url)
        driver = wait_to_page_load(driver)

        agent_name = _class_text(driver, "AgentContent__name")
        # Short pause before reading the avatar's inline style.
        time.sleep(0.5)
        try:
            avatar_style = driver.find_element(By.CLASS_NAME, "AvatarImage__bg").get_attribute("style")
            # Strip the CSS wrapper so only the image URL remains.
            profile_url = avatar_style.replace('background-image: url("', '').replace('");', '')
        except Exception:
            profile_url = ""
        tag = _class_text(driver, "pill")
        role = _class_text(driver, "AgentContent__team")
        location = _class_text(driver, "AgentContent__location")

        contact_dict = collect_agent_contact_details(driver)
        agent_details = get_agent_details(driver)
        social_media_dict, social_media_links_str = social_media_details(driver)
        other_info = collect_other_info(driver)

        # One output row; the key order below is also the CSV column order.
        data_dict = {
            'agent_url': url,
            'agent_name': agent_name,
            'agent_role': role,
            'location': location,
            'agent_dp': profile_url,
            'tag': tag,
            'agent_email': contact_dict['agent_email'],
            'mobile_number': contact_dict['mobile_number'],
            'office_number': contact_dict['office_number'],
            'agent_license': agent_details['agent_licenses'],
            'agent_bio': agent_details['agent_bio'],
            'serviceAreas': agent_details['serviceAreas'],
            'office_name': agent_details['agent_team_name'],
            'office_address': agent_details['agent_team_info'],
            'logo_url': agent_details['logo_url'],
            'facebook': social_media_dict['facebook'],
            'instagram': social_media_dict['instagram'],
            'twitter': social_media_dict['twitter'],
            'linkedin': social_media_dict['linkedin'],
            'Market_Cente': other_info['Market_Cente'],
            'agent_language': other_info['Languages'],
            'Specialties_Designations': other_info['Specialties_Designations'],
            'social_media_links_str': social_media_links_str,
            'agent_website': contact_dict['agent_website'],
        }
        data_df = pd.DataFrame(data_dict, index=[0], columns=list(data_dict))

        # Append after every profile so an interrupted run keeps what it has;
        # the header is written only while the file is still empty.
        with open("kw_data-emailSelenium_46k_47K.csv", 'a', newline='', encoding='utf-8') as f:
            data_df.to_csv(f, mode='a', header=f.tell() == 0)

        print(url_count, url)
        url_count += 1
        driver_count += 1
        print("***********************************")
finally:
    # Shut the browser down even when the loop is interrupted or fails.
    driver.quit()


https://kw.com/agent/UPA-6587385174290694150-3
Page load happened
46128 https://kw.com/agent/UPA-6587385174290694150-3
***********************************
https://kw.com/agent/UPA-6587385153588051969-3
Page load happened
46129 https://kw.com/agent/UPA-6587385153588051969-3
***********************************
https://kw.com/agent/UPA-6587385346269569027-4
Page load happened
46130 https://kw.com/agent/UPA-6587385346269569027-4
***********************************
https://kw.com/agent/UPA-6587385379669311492-5
Page load happened
46131 https://kw.com/agent/UPA-6587385379669311492-5
***********************************
https://kw.com/agent/UPA-6773612728710135808-3
Page load happened
46132 https://kw.com/agent/UPA-6773612728710135808-3
***********************************
https://kw.com/agent/UPA-6587385162102460417-3
Page load happened
46133 https://kw.com/agent/UPA-6587385162102460417-3
***********************************
https://kw.com/agent/UPA-6766077123278884864-1
Page load happened
4613

Page load happened
46181 https://kw.com/agent/UPA-6744303582711463936-5
***********************************
https://kw.com/agent/UPA-6761048737388449792-9
Page load happened
46182 https://kw.com/agent/UPA-6761048737388449792-9
***********************************
https://kw.com/agent/UPA-6587385215146168323-9
Page load happened
46183 https://kw.com/agent/UPA-6587385215146168323-9
***********************************
https://kw.com/agent/UPA-6587385100643082242-7
Page load happened
46184 https://kw.com/agent/UPA-6587385100643082242-7
***********************************
https://kw.com/agent/UPA-6587385177718689794-9
Page load happened
46185 https://kw.com/agent/UPA-6587385177718689794-9
***********************************
https://kw.com/agent/UPA-6587385317469143043-8
Page load happened
46186 https://kw.com/agent/UPA-6587385317469143043-8
***********************************
https://kw.com/agent/UPA-6587385155910012935-8
Page load happened
46187 https://kw.com/agent/UPA-6587385155910012935-

Page load happened
46234 https://kw.com/agent/UPA-6587385278580482055-5
***********************************
https://kw.com/agent/UPA-6587385228011913220-2
Page load happened
46235 https://kw.com/agent/UPA-6587385228011913220-2
***********************************
https://kw.com/agent/UPA-6715011029349392384-0
Page load happened
46236 https://kw.com/agent/UPA-6715011029349392384-0
***********************************
https://kw.com/agent/UPA-6791023055658151936-3
Page load happened
46237 https://kw.com/agent/UPA-6791023055658151936-3
***********************************
https://kw.com/agent/UPA-6587385179286794244-5
Page load happened
46238 https://kw.com/agent/UPA-6587385179286794244-5
***********************************
https://kw.com/agent/UPA-6587385187662872576-3
Page load happened
46239 https://kw.com/agent/UPA-6587385187662872576-3
***********************************
https://kw.com/agent/UPA-6767179636132323328-0
Page load happened
46240 https://kw.com/agent/UPA-6767179636132323328-

Page load happened
46287 https://kw.com/agent/UPA-6587385017110839300-6
***********************************
https://kw.com/agent/UPA-6587385345007829000-2
Page load happened
46288 https://kw.com/agent/UPA-6587385345007829000-2
***********************************
https://kw.com/agent/UPA-6771166442732584960-7
Page load happened
46289 https://kw.com/agent/UPA-6771166442732584960-7
***********************************
https://kw.com/agent/UPA-6592934391616937984-5
Page load happened
46290 https://kw.com/agent/UPA-6592934391616937984-5
***********************************
https://kw.com/agent/UPA-6824467211994435584-5
Page load happened
46291 https://kw.com/agent/UPA-6824467211994435584-5
***********************************
https://kw.com/agent/UPA-6587385181188018176-4
Page load happened
46292 https://kw.com/agent/UPA-6587385181188018176-4
***********************************
https://kw.com/agent/UPA-6587384972876021760-6
Page load happened
46293 https://kw.com/agent/UPA-6587384972876021760-

Page load happened
46340 https://kw.com/agent/UPA-6587385395412819975-1
***********************************
https://kw.com/agent/UPA-6587385165291450373-7
Page load happened
46341 https://kw.com/agent/UPA-6587385165291450373-7
***********************************
https://kw.com/agent/UPA-6587385262866333699-6
Page load happened
46342 https://kw.com/agent/UPA-6587385262866333699-6
***********************************
https://kw.com/agent/UPA-6587385267863425029-4
Page load happened
46343 https://kw.com/agent/UPA-6587385267863425029-4
***********************************
https://kw.com/agent/UPA-6742446962476224512-4
Page load happened
46344 https://kw.com/agent/UPA-6742446962476224512-4
***********************************
https://kw.com/agent/UPA-6587385370566230016-4
Page load happened
46345 https://kw.com/agent/UPA-6587385370566230016-4
***********************************
https://kw.com/agent/UPA-6587385330210193410-5
Page load happened
46346 https://kw.com/agent/UPA-6587385330210193410-

Page load happened
46393 https://kw.com/agent/UPA-6587384988824866821-0
***********************************
https://kw.com/agent/UPA-6587385338088910851-0
Page load happened
46394 https://kw.com/agent/UPA-6587385338088910851-0
***********************************
https://kw.com/agent/UPA-6587385176789151749-5
Page load happened
46395 https://kw.com/agent/UPA-6587385176789151749-5
***********************************
https://kw.com/agent/UPA-6800171334023475200-1
Page load happened
46396 https://kw.com/agent/UPA-6800171334023475200-1
***********************************
https://kw.com/agent/UPA-6800171333184458752-2
Timeout happened no page load
46397 https://kw.com/agent/UPA-6800171333184458752-2
***********************************
https://kw.com/agent/UPA-6587385184854904834-2
Page load happened
46398 https://kw.com/agent/UPA-6587385184854904834-2
***********************************
https://kw.com/agent/UPA-6762468360141324288-5
Page load happened
46399 https://kw.com/agent/UPA-676246836

Page load happened
46446 https://kw.com/agent/UPA-6587385223501512709-1
***********************************
https://kw.com/agent/UPA-6844368492169203712-2
Page load happened
46447 https://kw.com/agent/UPA-6844368492169203712-2
***********************************
https://kw.com/agent/UPA-6762543870621589504-4
Page load happened
46448 https://kw.com/agent/UPA-6762543870621589504-4
***********************************
https://kw.com/agent/UPA-6587385338097299459-2
Page load happened
46449 https://kw.com/agent/UPA-6587385338097299459-2
***********************************
https://kw.com/agent/UPA-6643899813131767809-4
Page load happened
46450 https://kw.com/agent/UPA-6643899813131767809-4
***********************************
https://kw.com/agent/UPA-6587385240688414722-8
Page load happened
46451 https://kw.com/agent/UPA-6587385240688414722-8
***********************************
https://kw.com/agent/UPA-6587384985821532162-1
Page load happened
46452 https://kw.com/agent/UPA-6587384985821532162-

Page load happened
46499 https://kw.com/agent/UPA-6774850759425122304-3
***********************************
https://kw.com/agent/UPA-6587385222425833473-4
Page load happened
46500 https://kw.com/agent/UPA-6587385222425833473-4
***********************************
https://kw.com/agent/UPA-6587385026533371904-7
Page load happened
46501 https://kw.com/agent/UPA-6587385026533371904-7
***********************************
https://kw.com/agent/UPA-6587385294816866307-0
Page load happened
46502 https://kw.com/agent/UPA-6587385294816866307-0
***********************************
https://kw.com/agent/UPA-6808113479970770944-8
Page load happened
46503 https://kw.com/agent/UPA-6808113479970770944-8
***********************************
https://kw.com/agent/UPA-6839717037967360000-6
Page load happened
46504 https://kw.com/agent/UPA-6839717037967360000-6
***********************************
https://kw.com/agent/UPA-6587385060109729794-7
Page load happened
46505 https://kw.com/agent/UPA-6587385060109729794-

Page load happened
46552 https://kw.com/agent/UPA-6752729601079750656-1
***********************************
https://kw.com/agent/UPA-6597235428162097152-9
Page load happened
46553 https://kw.com/agent/UPA-6597235428162097152-9
***********************************
https://kw.com/agent/UPA-6587385302264893444-8
Page load happened
46554 https://kw.com/agent/UPA-6587385302264893444-8
***********************************
https://kw.com/agent/UPA-6587385298404491268-5
Page load happened
46555 https://kw.com/agent/UPA-6587385298404491268-5
***********************************
https://kw.com/agent/UPA-6587385171985039362-7
Page load happened
46556 https://kw.com/agent/UPA-6587385171985039362-7
***********************************
https://kw.com/agent/UPA-6587385379493150721-6
Page load happened
46557 https://kw.com/agent/UPA-6587385379493150721-6
***********************************
https://kw.com/agent/UPA-6587385251094810624-6
Page load happened
46558 https://kw.com/agent/UPA-6587385251094810624-

Page load happened
46605 https://kw.com/agent/UPA-6587385243032932353-3
***********************************
https://kw.com/agent/UPA-6778686888869560320-1
Page load happened
46606 https://kw.com/agent/UPA-6778686888869560320-1
***********************************
https://kw.com/agent/UPA-6587384970888318977-4
Page load happened
46607 https://kw.com/agent/UPA-6587384970888318977-4
***********************************
https://kw.com/agent/UPA-6587385154515324933-2
Page load happened
46608 https://kw.com/agent/UPA-6587385154515324933-2
***********************************
https://kw.com/agent/UPA-6752714602707251200-3
Page load happened
46609 https://kw.com/agent/UPA-6752714602707251200-3
***********************************
https://kw.com/agent/UPA-6587385186449379330-2
Page load happened
46610 https://kw.com/agent/UPA-6587385186449379330-2
***********************************
https://kw.com/agent/UPA-6648979127158374401-3
Page load happened
46611 https://kw.com/agent/UPA-6648979127158374401-

Timeout happened no page load
46658 https://kw.com/agent/UPA-6813896943686377472-6
***********************************
https://kw.com/agent/UPA-6587385170659016708-4
Page load happened
46659 https://kw.com/agent/UPA-6587385170659016708-4
***********************************
https://kw.com/agent/UPA-6587385344848445445-3
Page load happened
46660 https://kw.com/agent/UPA-6587385344848445445-3
***********************************
https://kw.com/agent/UPA-6587385075880988672-2
Page load happened
46661 https://kw.com/agent/UPA-6587385075880988672-2
***********************************
https://kw.com/agent/UPA-6587385403497398280-5
Page load happened
46662 https://kw.com/agent/UPA-6587385403497398280-5
***********************************
https://kw.com/agent/UPA-6587385422317404165-5
Page load happened
46663 https://kw.com/agent/UPA-6587385422317404165-5
***********************************
https://kw.com/agent/UPA-6735275338081251328-5
Page load happened
46664 https://kw.com/agent/UPA-673527533

Page load happened
46711 https://kw.com/agent/UPA-6592246196156624899-0
***********************************
https://kw.com/agent/UPA-6587385398655647749-8
Page load happened
46712 https://kw.com/agent/UPA-6587385398655647749-8
***********************************
https://kw.com/agent/UPA-6587385260547469314-8
Page load happened
46713 https://kw.com/agent/UPA-6587385260547469314-8
***********************************
https://kw.com/agent/UPA-6587385440876081153-7
Page load happened
46714 https://kw.com/agent/UPA-6587385440876081153-7
***********************************
https://kw.com/agent/UPA-6592265607184777223-2
Page load happened
46715 https://kw.com/agent/UPA-6592265607184777223-2
***********************************
https://kw.com/agent/UPA-6587385399782404098-4
Page load happened
46716 https://kw.com/agent/UPA-6587385399782404098-4
***********************************
https://kw.com/agent/UPA-6587385354391162880-0
Page load happened
46717 https://kw.com/agent/UPA-6587385354391162880-

Page load happened
46764 https://kw.com/agent/UPA-6645352624193024003-2
***********************************
https://kw.com/agent/UPA-6587385174257139719-1
Page load happened
46765 https://kw.com/agent/UPA-6587385174257139719-1
***********************************
https://kw.com/agent/UPA-6587385066024337412-7
Page load happened
46766 https://kw.com/agent/UPA-6587385066024337412-7
***********************************
https://kw.com/agent/UPA-6587385197054595077-8
Page load happened
46767 https://kw.com/agent/UPA-6587385197054595077-8
***********************************
https://kw.com/agent/UPA-6587385170742902784-3
Page load happened
46768 https://kw.com/agent/UPA-6587385170742902784-3
***********************************
https://kw.com/agent/UPA-6587385062659121154-5
Page load happened
46769 https://kw.com/agent/UPA-6587385062659121154-5
***********************************
https://kw.com/agent/UPA-6683482823241240576-5
Page load happened
46770 https://kw.com/agent/UPA-6683482823241240576-

Page load happened
46817 https://kw.com/agent/UPA-6683527736925171712-7
***********************************
https://kw.com/agent/UPA-6587385170516410370-5
Page load happened
46818 https://kw.com/agent/UPA-6587385170516410370-5
***********************************
https://kw.com/agent/UPA-6587385174294888456-6
Page load happened
46819 https://kw.com/agent/UPA-6587385174294888456-6
***********************************
https://kw.com/agent/UPA-6716746845466238976-8
Page load happened
46820 https://kw.com/agent/UPA-6716746845466238976-8
***********************************
https://kw.com/agent/UPA-6587385195992195074-4
Page load happened
46821 https://kw.com/agent/UPA-6587385195992195074-4
***********************************
https://kw.com/agent/UPA-6587385233904336897-2
Timeout happened no page load
46822 https://kw.com/agent/UPA-6587385233904336897-2
***********************************
https://kw.com/agent/UPA-6782369564357320704-5
Page load happened
46823 https://kw.com/agent/UPA-678236956

Page load happened
46870 https://kw.com/agent/UPA-6691093828681633792-3
***********************************
https://kw.com/agent/UPA-6587385091691089926-2
Page load happened
46871 https://kw.com/agent/UPA-6587385091691089926-2
***********************************
https://kw.com/agent/UPA-6829495692956340224-0
Page load happened
46872 https://kw.com/agent/UPA-6829495692956340224-0
***********************************
https://kw.com/agent/UPA-6773763883570425856-3
Page load happened
46873 https://kw.com/agent/UPA-6773763883570425856-3
***********************************
https://kw.com/agent/UPA-6605415447984668677-5
Page load happened
46874 https://kw.com/agent/UPA-6605415447984668677-5
***********************************
https://kw.com/agent/UPA-6587385162602475524-2
Page load happened
46875 https://kw.com/agent/UPA-6587385162602475524-2
***********************************
https://kw.com/agent/UPA-6743248140959023104-2
Page load happened
46876 https://kw.com/agent/UPA-6743248140959023104-

Page load happened
46923 https://kw.com/agent/UPA-6587385255934234632-3
***********************************
https://kw.com/agent/UPA-6587385295068524549-0
Page load happened
46924 https://kw.com/agent/UPA-6587385295068524549-0
***********************************
https://kw.com/agent/UPA-6783441394938011648-4
Page load happened
46925 https://kw.com/agent/UPA-6783441394938011648-4
***********************************
https://kw.com/agent/UPA-6694627235336560640-6
Page load happened
46926 https://kw.com/agent/UPA-6694627235336560640-6
***********************************
https://kw.com/agent/UPA-6587385204280295425-7
Page load happened
46927 https://kw.com/agent/UPA-6587385204280295425-7
***********************************
https://kw.com/agent/UPA-6587385240629694468-3
need to close driver
new driver initialize****************
Page load happened
46928 https://kw.com/agent/UPA-6587385240629694468-3
***********************************
https://kw.com/agent/UPA-6587385307943776257-3
Page load h

Page load happened
46976 https://kw.com/agent/UPA-6838208002434949120-9
***********************************
https://kw.com/agent/UPA-6587385313923440640-0
Page load happened
46977 https://kw.com/agent/UPA-6587385313923440640-0
***********************************
https://kw.com/agent/UPA-6587385439744327682-7
Page load happened
46978 https://kw.com/agent/UPA-6587385439744327682-7
***********************************
https://kw.com/agent/UPA-6587385159085608963-2
Page load happened
46979 https://kw.com/agent/UPA-6587385159085608963-2
***********************************
https://kw.com/agent/UPA-6790765664295120896-7
Page load happened
46980 https://kw.com/agent/UPA-6790765664295120896-7
***********************************
https://kw.com/agent/UPA-6587385017211502600-9
Page load happened
46981 https://kw.com/agent/UPA-6587385017211502600-9
***********************************
https://kw.com/agent/UPA-6587385292683313158-0
Page load happened
46982 https://kw.com/agent/UPA-6587385292683313158-