In [1]:
# Import necessary libraries
import re
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from IPython.core.display import display,HTML

In [2]:
class Doctors:
    """Scrape the iCliniq online-doctors directory and render it as an HTML table.

    The class is only a namespace: every method is static and no instance
    state is kept.
    """

    BASE_URL = "https://www.icliniq.com"
    DIRECTORY_URL = BASE_URL + "/search/online-doctors-directory"

    @staticmethod
    def _get_soup(url, timeout):
        """Download ``url`` and return it parsed with ``html.parser``.

        Raises ``requests.HTTPError`` for a 4xx/5xx response so an error page
        is never parsed as if it were real content.
        """
        page = requests.get(url, timeout=timeout)
        page.raise_for_status()
        return BeautifulSoup(page.content, "html.parser")

    @staticmethod
    def _text_of(node):
        """Return ``node.text``, or ``None`` when the element was not found."""
        return None if node is None else node.text

    @staticmethod
    def _first_match(pattern, text):
        """Return the first ``re.findall`` hit of ``pattern`` in ``text``, else ``None``."""
        if text is None:
            return None
        hits = re.findall(pattern, text)
        return hits[0] if hits else None

    @staticmethod
    def _scrape_profile(link, timeout):
        """Fetch one profile page a single time and extract its details.

        Returns a dict with the keys ``degree``, ``address``, ``query_fee``,
        ``phone_fee`` and ``photo``. Exactly one value is produced per key
        (``None`` when the element is absent), so the per-profile columns
        always stay aligned with the list of profile links.
        """
        details = {"degree": None, "address": None, "query_fee": None,
                   "phone_fee": None, "photo": None}

        results = Doctors._get_soup(link, timeout).find(class_="container-fluid pt-2")
        if results is None:
            return details

        # Degree
        degree_box = results.find('div', class_="media-body align-self-center pl-1")
        if degree_box is not None:
            degree_text = Doctors._text_of(degree_box.find('span', class_="small"))
            if degree_text is not None:
                details["degree"] = degree_text.strip()

        # Address: the text sits in the paragraph following the first <p> of
        # the block. The class name is spelled "loaction" in the selector.
        address_box = results.find('div', class_="loaction mt-1")
        if address_box is not None:
            first_p = address_box.find('p')
            second_p = first_p.find_next('p') if first_p is not None else None
            if second_p is not None:
                details["address"] = second_p.text.strip()

        # Fees: the query fee is read from the first <p> after the block and
        # the phone/video fee from the <p> after that one.
        fee_box = results.find('div', class_="mb-3 font-weight-bold")
        if fee_box is not None:
            query_p = fee_box.find_next('p')
            phone_p = query_p.find_next('p') if query_p is not None else None
            details["query_fee"] = Doctors._first_match(r"[Rs]+.\d+", Doctors._text_of(query_p))
            details["phone_fee"] = Doctors._first_match(r"[Rs]+.\d+", Doctors._text_of(phone_p))

        # Photo
        photo_box = results.find('div', class_="d-flex justify-content-center")
        if photo_box is not None:
            img = photo_box.find('img')
            if img is not None:
                details["photo"] = img.get('src')

        return details

    @staticmethod
    def scrap_details(max_rows=20, timeout=30):
        """Scrape the directory listing plus each doctor profile and display a table.

        Args:
            max_rows: maximum number of doctors to include (default 20);
                ``None`` keeps every doctor found on the listing page.
            timeout: timeout in seconds passed to every ``requests.get`` call.

        Returns:
            None. The table is rendered through ``display(HTML(...))``.

        Raises:
            requests.HTTPError: a page answered with an HTTP error status.
            ValueError: the listing container is missing, or the scraped
                columns do not all have the same length (which would
                otherwise pair values from different doctors in one row).
        """
        from html import escape  # stdlib; local import keeps the class self-contained

        soup = Doctors._get_soup(Doctors.DIRECTORY_URL, timeout)
        results = soup.find(class_="ic-doctor-list")
        if results is None:
            raise ValueError("Could not find the 'ic-doctor-list' container on "
                             + Doctors.DIRECTORY_URL)

        # Doctor Names
        name_list = [Doctors._text_of(heading.find('a'))
                     for heading in results.find_all("h3", class_="case-study")]

        # Doctors Rating
        rate_list = [Doctors._first_match(r'[-+]?\d*\.\d+|\d+', box.text)
                     for box in results.find_all("div", class_="overall-rating")]

        # Doctors Experience: only paragraphs that wrap a <p class="m-0"> are used.
        exp_list = []
        for para in results.find_all('p'):
            if para.find('p', class_="m-0") is None:
                continue
            years = Doctors._first_match(
                r'[0-9]{1,2}',
                Doctors._text_of(para.find('span', class_="font-weight-bold")))
            exp_list.append(years + "+ yrs" if years is not None else None)

        # Doctors specialization: blocks without a link and the pagination
        # link ("Next Page") are skipped.
        specialize_list = []
        for box in results.find_all('div', class_="mb-2"):
            anchor = box.find('a')
            if anchor is None or anchor.text == "Next Page":
                continue
            specialize_list.append(anchor.text)

        # Languages: read from the third <p> after the card's <p class="m-0">.
        lang_list = []
        for card in results.find_all('div', class_="grid-display"):
            node = card.find('p', class_="m-0")
            for _ in range(3):
                node = node.find_next('p') if node is not None else None
            span = node.find('span', class_="font-weight-bold") if node is not None else None
            lang_list.append(Doctors._text_of(span))

        # Profile links (the pattern is compiled once, the lookup done once per card)
        profile_href = re.compile("^/doctor/")
        link_list = []
        for body in results.find_all('div', class_="media-body"):
            anchor = body.find('a', attrs={'href': profile_href})
            if anchor is not None:
                link_list.append(Doctors.BASE_URL + anchor.get('href'))
        # Only the profiles that end up in the table are downloaded.
        link_list = link_list[:max_rows]

        # Degree, address, fees and photo: one request per profile.
        profiles = [Doctors._scrape_profile(link, timeout) for link in link_list]

        # Dataframe
        columns = {
            "Name": name_list[:max_rows],
            'Photo': [profile["photo"] for profile in profiles],
            'Degree': [profile["degree"] for profile in profiles],
            'Specialization': specialize_list[:max_rows],
            'Experience': exp_list[:max_rows],
            'Consulting Language': lang_list[:max_rows],
            'Rating': rate_list[:max_rows],
            'Query Fee': [profile["query_fee"] for profile in profiles],
            'Phone/Video Fee': [profile["phone_fee"] for profile in profiles],
            'Address': [profile["address"] for profile in profiles],
            'Profile Link': link_list,
        }
        lengths = {column: len(values) for column, values in columns.items()}
        if len(set(lengths.values())) > 1:
            raise ValueError(
                "Scraped columns have different lengths, rows would be misaligned: {}"
                .format(lengths))

        data = pd.DataFrame(columns)
        # 1-based row labels sized to the rows actually scraped.
        data.index = np.arange(1, len(data) + 1)

        # displaying images in dataframe
        def path_to_image_html(path):
            """Build the <img> tag for a photo URL; empty cell when there is no URL."""
            if not isinstance(path, str):
                return ''
            return '<img src="' + escape(path, quote=True) + '" width="60" >'

        def escape_text(value):
            """Escape scraped text, since ``to_html`` runs with ``escape=False``."""
            return escape(str(value), quote=False)

        # NOTE: this changes the pandas display option for the rest of the session.
        pd.set_option('display.max_colwidth', None)

        image_cols = ['Photo']
        format_dict = {column: escape_text for column in data.columns}
        for image_col in image_cols:
            format_dict[image_col] = path_to_image_html
        display(HTML(data.to_html(escape=False, formatters=format_dict)))
        
# Run the scraper; scrap_details is a static method, so no instance is needed.
Doctors.scrap_details()

Unnamed: 0,Name,Photo,Degree,Specialization,Experience,Consulting Language,Rating,Query Fee,Phone/Video Fee,Address,Profile Link
1,Dr. Rajesh Jain,,MBBS,Family Physician,29+ yrs,"British English, English, Middle English, U.S. English",5.0,Rs.199,Rs.399,"8 Varsha Colony Kasamwadi Chowk Old Mehrun Road Jalgaon , Jalgaon, Maharashtra, Zip: 425001 , IN",https://www.icliniq.com/doctor/dr-rajesh-jain
2,Dr. Bharatesh Devendra Basti,,"MBBS., DVD., MD COMMUNITY MEDICINE",Community Medicine,23+ yrs,English,4.44,Rs.199,Rs.399,"74 Sfs 208 Yelahanka New Town, Bangalore, Karnataka, Zip: 560064, IN",https://www.icliniq.com/doctor/dr-bharatesh-devendra-basti
3,Dr. Muhammad Zohaib Siddiq,,"MBBS., FCPS PG CARDIOLOGY",Cardiology,11+ yrs,"Hindi, Sindhi, Urdu",4.45,Rs.389,Rs.969,"Nicvd, Rafiqui Shaheed Road, Karachi, Sindh, Zip: 021, PK",https://www.icliniq.com/doctor/dr-muhammad-zohaib-siddiq
4,Dr. Sajeev Kumar,,"MBBS., DCH., CSC",Child Health,33+ yrs,"Hindi, Malayalam",4.5,Rs.199,Rs.399,"Anaswarakeezhcherimel, Chengannur, Kerala, Zip: 689121, IN",https://www.icliniq.com/doctor/dr-sajeev-kumar
5,Dr. Snehal Laul,,"DDV (DERMATOLOGY & VENEROLOGY)., M.B.B.S.",Dermatology,4+ yrs,English,4.2,Rs.199,Rs.399,"Dermatology And Venerology , Nashik, Maharashtra, Zip: 422003, IN",https://www.icliniq.com/doctor/dr-snehal-laul
6,Dr. Arun R Kaushik,,"MBBS., MD",Microbiology,10+ yrs,"English, Hindi, Kannada, Malayalam, Tamil, Telugu",4.13,Rs.220,Rs.399,"Near 17th Cross, Bangalore, Karnataka, Zip: 560003, IN",https://www.icliniq.com/doctor/dr-arun-r-kaushik
7,Dr. Richa Agarwal,,"MBBS., DGO",Obstetrics And Gynaecology,23+ yrs,English,4.37,Rs.300,Rs.499,"N-405,sector-66,gurgaon., Gurgaon, Haryana, Zip: 122018, IN",https://www.icliniq.com/doctor/drricha-agarwal
8,Dr. Thiyagarajan T,,"MBBS., AMERICAN BOARD ENDOCRINOLOGY., AMERICAN BOARD INTERNAL MEDICINE",Endocrinology,12+ yrs,English,4.5,Rs.199,Rs.399,"Codissia Road Civil Aerodrome Post, Coimbatore, Tamilnadu, Zip: 641014, IN",https://www.icliniq.com/doctor/dr-thiyagarajan-t
9,Mohammed Wajid,,PHYSIOTHERAPY,Physiotherapy,17+ yrs,"English, Hindi, Telugu, Urdu",5.0,Rs.299,Rs.499,"Psg Hospital Avinashi Road, Coimbatore, Tamil Nadu, Zip: 641004, IN",https://www.icliniq.com/doctor/dr-mohammed-wajid
10,Dr. Krishna Swaroop Achanta,,"B.D.S.-RGUHS,",Dentistry,5+ yrs,"English, Hindi, Kannada, Telugu",5.0,Rs.199,Rs.399,"1-8-504/4 Hameed Plaza, Viqar Nagar, Begumpet, Secunderabad., Hyderabad, Telangana, Zip: 500003, IN",https://www.icliniq.com/doctor/dr-krishna-swaroop-achanta
