In [1]:
import requests
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

# Setup Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Optional: run without opening the browser window
chrome_driver_path = 'C:\\Driver\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe'
service = Service(executable_path=chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

headers = {
    'User-Agent': 'Chrome/92.0.4515.159'
}

# URLs for faculty data
URL_1 = 'http://isb.nu.edu.pk/Faculty/GetAllEmp?id=301'
URL_2 = 'http://isb.nu.edu.pk/Faculty/GetAllEmp?id=302'
URL_3 = 'http://isb.nu.edu.pk/Faculty/GetAllEmp?id=303'
URL_4 = 'http://isb.nu.edu.pk/Faculty/GetAllEmp?id=313'

# Fetch data using requests
faculty_URL_1 = requests.get(URL_1, headers=headers)
faculty_URL_2 = requests.get(URL_2, headers=headers)
faculty_URL_3 = requests.get(URL_3, headers=headers)
faculty_URL_4 = requests.get(URL_4, headers=headers)

data_1 = faculty_URL_1.json()
data_2 = faculty_URL_2.json()
data_3 = faculty_URL_3.json()
data_4 = faculty_URL_4.json()

# Combine all data
faculty = [data_1, data_2, data_3, data_4]

URL_department = 'http://isb.nu.edu.pk/Faculty/allfaculty'
department = requests.get(URL_department, headers=headers)
# print(department)

soup = BeautifulSoup(department.content, 'html.parser')

depertment_name = []
depertment_name.append(soup.find('ul', class_='sub-menu').findAll('li')[0].find('a').text.strip())
depertment_name.append(soup.find('ul', class_='sub-menu').findAll('li')[1].find('a').text.strip())
depertment_name.append(soup.find('ul', class_='sub-menu').findAll('li')[2].find('a').text.strip())
depertment_name.append(soup.find('ul', class_='sub-menu').findAll('li')[3].find('a').text.strip())
# print(depertment_name)

school_name = []
school_name.append(str('FAST School of '+ depertment_name[0]))
school_name.append(str('Department of '+ depertment_name[1]))
school_name.append(str('FAST School of '+ depertment_name[2]))
school_name.append(str('Department of '+ depertment_name[3]))
# print(school_name)


# Create the DataFrame
faculty_islamabad_data = pd.DataFrame(columns=['ID',
                                     'Name',
                                     'Designation',
                                     'HEC Approved PHD Supervisor',
                                     'Highest Education',
                                     'Email',
                                     'School',
                                     'Department',
                                     'Extension',
                                     'ImageURL'])

faculty_islamabad_data.drop(faculty_islamabad_data.index, inplace=True)

base_url_profile = 'http://isb.nu.edu.pk/Faculty/Details/'
base_url_img = 'http://isb.nu.edu.pk/'
count = -1

for faculty_index in faculty:
    count +=1
    for faculty_member in faculty_index:
        education = 'Not Available'
        email ='Not Available'
        id = int(faculty_member['Emp_ID'])
        name = str(faculty_member['Name'])
        designation = str(faculty_member['Designation_Title'])
        hec = bool(faculty_member['hec_supervisor'])
        email = str(faculty_member['Email'].strip()) 
        if email:
            email = str(email.split('On leave')[0])
            if email == '' or email == '(':
                email = 'Not Available'
            else:
                email = email
        else:
            email ='Not Available'
        extension = int(faculty_member['Extension']) if faculty_member['Extension'] else 'Not Available'
        img = str(base_url_img + faculty_member['ImagePath']) if faculty_member['ImagePath'] else 'Not Found'
        school_str = str(school_name[count])
        depertment_str = str(depertment_name[count])
        
        profile_url = str(base_url_profile + str(id))
        print('ID:', id)
        print('Name: ', name)
        print('Designation: ', designation)
        print('HEC: ', hec)
        print('Email: ', email)
        print('Extension: ', extension)
        print('Img: ', img)
        print('School: ', school_str)
        print('Department: ', depertment_str)
        print('Profile_url: ', profile_url)
        
        # Open the URL using Selenium
        driver.get(profile_url)

        try:
            # Wait for the page to load
            # WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, 'body')))
            response = requests.get(profile_url)
            # print('HTTP Status Code:', response.status_code)
            
            # Print the raw HTML content
            page_source = driver.page_source
            # print('Page Content:')
            # print(page_source)
            
            # Convert page source to BeautifulSoup
            soup = BeautifulSoup(page_source, 'html.parser')
            # print('BeautifulSoup Object:',soup.find('span',id='DegreeName').find('li').text.strip())
            edu = str(soup.find('span',id='DegreeName').find('li').text)
            education = edu
            print('Eucation: ',edu)
            
        except Exception as e:
            print(f"Error fetching profile content: {e}")
            

        faculty_member_dataframe = {
            'ID': id,
            'Name': name,
            'Designation': designation,
            'HEC Approved PHD Supervisor': hec,
            'Highest Education': education,
            'Email': email,
            'School': school_str,
            'Department': depertment_str,
            'Extension': extension,
            'ImageURL': img
        }
        
        new_dataframe = pd.DataFrame([faculty_member_dataframe])
        faculty_islamabad_data = pd.concat([faculty_islamabad_data, new_dataframe], ignore_index=True)


# Close the WebDriver
driver.quit()


ID: 4553
Name:  Waseem Shahzad
Designation:  Director & Professor
HEC:  True
Email:  waseem.shahzad@nu.edu.pk
Extension:  101
Img:  http://isb.nu.edu.pk//Images/Profile/CS/4553-3.jpg
School:  FAST School of Computer Science
Department:  Computer Science
Profile_url:  http://isb.nu.edu.pk/Faculty/Details/4553
Eucation:   Ph.D (Computer Science) NUCES, (2010) 
ID: 4551
Name:  Hasan Mujtaba Kiyani
Designation:  Head, School of Computing & Professor
HEC:  True
Email:  hasan.mujtaba@nu.edu.pk
Extension:  626
Img:  http://isb.nu.edu.pk//Images/Profile/CS/4551.jpg
School:  FAST School of Computer Science
Department:  Computer Science
Profile_url:  http://isb.nu.edu.pk/Faculty/Details/4551
Eucation:   Ph.D (Computer Science) National University of Computer & Emerging Sciences (2010) 
ID: 4400
Name:  Hammad Majeed
Designation:  HoD (Computer Science) & Professor
HEC:  True
Email:  hammad.majeed@nu.edu.pk
Extension:  672
Img:  http://isb.nu.edu.pk//Images/Profile/CS/4400.jpg
School:  FAST School

In [2]:
faculty_islamabad_data.to_csv('isb.csv')

In [None]:
# video link for chrome_driver_path is this https://www.youtube.com/watch?v=jglQpvPI58A