In [105]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup


In [106]:
# Listing pages to scrape; each URL is fetched and every "facultyDetails"
# card found on it becomes one row of `dau_faculty`.
webpage = [
    "https://www.daiict.ac.in/faculty",
    "https://www.daiict.ac.in/adjunct-faculty",
    "https://www.daiict.ac.in/adjunct-faculty-international",
    "https://www.daiict.ac.in/distinguished-professor",
    "https://www.daiict.ac.in/professor-practice"
]

# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the notebook indefinitely.
REQUEST_TIMEOUT_S = 30

# Column order of the resulting frame (also used when no card is found at all).
FACULTY_COLUMNS = [
    "name",
    "profile_url",
    "education",
    "email",
    "contact_number",
    "research_area",
]


def _text_or_nan(node, separator=""):
    """Return the stripped text of a BeautifulSoup node, or NaN when the node is missing."""
    if node is None:
        return np.nan
    return node.get_text(separator, strip=True)


def _parse_faculty_card(card):
    """Build one record (dict keyed by FACULTY_COLUMNS) from a "facultyDetails" div.

    Any field whose element is absent from the card is stored as NaN, so a
    partially filled card still yields a row.
    """
    link = card.find("a")
    profile_url = link.get("href", np.nan) if link is not None else np.nan

    # The e-mail span is only looked up inside the "contactDetails" container.
    contact = card.find("div", class_="contactDetails")
    email_node = (
        contact.find("span", class_="facultyemail") if contact is not None else None
    )
    email = _text_or_nan(email_node)
    if isinstance(email, str):
        # The page obfuscates addresses as "user[at]host[dot]tld".
        email = email.replace("[at]", "@").replace("[dot]", ".")

    return {
        "name": _text_or_nan(card.find("h3")),
        "profile_url": profile_url,
        "education": _text_or_nan(card.find(class_="facultyEducation")),
        "email": email,
        "contact_number": _text_or_nan(card.find("span", class_="facultyNumber")),
        "research_area": _text_or_nan(
            card.find("div", class_="areaSpecialization"), separator=" "
        ),
    }


faculty_records = []

for page_url in webpage:
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT_S)
    # Fail loudly on 4xx/5xx instead of parsing an error page into zero rows.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "lxml")
    faculty_records.extend(
        _parse_faculty_card(card)
        for card in soup.find_all("div", class_="facultyDetails")
    )

# Build the frame once from all records rather than concatenating per page.
dau_faculty = pd.DataFrame(faculty_records, columns=FACULTY_COLUMNS)


In [107]:
# Summarise the scraped frame: row count, per-column non-null counts and dtypes.
dau_faculty.info()

<class 'pandas.DataFrame'>
RangeIndex: 111 entries, 0 to 110
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   name            111 non-null    str  
 1   profile_url     111 non-null    str  
 2   education       111 non-null    str  
 3   email           110 non-null    str  
 4   contact_number  107 non-null    str  
 5   research_area   108 non-null    str  
dtypes: str(6)
memory usage: 5.3 KB


In [110]:
import random

In [None]:
# Give every row a unique random ID of the form "F-<number>" as the first column.
# A fixed seed keeps the IDs stable across Restart & Run All.
FACULTY_ID_SEED = 42

# range() excludes its upper bound, so the pool is F-1000 .. F-9998.
faculty_id_pool = ["F-" + str(n) for n in range(1000, 9999)]

# Drop any previous faculty_id so re-running this cell does not raise
# "cannot insert faculty_id, already exists".
dau_faculty = dau_faculty.drop(columns="faculty_id", errors="ignore")

# sample() draws without replacement, so IDs are unique; it raises ValueError
# if there are more rows than IDs in the pool.
dau_faculty.insert(
    0,
    "faculty_id",
    random.Random(FACULTY_ID_SEED).sample(faculty_id_pool, len(dau_faculty)),
)


In [114]:
# Display the frame to eyeball the scraped rows together with the faculty_id column.
dau_faculty

Unnamed: 0,faculty_id,name,profile_url,education,email,contact_number,research_area
0,F-8373,Abhishek gupta,https://www.daiict.ac.in/faculty/abhishek-gupta,"PhD (Electrical and Computer Engineering), Tor...",abhishek_gupta@dau.ac.in,079-68261598,"Machine Learning, Statistical Signal Processin..."
1,F-6235,Abhishek jindal,https://www.daiict.ac.in/faculty/abhishek-jindal,"PhD (Electronics & Communication Engineering),...",abhishek_jindal@dau.ac.in,079-68261654,"Reinforcement Learning, Deep Learning for Fina..."
2,F-9246,Abhishek tilva,https://www.daiict.ac.in/faculty/abhishek-tilva,"PhD (Statistics), Columbia University, New Yor...",abhishek_tilva@dau.ac.in,079-68261549,"Arbitrage Theory, Stochastic Portfolio Theory,..."
3,F-1832,Aditya tatu,https://www.daiict.ac.in/faculty/aditya-tatu,"PhD (Computer Science), University of Copenhag...",aditya_tatu@dau.ac.in,079-68261540,"Computer Vision, Image Processing, Pattern Rec..."
4,F-8863,Ajay beniwal,https://www.daiict.ac.in/faculty/ajay-beniwal,PhD (Electronics and Communication Engineering...,ajay_beniwal@dau.ac.in,079-68261745,Flexible and Printable Electronics for Healthc...
...,...,...,...,...,...,...,...
106,F-7257,Vishvajit pandya,https://www.daiict.ac.in/distinguished-profess...,"PhD (Anthropology), University of Chicago, USA",vishvajit_pandya@dau.ac.in,079-68261543,"Material Culture, Design and Communication Cul..."
107,F-6553,Ajay tomar,https://www.daiict.ac.in/professor-practice/aj...,"I.P.S, (1989)",ajay_tomar@dau.ac.in,,Mr. Ajay Tomar was retired as the Commissioner...
108,F-8180,Anirban dutta gupta,https://www.daiict.ac.in/professor-practice/an...,"Graduate in Visual Communication Design, NID A...",anirban_dutta@dau.ac.in,,"Natural History & Ethnographic Documentary, Ph..."
109,F-2461,Harpreet singh jattana,https://www.daiict.ac.in/professor-practice/ha...,PhD - Pursuing (Microelectronics – SOI CMOS Pr...,harpreetsingh_jattana@dau.ac.in,079-68261718,"CMOS Process Development, Device Reliability, ..."


In [115]:
# Write the frame to dau_faculty.csv in the working directory, without the row index.
dau_faculty.to_csv("dau_faculty.csv", index=False)


In [116]:
# Read dau_faculty.csv back in and display it to check the exported file.
pd.read_csv("dau_faculty.csv")

Unnamed: 0,faculty_id,name,profile_url,education,email,contact_number,research_area
0,F-8373,Abhishek gupta,https://www.daiict.ac.in/faculty/abhishek-gupta,"PhD (Electrical and Computer Engineering), Tor...",abhishek_gupta@dau.ac.in,079-68261598,"Machine Learning, Statistical Signal Processin..."
1,F-6235,Abhishek jindal,https://www.daiict.ac.in/faculty/abhishek-jindal,"PhD (Electronics & Communication Engineering),...",abhishek_jindal@dau.ac.in,079-68261654,"Reinforcement Learning, Deep Learning for Fina..."
2,F-9246,Abhishek tilva,https://www.daiict.ac.in/faculty/abhishek-tilva,"PhD (Statistics), Columbia University, New Yor...",abhishek_tilva@dau.ac.in,079-68261549,"Arbitrage Theory, Stochastic Portfolio Theory,..."
3,F-1832,Aditya tatu,https://www.daiict.ac.in/faculty/aditya-tatu,"PhD (Computer Science), University of Copenhag...",aditya_tatu@dau.ac.in,079-68261540,"Computer Vision, Image Processing, Pattern Rec..."
4,F-8863,Ajay beniwal,https://www.daiict.ac.in/faculty/ajay-beniwal,PhD (Electronics and Communication Engineering...,ajay_beniwal@dau.ac.in,079-68261745,Flexible and Printable Electronics for Healthc...
...,...,...,...,...,...,...,...
106,F-7257,Vishvajit pandya,https://www.daiict.ac.in/distinguished-profess...,"PhD (Anthropology), University of Chicago, USA",vishvajit_pandya@dau.ac.in,079-68261543,"Material Culture, Design and Communication Cul..."
107,F-6553,Ajay tomar,https://www.daiict.ac.in/professor-practice/aj...,"I.P.S, (1989)",ajay_tomar@dau.ac.in,,Mr. Ajay Tomar was retired as the Commissioner...
108,F-8180,Anirban dutta gupta,https://www.daiict.ac.in/professor-practice/an...,"Graduate in Visual Communication Design, NID A...",anirban_dutta@dau.ac.in,,"Natural History & Ethnographic Documentary, Ph..."
109,F-2461,Harpreet singh jattana,https://www.daiict.ac.in/professor-practice/ha...,PhD - Pursuing (Microelectronics – SOI CMOS Pr...,harpreetsingh_jattana@dau.ac.in,079-68261718,"CMOS Process Development, Device Reliability, ..."
