<a href="https://colab.research.google.com/github/Ayush-0108/Chat-Bot/blob/institute/institute.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

EVENTS

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

# Scrape the event cards on the IIIT Dharwad events page and write them to events.csv.
URL = "https://iiitdwd.ac.in/events/"
headers = {
    "User-Agent": "Mozilla/5.0 (IIITDWD Bot; scraping for educational/demo purposes; contact: youremail@example.com)"
}
# requests has no default timeout; without one a stalled server hangs the cell forever.
REQUEST_TIMEOUT = 30  # seconds

resp = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, 'html.parser')

# A class_ string containing spaces is compared against the element's full class
# attribute, so every selector below only matches that exact class list.
div_main = soup.find_all('div', class_='text-card-foreground flex flex-col overflow-hidden max-w-md group hover:-translate-y-2 transition-all duration-300 hover:shadow-xl border bg-white rounded-lg shadow-sm py-0 gap-0')

event_data = []

# event_id is a 1-based running number in page order.
for event_id, div in enumerate(div_main, start=1):
    # Image: missing wrapper, missing <img> or missing src all fall back to NaN.
    image_div = div.find('div', class_='relative h-64 flex-none w-full')
    image_tag = image_div.find('img') if image_div else None
    image_link = image_tag['src'].strip() if image_tag and image_tag.has_attr('src') else np.nan

    # Card content holds the title, date and venue; every lookup is guarded so a
    # card without this wrapper yields NaN fields instead of raising.
    card_content = div.find('div', class_='px-4 py-6 justify-between flex flex-col h-full')

    # Title
    title_tag = card_content.find('h2') if card_content else None
    title = title_tag.text.strip() if title_tag else np.nan

    # Date (the div whose class list ends in mb-1)
    date_div = card_content.find('div', class_='flex text-body font-medium text-gray-500 mb-1') if card_content else None
    event_date = date_div.text.strip() if date_div else np.nan

    # Venue (same class list as the date div but without mb-1)
    venue_div = card_content.find('div', class_='flex text-body font-medium text-gray-500') if card_content else None
    venue = venue_div.text.strip() if venue_div else np.nan

    # The description column simply reuses the title.
    description = title

    # Organizer & registration link are not scraped; the columns are kept as NaN.
    organizer = np.nan
    registration_link = np.nan

    event_data.append([
        event_id,
        title,
        event_date,
        description,
        venue,
        organizer,
        registration_link,
        image_link
    ])

# Save to CSV
columns = ['event_id', 'title', 'event_date', 'description', 'venue', 'organizer', 'registration_link', 'image_link']
df = pd.DataFrame(event_data, columns=columns)
df.to_csv("events.csv", index=False)


RECRUITEMENT

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

# Scrape the openings table on the IIIT Dharwad careers page and write it to
# recruitments_data.csv.
URL = "https://iiitdwd.ac.in/careers/"

headers = {
    "User-Agent": "Mozilla/5.0 (IIITDWD Bot; scraping for educational/demo purposes; contact: youremail@example.com)"
}
# requests has no default timeout; without one a stalled server hangs the cell forever.
REQUEST_TIMEOUT = 30  # seconds

resp = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, 'html.parser')

div_main = soup.find('div', class_='relative w-full overflow-x-auto rounded-lg border overflow-y-hidden')
table = div_main.find('table') if div_main is not None else None
if table is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise RuntimeError(f"Careers table not found at {URL}; the page layout may have changed")

rows = []
recruitment_id = 1

# The first <tr> is skipped (treated as the header row).
for tr in table.find_all('tr')[1:]:
    tds = tr.find_all('td')
    # Rows with fewer than five <td> cells are ignored.
    if len(tds) < 5:
        continue

    # A bs4 Tag is always truthy, so testing the tag itself can never select the
    # NaN fallback; test the extracted text so empty cells become NaN.
    position, department, posting_date, closing_date = (
        td.get_text(strip=True) or np.nan for td in tds[:4]
    )

    # The fifth cell's first link supplies both the description text and the URL.
    link_tag = tds[4].find('a')
    if link_tag is not None:
        job_description = link_tag.get_text(strip=True) or np.nan
        application_link = link_tag.get('href') or np.nan
    else:
        job_description = np.nan
        application_link = np.nan

    rows.append([
        recruitment_id,
        position,
        department,
        posting_date,
        closing_date,
        job_description,
        application_link
    ])
    recruitment_id += 1

columns = [
    'recruitment_id',
    'position',
    'department',
    'posting_date',
    'closing_date',
    'job_description',
    'application_link'
]

df = pd.DataFrame(rows, columns=columns)
df.to_csv("recruitments_data.csv", index=False)



CONTACTS

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

# Scrape the contact sections on the IIIT Dharwad contact page and write one row
# per section heading to contacts.csv.
URL = "https://iiitdwd.ac.in/contact/"

headers = {
    "User-Agent": "Mozilla/5.0 (IIITDWD Bot; scraping for educational/demo purposes; contact: youremail@example.com)"
}
# requests has no default timeout; without one a stalled server hangs the cell forever.
REQUEST_TIMEOUT = 30  # seconds

resp = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, 'html.parser')

divs = soup.find_all('div', class_='flex flex-col gap-4 md:gap-8')
contacts = {"contact_id": [], "contact_heading": [], "contact": []}

contact_id = 1

for div in divs:
    heading_tag = div.find('h2')
    if heading_tag is None:
        # A matched div without an <h2> has no heading to group under; skip it
        # rather than crash on None.
        continue
    contact_heading = heading_tag.text.strip()

    # One record per <li> under this heading.
    for contact in div.find_all('li'):
        contacts["contact_id"].append(contact_id)
        contacts["contact_heading"].append(contact_heading)
        contacts["contact"].append(contact.text.strip())
        contact_id += 1

df = pd.DataFrame(contacts)

# Collapse to one row per heading, joining that heading's entries with newlines.
# groupby sorts its keys by default, so rows come out in heading order rather
# than page order; the per-item contact_id is not carried through the grouping.
grouped_df = df.groupby('contact_heading')['contact'].apply('\n'.join).reset_index()

# Re-number the grouped rows 1..N as the first column.
grouped_df.insert(0, 'contact_id', range(1, len(grouped_df) + 1))

grouped_df.to_csv("contacts.csv", index=False)


TENDERS

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urljoin

# Scrape the first table on the IIIT Dharwad tenders page and write it to
# ActiveTenders.csv, using the table's first row as the column headers.
url = "https://iiitdwd.ac.in/tenders/"
base_url = "https://iiitdwd.ac.in"
headers = {"User-Agent": "Mozilla/5.0"}
# requests has no default timeout; without one a stalled server hangs the cell forever.
REQUEST_TIMEOUT = 30  # seconds

response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Stop on HTTP errors instead of parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

table = soup.find("table")
if table is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise RuntimeError(f"No <table> found at {url}; the page layout may have changed")

rows = table.find_all("tr")
data = []

for row in rows:
    cols = row.find_all(["td", "th"])
    if not cols:
        # An empty <tr> carries no data and would otherwise become a blank row.
        continue
    row_data = []
    for col in cols:
        a_tag = col.find("a", href=True)
        if a_tag:
            link = urljoin(base_url, a_tag['href'])  # Make it absolute
            # Keep the cell's own text minus the link's label, then append the URL.
            text = col.get_text(strip=True).replace(a_tag.get_text(strip=True), '').strip()
            row_data.append(f"{text} {link}".strip())
        else:
            row_data.append(col.get_text(strip=True))
    data.append(row_data)

if not data:
    raise RuntimeError(f"The tenders table at {url} contains no rows")

# Make DataFrame
df = pd.DataFrame(data[1:], columns=data[0])  # Use first row as headers
df.to_csv("ActiveTenders.csv", index=False)

