# In [None]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9) Gecko/2008052906 Firefox/3.0',
}

# Directory listing endpoint; the ``alpha`` query parameter selects the
# initial letter of the universities to list.
BROWSE_URL = "https://www.inflibnet.ac.in/universitydirectory/browse.php"
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Exact ``class`` attribute of the <div> elements that are searched.
CONTAINER_CLASS = 'col-md-12 col-lg-12 col-sm-12 col-xs-12'
# Number of contact slots written per spreadsheet row.
MAX_CONTACTS = 5
CONTACT_FIELDS = ('designation', 'name', 'phone', 'email')
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server hangs the run
OUTPUT_FILE = 'university_contacts_A_to_Z_.xlsx'
# One pattern for both extracting and stripping the URL, so http and https
# sites are handled identically.
URL_PATTERN = re.compile(r'https?://\S+')

# Define column headers
columns = ['University Name', 'Website']
for i in range(1, MAX_CONTACTS + 1):
    columns.extend([f'Contact {i} Designation', f'Contact {i} Name', f'Contact {i} Phone', f'Contact {i} Email'])


def split_title_and_website(raw_title):
    """Split a title string into ``(name, website)``.

    The first ``http://`` or ``https://`` URL found in *raw_title* is
    returned as the website (``None`` when there is none); every URL is
    removed from the name, which is returned stripped of surrounding
    whitespace.
    """
    match = URL_PATTERN.search(raw_title)
    website = match.group() if match else None
    name = URL_PATTERN.sub('', raw_title).strip()
    return name, website


def _stripped_text(elem):
    """Return the stripped text of a parsed element, or ``None`` if it is missing."""
    return elem.text.strip() if elem is not None else None


def _parse_contact(item):
    """Build a contact dict from one ``<li>`` element.

    Returns ``None`` when the element carries none of the contact fields,
    so unrelated list items do not occupy a contact slot.
    """
    email_elem = item.find('h6', class_='prof_email')
    link = email_elem.a if email_elem is not None else None
    # .get() avoids a KeyError on an anchor without an href attribute.
    href = link.get('href') if link is not None else None
    contact = {
        'designation': _stripped_text(item.find('h6', class_='prof_designation')),
        'name': _stripped_text(item.find('p', class_='prof_name')),
        'phone': _stripped_text(item.find('p', class_='prof_phone')),
        'email': href.replace('mailto:', '').strip() if href else None,
    }
    if all(value is None for value in contact.values()):
        return None
    return contact


def parse_universities(soup):
    """Return a list of ``(name, website, contacts)`` tuples found in *soup*.

    Titles and contact items are visited in document order and every contact
    is attached to the most recent preceding ``h3.uni_title``. This keeps
    rows aligned even when a university lists fewer or more than
    ``MAX_CONTACTS`` people.

    NOTE(review): assumes a university's contacts follow its title in the
    page; contacts seen before any title are dropped -- confirm against the
    live markup.
    """
    records = []
    current_contacts = None
    for container in soup.find_all('div', class_=CONTAINER_CLASS):
        for node in container.find_all(['h3', 'li']):
            if node.name == 'h3':
                if 'uni_title' not in (node.get('class') or []):
                    continue
                name, website = split_title_and_website(node.text)
                current_contacts = []
                records.append((name, website, current_contacts))
            elif current_contacts is not None:
                contact = _parse_contact(node)
                if contact is not None:
                    current_contacts.append(contact)
    return records


def build_row(name, website, contacts, max_contacts=MAX_CONTACTS):
    """Flatten one university into a spreadsheet row.

    The row is ``[name, website]`` followed by designation, name, phone and
    email for each of the first *max_contacts* contacts; missing contacts
    are padded with ``None`` so every row has the same width.
    """
    row = [name, website]
    for contact in contacts[:max_contacts]:
        row.extend(contact[field] for field in CONTACT_FIELDS)
    missing = max_contacts - min(len(contacts), max_contacts)
    row.extend([None] * (missing * len(CONTACT_FIELDS)))
    return row


def fetch_letter(letter):
    """Download and parse the directory page for one initial *letter*.

    Raises ``requests.RequestException`` on network failure, timeout or an
    HTTP error status.
    """
    response = requests.get(
        BROWSE_URL,
        params={'alpha': letter},
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


def scrape_all():
    """Scrape every letter A-Z and return the list of spreadsheet rows."""
    rows = []
    for letter in LETTERS:
        try:
            soup = fetch_letter(letter)
        except requests.RequestException as exc:
            # Report and keep going so one bad page does not discard the
            # data already collected for the other letters.
            print(f"Skipping letter {letter}: {exc}")
            continue
        rows.extend(build_row(*record) for record in parse_universities(soup))
    return rows


if __name__ == "__main__":
    all_data = scrape_all()

    # Create DataFrame and save to Excel
    df = pd.DataFrame(all_data, columns=columns)
    df.to_excel(OUTPUT_FILE, index=False)

    print(f"Data has been saved to {OUTPUT_FILE}")
