In [11]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

def _first_span_text(tag):
    """Return the stripped text of the first <span> inside ``tag``, or None."""
    span = tag.find('span')
    return span.text.strip() if span is not None else None


def extract_programs(url, timeout=30):
    """Scrape undergraduate/postgraduate program names from a web page.

    The page at ``url`` is downloaded and parsed with BeautifulSoup's
    ``html.parser``.

    Args:
        url: Address of the page to download.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout a single stalled server would block the
            caller indefinitely.

    Returns:
        A tuple ``(ug_programs, departments, pg_programs)`` of lists of
        whitespace-stripped strings:

        * ``ug_programs`` -- text of the first ``<span>`` inside every
          element with class ``qs_program_name_ug``.
        * ``departments`` -- one entry per ``<div class="item">``: the text
          of its ``<span class="pgmname">``, or ``""`` when it has none.
        * ``pg_programs`` -- text of the first ``<span>`` inside every
          ``<a class="qs_program_name_pg">`` found within those divs.

        Program elements that contain no ``<span>`` are skipped.

    Raises:
        requests.RequestException: If the download fails or times out.

    Note:
        The HTTP status code is not checked; an error page is parsed like
        any other page.
    """
    page = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(page, 'html.parser')

    # Undergraduate programs. find('span') returns None when the element has
    # no <span>, so guard instead of dereferencing it blindly.
    ug_programs = []
    for item in soup.find_all(class_='qs_program_name_ug'):
        name = _first_span_text(item)
        if name is not None:
            ug_programs.append(name)

    # Postgraduate programs, grouped in the markup under department items.
    departments = []
    pg_programs = []
    for department_item in soup.find_all('div', class_='item'):
        department_name_element = department_item.find('span', class_='pgmname')
        if department_name_element is not None:
            departments.append(department_name_element.text.strip())
        else:
            # Keep one entry per item so positions still line up with the
            # items on the page.
            departments.append("")

        for program in department_item.find_all('a', class_='qs_program_name_pg'):
            name = _first_span_text(program)
            if name is not None:
                pg_programs.append(name)

    return ug_programs, departments, pg_programs

# The input CSV must provide 'University' and 'URL' columns. Point the path
# below at each of the 7 university URL files in turn to collect all details.
df = pd.read_csv(r'C:\Users\kanim\Downloads\universities_with_no_650.csv')

# One entry per input row; programs of a university are joined with '; '.
ug_programs_list = []
departments_list = []
pg_programs_list = []
university_names_list = []

# Extract programs for each university URL
for university_name, url in zip(df['University'], df['URL']):
    try:
        ug_programs, departments, pg_programs = extract_programs(url)
    except requests.RequestException as exc:
        # A single unreachable or malformed URL must not discard the rows
        # that were already scraped; record the university with no programs.
        print(f"Could not fetch programs for {university_name!r} ({url}): {exc}")
        ug_programs, departments, pg_programs = [], [], []

    university_names_list.append(university_name)
    ug_programs_list.append('; '.join(ug_programs))
    departments_list.append('; '.join(departments))
    pg_programs_list.append('; '.join(pg_programs))

# Create a DataFrame from the extracted data. Departments are collected in
# departments_list but are not written to the output.
df_output = pd.DataFrame({
    'University': university_names_list,
    'UG Programs': ug_programs_list,
    'PG Programs': pg_programs_list,
})

# Print the output DataFrame
print(df_output)

# Save the DataFrame to a CSV file
df_output.to_csv('programsavailable7.csv', index=False)


                                            University  \
0                                 Beykent Üniversitesi   
1                                  Bath Spa University   
2                                     Brown University   
3             Odessa I.I.Mechnikov National University   
4    Skolkovo Institute of Science and Technology S...   
..                                                 ...   
375                               Academy of Fine Arts   
376    George Washington University School of Business   
377                                  Howard University   
378         Polytechnic University of the Philippines    
379                    Universidad del Sagrado Corazón   

                                           UG Programs  \
0                                                        
1    Acting; Acting (Professional Placement Year); ...   
2    Bachelor of Science in Linguistics; Bachelor o...   
3                                                        
4            