In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from urllib.parse import quote_plus

In [2]:
states = [
    "andhra-pradesh", "arunachal-pradesh", "assam", "bihar", "chhattisgarh", "goa",
    "gujarat", "haryana", "himachal-pradesh", "jharkhand", "karnataka", "kerala",
    "madhya-pradesh", "maharashtra", "manipur", "meghalaya", "mizoram", "nagaland",
    "odisha", "punjab", "rajasthan", "sikkim", "tamil-nadu", "telangana", "tripura",
    "uttar-pradesh", "uttarakhand", "west-bengal", "andaman-and-nicobar-islands", 
    "delhi-ncr", "puducherry"
]

In [3]:
base_url = "https://www.getmyuni.com/all-colleges?state="

In [4]:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"
}

In [5]:
college_names = []
locations = []
college_types = []
ratings = []
courses = []
tuition_start_range = []
tuition_end_range = []

In [11]:
for state in states:
    state_url = base_url + quote_plus(state) 
    
    try:
        response = requests.get(state_url, headers=headers)
        response.raise_for_status()  

        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all college cards
        college_cards = soup.find_all("div", class_="college__card__new")
        fees = soup.find_all('span', class_='highlight__value')

        # Process fee ranges
        for fee in fees:
            text = fee.get_text(strip=True)
            if '₹' in text:
                amounts = text.replace('₹', '').replace(',', '').split('-')
                if len(amounts) == 2:
                    tuition_start_range.append(amounts[0].strip())
                    tuition_end_range.append(amounts[1].strip())
                elif len(amounts) == 1:
                    tuition_start_range.append(amounts[0].strip())
                    tuition_end_range.append("")
                else:
                    tuition_start_range.append("")
                    tuition_end_range.append("")

        # Extract college information for each card
        for card in college_cards:
            # College name
            name = card.find("h2", class_="college__name")
            college_names.append(name.get_text(strip=True) if name else "N/A")

            # Location
            location = card.find("span", class_="list__style college__location")
            locations.append(location.get_text(strip=True) if location else "N/A")

            # College type
            college_type = card.find("span", class_="list__style college__affiliation")
            college_types.append(college_type.get_text(strip=True) if college_type else "N/A")

            # Rating
            rating = card.find("span", class_="list__style college__rating")
            ratings.append(rating.get_text(strip=True) if rating else "N/A")

            # Courses offered (as number)
            course = card.find("span", class_="highlight__value")
            if course:
                try:
                    number = ''.join(filter(str.isdigit, course.get_text(strip=True)))
                    courses.append(int(number) if number else 0)
                except ValueError:
                    courses.append(0)
            else:
                courses.append(0)

    except Exception as e:
        print(f"Error processing state {state}: {e}")

In [12]:
d = {
    "College Name": college_names[:len(tuition_start_range)],
    "Location": locations[:len(tuition_start_range)],
    "Type": college_types[:len(tuition_start_range)],
    "Rating": ratings[:len(tuition_start_range)],
    "Courses offered": courses[:len(tuition_start_range)],
    "Fee Start Range": tuition_start_range,
    "Fee End Range": tuition_end_range
}

In [13]:
df1 = pd.DataFrame(d)

In [14]:
df1

Unnamed: 0,College Name,Location,Type,Rating,Courses offered,Fee Start Range,Fee End Range
0,Andhra University,"Visakhapatnam, Andhra Pradesh",Public,3.6,34,16 K,14.44 L
1,KL University,"Guntur, Andhra Pradesh",Private,3.5,23,1.40 L,15.60 L
2,Sri Venkateswara University (SVU),"Tirupati, Andhra Pradesh",Public,3.5,18,8 K,2.71 L
3,Aditya Engineering College,"East Godavari, Andhra Pradesh",Private,3.7,5,54 K,2.01 L
4,Vignan Online,"Guntur, Andhra Pradesh",,,3,1.10 L,1.35 L
...,...,...,...,...,...,...,...
455,The Global Open University,"Dimapur, Nagaland",Public,,21,45 K,5 L
456,Alder College,"Kohima, Nagaland",Public,,1,28 K,75 K
457,"Anderson Theological College, Zunhebotto","Zunheboto, Nagaland",Private,,2,10 K,25 K
458,"Discipleship Bible College, Dimapur","Dimapur, Nagaland",Private,,1,30 K,1.25 L


In [15]:
df1.to_csv('all_colege_list.csv', index=False)

print("Data saved to output_file.csv")


Data saved to output_file.csv
