In [26]:
! pip install pandas
! pip install numpy
! pip install scikit-learn
! pip install matplotlib
! pip install seaborn



In [27]:
import pandas as pd
import numpy as np
import ast  # for parsing genre strings
from sklearn.preprocessing import MultiLabelBinarizer, MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import seaborn as sns
import matplotlib.pyplot as plt
import re

In [28]:
df = pd.read_csv("engineering colleges in India.csv", low_memory=False)
df.head(10)

Unnamed: 0,College Name,Genders Accepted,Campus Size,Total Student Enrollments,Total Faculty,Established Year,Rating,University,Courses,Facilities,City,State,Country,College Type,Average Fees
0,National Institute of Technology Rourkela,Co-Ed,647 Acres,4811,329,2007,3.12,,"B.Tech Mechanical Engineering, B.Tech Computer...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Rourkela,Odisha,India,Public/Government,350600.0
1,Visvesvaraya National Institute of Technology ...,Co-Ed,215 Acres,1071,192,2007,,,"B.Tech Computer Science Engineering, B.Tech El...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Nagpur,Maharashtra,India,Public/Government,273596.6666666667
2,Netaji Subhas University of Technology,Co-Ed,145 Acres,3759,101,2018,,,"B.Tech Computer Engineering, B.Tech Electronic...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",New Delhi,Delhi,India,Public/Government,352320.0
3,Birla Institute of Technology,Co-Ed,980 Acres,4506,284,1955,,,"B.Tech Computer Science Engineering, B.Tech El...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Ranchi,Jharkhand,India,Private,596686.0465116279
4,International Institute of Information Technology,Co-Ed,66 Acres,1721,85,2001,3.05,,"B.Tech Computer Science and Engineering, M.Tec...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Hyderabad,Telangana,India,Private,756666.6666666666
5,BMS College of Engineering,Co-Ed,15 Acres,5610,298,1946,3.83,"Visvesvaraya Technological University, Belagavi","BE Civil Engineering, BE Computer Science and ...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Bengaluru,Karnataka,India,Private,71558.66666666667
6,Institute of Chemical Technology,Co-Ed,16 Acres,1753,116,2008,3.77,,"BE Chemical Engineering, B.Tech Fibres and Tex...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Mumbai,Maharashtra,India,Public/Government,189512.8205128205
7,Banaras Hindu University,Co-Ed,1300 Acres,27946,1637,1916,3.41,,"BA Social Science, BA, B.Com, BA Hons, BA Soci...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Varanasi,Uttar Pradesh,India,Public/Government,82548.89763779528
8,Thapar Institute of Engineering and Technology,Co-Ed,250 Acres,8150,482,1956,3.29,,"BE Computer Engineering, BE Mechanical Enginee...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Patiala,Punjab,India,Private,736823.0769230769
9,Shanmugha Arts Science Technology Research and...,Co-Ed,232 Acres,9838,750,2001,3.54,,"B.Tech Computer Science and Engineering, B.Tec...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Thanjavur,Tamil Nadu,India,Private,415918.0327868852


In [29]:
# We only need Rating , Courses, City , and Average Fees for this project
college = df[['Rating', 'Courses', 'City', 'Average Fees']].copy()
print(college)

     Rating                                            Courses       City  \
0      3.12  B.Tech Mechanical Engineering, B.Tech Computer...   Rourkela   
1       NaN  B.Tech Computer Science Engineering, B.Tech El...     Nagpur   
2       NaN  B.Tech Computer Engineering, B.Tech Electronic...  New Delhi   
3       NaN  B.Tech Computer Science Engineering, B.Tech El...     Ranchi   
4      3.05  B.Tech Computer Science and Engineering, M.Tec...  Hyderabad   
...     ...                                                ...        ...   
5441    NaN  Diploma in Civil Engineering, Diploma in Compu...  Bengaluru   
5442    NaN  Diploma in Civil Engineering, Diploma in Mecha...   Amritsar   
5443    NaN  Diploma in Electrical Engineering, Diploma in ...     Paldhi   
5444    NaN  BE Civil Engineering, BE Electrical Engineerin...       Arvi   
5445    NaN  B.Arch, B.Des Fashion and Textile Design, B.De...     Jaipur   

           Average Fees  
0              350600.0  
1     273596.6666666667

In [30]:
df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')

In [31]:
# Drop rows with missing values in important columns
# Clean column names
df.columns = df.columns.str.strip()

# Drop missing values in essential columns
df = df.dropna(subset=['Rating', 'Courses', 'City', 'Average Fees'])

# Clean 'Average Fees' to keep only numbers
def clean_fees(fee):
    if isinstance(fee, str):
        fee = re.sub(r'[^0-9]', '', fee)
        if fee.isdigit():
            return int(fee)
    return None

df['Average Fees'] = df['Average Fees'].apply(clean_fees)

# Drop rows where Average Fees could not be extracted
df = df.dropna(subset=['Average Fees'])

# Convert 'Rating' to float
try:
    df['Rating'] = df['Rating'].astype(float)
except:
    df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')
    df = df.dropna(subset=['Rating'])

print("Data Preprocessing Done âœ…")
df.head()


Data Preprocessing Done âœ…


Unnamed: 0,College Name,Genders Accepted,Campus Size,Total Student Enrollments,Total Faculty,Established Year,Rating,University,Courses,Facilities,City,State,Country,College Type,Average Fees
0,National Institute of Technology Rourkela,Co-Ed,647 Acres,4811,329,2007,3.12,,"B.Tech Mechanical Engineering, B.Tech Computer...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Rourkela,Odisha,India,Public/Government,3506000
4,International Institute of Information Technology,Co-Ed,66 Acres,1721,85,2001,3.05,,"B.Tech Computer Science and Engineering, M.Tec...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Hyderabad,Telangana,India,Private,7566666666666666
5,BMS College of Engineering,Co-Ed,15 Acres,5610,298,1946,3.83,"Visvesvaraya Technological University, Belagavi","BE Civil Engineering, BE Computer Science and ...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Bengaluru,Karnataka,India,Private,7155866666666667
6,Institute of Chemical Technology,Co-Ed,16 Acres,1753,116,2008,3.77,,"BE Chemical Engineering, B.Tech Fibres and Tex...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Mumbai,Maharashtra,India,Public/Government,1895128205128205
7,Banaras Hindu University,Co-Ed,1300 Acres,27946,1637,1916,3.41,,"BA Social Science, BA, B.Com, BA Hons, BA Soci...","Boys Hostel, Girls Hostel, Gym, Library, Sport...",Varanasi,Uttar Pradesh,India,Public/Government,8254889763779528


In [32]:
# Function to recommend colleges
def recommend_colleges(rating_threshold, desired_course, city_preference, max_fees):
    # Apply filters
    filtered = df[
        (df['Rating'] >= rating_threshold) &
        (df['Courses'].str.contains(desired_course, case=False, na=False)) &
        (df['City'].str.contains(city_preference, case=False, na=False)) &
        (df['Average Fees'] <= max_fees)
    ]

    # Sort by Rating descending
    filtered = filtered.sort_values(by='Rating', ascending=False)

    return filtered[['College Name', 'Courses', 'City', 'Rating', 'Average Fees']]


In [33]:
# Taking input from user
rating_threshold = float(input("Enter minimum Rating (e.g., 3.5): "))
desired_course = input("Enter desired Course (e.g., Computer Science, Mechanical): ").strip()
city_preference = input("Enter preferred City (leave blank for any city): ").strip()
max_fees = int(input("Enter maximum Average Fees (in INR): "))


Enter minimum Rating (e.g., 3.5):  3.77
Enter desired Course (e.g., Computer Science, Mechanical):  Chemical Engineering
Enter preferred City (leave blank for any city):  Mumbai
Enter maximum Average Fees (in INR):  1895128205128205


In [34]:
# Recommend colleges based on inputs
recommended_colleges = recommend_colleges(rating_threshold, desired_course, city_preference, max_fees)

if recommended_colleges.empty:
    print("\nNo colleges match your criteria. Please try different filters.")
else:
    print("\nðŸŽ¯ Recommended Colleges:")
    display(recommended_colleges.head(10))  # Display top 10 results



ðŸŽ¯ Recommended Colleges:


Unnamed: 0,College Name,Courses,City,Rating,Average Fees
6,Institute of Chemical Technology,"BE Chemical Engineering, B.Tech Fibres and Tex...",Mumbai,3.77,1895128205128205
