In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Sample dataset of colleges: each record pairs an institution's name with
# the one-sentence description used as its text for similarity matching.
_college_records = [
    (
        "Madurai Kamaraj University",
        "Madurai Kamaraj University - A well-known public university offering UG, PG, and PhD programs in various disciplines.",
    ),
    (
        "Thiagarajar College of Engineering (TCE)",
        "Thiagarajar College of Engineering (TCE) - One of the top engineering colleges in Tamil Nadu, affiliated with Anna University.",
    ),
    (
        "Anna University Regional Campus, Madurai",
        "Anna University Regional Campus, Madurai - Offers B.E, B.Tech, and MBA programs under Anna University.",
    ),
    (
        "Velammal College of Engineering and Technology (VCET)",
        "Velammal College of Engineering and Technology (VCET) - A reputed private engineering college offering undergraduate and postgraduate courses.",
    ),
    (
        "Madurai Medical College (MMC)",
        "Madurai Medical College (MMC) - A leading government medical college providing MBBS and other medical courses.",
    ),
    (
        "The American College",
        "The American College - A prestigious arts and science college with a long history of academic excellence.",
    ),
    (
        "Yadava College",
        "Yadava College - A private-aided college offering a wide range of undergraduate and postgraduate courses.",
    ),
    (
        "Government Law College, Madurai",
        "Government Law College, Madurai - One of the top law colleges in Tamil Nadu offering LLB and legal studies.",
    ),
    (
        "Agricultural College and Research Institute (ACRI), Madurai",
        "Agricultural College and Research Institute (ACRI), Madurai - Focused on agricultural sciences, affiliated with Tamil Nadu Agricultural University.",
    ),
    (
        "CSI College of Dental Science and Research",
        "CSI College of Dental Science and Research - A premier institute for dental studies, offering BDS and MDS programs.",
    ),
]

# Column-oriented view of the records: two parallel lists in which position i
# of "names" and position i of "descriptions" refer to the same college.
college_data = {
    "names": [name for name, _ in _college_records],
    "descriptions": [description for _, description in _college_records],
}


In [4]:
# Turn the column-oriented dict into a table: one row per college, with the
# dict keys ("names", "descriptions") becoming the column labels.
college_df = pd.DataFrame(data=college_data)
print(college_df)

                                               names  \
0                         Madurai Kamaraj University   
1           Thiagarajar College of Engineering (TCE)   
2           Anna University Regional Campus, Madurai   
3  Velammal College of Engineering and Technology...   
4                      Madurai Medical College (MMC)   
5                               The American College   
6                                     Yadava College   
7                    Government Law College, Madurai   
8  Agricultural College and Research Institute (A...   
9         CSI College of Dental Science and Research   

                                        descriptions  
0  Madurai Kamaraj University - A well-known publ...  
1  Thiagarajar College of Engineering (TCE) - One...  
2  Anna University Regional Campus, Madurai - Off...  
3  Velammal College of Engineering and Technology...  
4  Madurai Medical College (MMC) - A leading gove...  
5  The American College - A prestigious arts and ... 

In [7]:
def get_content_based_recommendations(college_name, college_df, n_recommendations=3):
    """Return the colleges whose descriptions are most similar to one college's.

    Descriptions are turned into TF-IDF vectors (English stop words removed)
    and ranked by cosine similarity to the description of ``college_name``.

    Parameters
    ----------
    college_name : str
        Name of the query college; must appear in ``college_df['names']``.
    college_df : pandas.DataFrame
        Table with a ``'names'`` column and a ``'descriptions'`` column.
        Missing descriptions are treated as empty text.
    n_recommendations : int, default 3
        Maximum number of similar colleges to return.

    Returns
    -------
    pandas.Series
        Similarity scores sorted from highest to lowest, indexed by college
        name (index name ``'names'``), with the Series itself named
        ``college_name``. Rows carrying the query name are excluded.

    Raises
    ------
    ValueError
        If ``n_recommendations`` is negative.
    KeyError
        If ``college_name`` is not present in ``college_df['names']``.
    """
    # Validate before the (comparatively expensive) vectorisation so that bad
    # input fails fast and with a message that names the actual problem.
    if n_recommendations < 0:
        raise ValueError(
            f"n_recommendations must be non-negative, got {n_recommendations!r}"
        )

    names = college_df['names']
    is_query = (names == college_name).to_numpy()
    if not is_query.any():
        raise KeyError(f"College {college_name!r} not found in college_df['names']")
    # Locate the query by position rather than by label: a label lookup would
    # return several rows/columns if the same name appeared more than once.
    query_pos = int(is_query.argmax())

    tfidf = TfidfVectorizer(stop_words='english')
    # TfidfVectorizer rejects NaN documents, so blank them out first.
    tfidf_matrix = tfidf.fit_transform(college_df['descriptions'].fillna(''))

    # Only the query row is compared against the corpus: a 1 x N similarity
    # row instead of the full N x N matrix.
    query_scores = cosine_similarity(tfidf_matrix[query_pos], tfidf_matrix).ravel()

    sim_scores = pd.Series(query_scores, index=pd.Index(names), name=college_name)
    ranked = sim_scores.sort_values(ascending=False)
    # Drop the query college itself (it always matches itself best).
    similar_college = ranked[ranked.index != college_name].head(n_recommendations)
    return similar_college

In [10]:
# Ask for the colleges most similar to one chosen institution; the number of
# results is left at the function's default.
college_to_recommend = 'The American College'
recommendations = get_content_based_recommendations(
    college_name=college_to_recommend,
    college_df=college_df,
)
# Header line first, then the ranked similarity scores on the following lines.
print(f"Recommendations for '{college_to_recommend}':", recommendations, sep="\n")

Recommendations for 'The American College':
names
CSI College of Dental Science and Research               0.110536
Yadava College                                           0.096361
Velammal College of Engineering and Technology (VCET)    0.082397
Name: The American College, dtype: float64
